diff options
Diffstat (limited to 'arch')
813 files changed, 32318 insertions, 19296 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 786a85d4ad40..2e6f843d87c4 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -533,6 +533,31 @@ config STACKPROTECTOR_STRONG about 20% of all kernel functions, which increases the kernel code size by about 2%. +config ARCH_SUPPORTS_SHADOW_CALL_STACK + bool + help + An architecture should select this if it supports Clang's Shadow + Call Stack and implements runtime support for shadow stack + switching. + +config SHADOW_CALL_STACK + bool "Clang Shadow Call Stack" + depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK + depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER + help + This option enables Clang's Shadow Call Stack, which uses a + shadow stack to protect function return addresses from being + overwritten by an attacker. More information can be found in + Clang's documentation: + + https://clang.llvm.org/docs/ShadowCallStack.html + + Note that security guarantees in the kernel differ from the + ones documented for user space. The kernel must store addresses + of shadow stacks in memory, which means an attacker capable of + reading and writing arbitrary memory may be able to locate them + and hijack control flow by modifying the stacks. + config HAVE_ARCH_WITHIN_STACK_FRAMES bool help diff --git a/arch/alpha/include/asm/checksum.h b/arch/alpha/include/asm/checksum.h index 473e6ccb65a3..0eac81624d01 100644 --- a/arch/alpha/include/asm/checksum.h +++ b/arch/alpha/include/asm/checksum.h @@ -41,7 +41,8 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); * here even more important to align src and dst on a 32-bit (or even * better 64-bit) boundary */ -__wsum csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp); +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp); __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum); diff --git a/arch/alpha/include/asm/floppy.h b/arch/alpha/include/asm/floppy.h index 942924756cf2..8dfdb3aa1d96 100644 --- a/arch/alpha/include/asm/floppy.h +++ b/arch/alpha/include/asm/floppy.h @@ -11,8 +11,8 @@ #define __ASM_ALPHA_FLOPPY_H -#define fd_inb(port) inb_p(port) -#define fd_outb(value,port) outb_p(value,port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_enable_dma() enable_dma(FLOPPY_DMA) #define fd_disable_dma() disable_dma(FLOPPY_DMA) diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 36d42da7466a..5ddd128d4b7a 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -477,3 +477,4 @@ # 545 reserved for clone3 547 common openat2 sys_openat2 548 common pidfd_getfd sys_pidfd_getfd +549 common faccessat2 sys_faccessat2 diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c index e53f96e8aa6d..af1dad74e933 100644 --- a/arch/alpha/lib/csum_partial_copy.c +++ b/arch/alpha/lib/csum_partial_copy.c @@ -325,7 +325,7 @@ csum_partial_cfu_unaligned(const unsigned long __user * src, } __wsum -csum_partial_copy_from_user(const void __user *src, void *dst, int len, +csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp) { unsigned long checksum = (__force u32) sum; @@ -369,7 +369,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, } return (__force __wsum)checksum; } -EXPORT_SYMBOL(csum_partial_copy_from_user); +EXPORT_SYMBOL(csum_and_copy_from_user); __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) @@ -377,7 +377,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) __wsum checksum; mm_segment_t oldfs = get_fs(); set_fs(KERNEL_DS); - checksum = csum_partial_copy_from_user((__force const void __user *)src, + checksum = csum_and_copy_from_user((__force const void __user *)src, dst, len, sum, NULL); set_fs(oldfs); return checksum; diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index c77c93c485a0..16fbf74030fe 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -313,6 +313,9 @@ choice config ARCH_MULTIPLATFORM bool "Allow multiple platforms to be selected" depends on MMU + select ARCH_FLATMEM_ENABLE + select ARCH_SPARSEMEM_ENABLE + select ARCH_SELECT_MEMORY_MODEL select ARM_HAS_SG_CHAIN select ARM_PATCH_PHYS_VIRT select AUTO_ZRELADDR @@ -1516,11 +1519,15 @@ config OABI_COMPAT config ARCH_HAS_HOLES_MEMORYMODEL bool -config ARCH_SPARSEMEM_ENABLE +config ARCH_SELECT_MEMORY_MODEL + bool + +config ARCH_FLATMEM_ENABLE bool -config ARCH_SPARSEMEM_DEFAULT - def_bool ARCH_SPARSEMEM_ENABLE +config ARCH_SPARSEMEM_ENABLE + bool + select SPARSEMEM_STATIC if SPARSEMEM config HAVE_ARCH_PFN_VALID def_bool ARCH_HAS_HOLES_MEMORYMODEL || !SPARSEMEM @@ -1955,7 +1962,7 @@ config EFI select UCS2_STRING select EFI_PARAMS_FROM_FDT select EFI_STUB - select EFI_ARMSTUB + select EFI_GENERIC_STUB select EFI_RUNTIME_WRAPPERS ---help--- This option provides support for runtime services provided diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore index db05c6ef3e31..60606b0f378d 100644 --- a/arch/arm/boot/compressed/.gitignore +++ b/arch/arm/boot/compressed/.gitignore @@ -7,12 +7,3 @@ hyp-stub.S piggy_data vmlinux vmlinux.lds - -# borrowed libfdt files -fdt.c -fdt.h -fdt_ro.c -fdt_rw.c -fdt_wip.c -libfdt.h -libfdt_internal.h diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 9c11e7490292..00602a6fba04 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -76,29 +76,30 @@ compress-$(CONFIG_KERNEL_LZMA) = lzma compress-$(CONFIG_KERNEL_XZ) = xzkern compress-$(CONFIG_KERNEL_LZ4) = lz4 -# Borrowed libfdt files for the ATAG compatibility mode - -libfdt := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c -libfdt_hdrs := fdt.h libfdt.h libfdt_internal.h - -libfdt_objs := $(addsuffix .o, $(basename $(libfdt))) - -$(addprefix $(obj)/,$(libfdt) $(libfdt_hdrs)): $(obj)/%: $(srctree)/scripts/dtc/libfdt/% - $(call cmd,shipped) - -$(addprefix $(obj)/,$(libfdt_objs) atags_to_fdt.o): \ - $(addprefix $(obj)/,$(libfdt_hdrs)) +libfdt_objs := fdt_rw.o fdt_ro.o fdt_wip.o fdt.o ifeq ($(CONFIG_ARM_ATAG_DTB_COMPAT),y) OBJS += $(libfdt_objs) atags_to_fdt.o endif +# -fstack-protector-strong triggers protection checks in this code, +# but it is being used too early to link to meaningful stack_chk logic. +nossp-flags-$(CONFIG_CC_HAS_STACKPROTECTOR_NONE) := -fno-stack-protector +$(foreach o, $(libfdt_objs) atags_to_fdt.o, \ + $(eval CFLAGS_$(o) := -I $(srctree)/scripts/dtc/libfdt $(nossp-flags-y))) + +# These were previously generated C files. When you are building the kernel +# with O=, make sure to remove the stale files in the output tree. Otherwise, +# the build system wrongly compiles the stale ones. +ifdef building_out_of_srctree +$(shell rm -f $(addprefix $(obj)/, fdt_rw.c fdt_ro.c fdt_wip.c fdt.c)) +endif + targets := vmlinux vmlinux.lds piggy_data piggy.o \ lib1funcs.o ashldi3.o bswapsdi2.o \ head.o $(OBJS) -clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S \ - $(libfdt) $(libfdt_hdrs) hyp-stub.S +clean-files += piggy_data lib1funcs.S ashldi3.S bswapsdi2.S hyp-stub.S KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING @@ -107,15 +108,6 @@ ORIG_CFLAGS := $(KBUILD_CFLAGS) KBUILD_CFLAGS = $(subst -pg, , $(ORIG_CFLAGS)) endif -# -fstack-protector-strong triggers protection checks in this code, -# but it is being used too early to link to meaningful stack_chk logic. -nossp-flags-$(CONFIG_CC_HAS_STACKPROTECTOR_NONE) := -fno-stack-protector -CFLAGS_atags_to_fdt.o := $(nossp-flags-y) -CFLAGS_fdt.o := $(nossp-flags-y) -CFLAGS_fdt_ro.o := $(nossp-flags-y) -CFLAGS_fdt_rw.o := $(nossp-flags-y) -CFLAGS_fdt_wip.o := $(nossp-flags-y) - ccflags-y := -fpic $(call cc-option,-mno-single-pic-base,) -fno-builtin \ -I$(obj) $(DISABLE_ARM_SSP_PER_TASK_PLUGIN) asflags-y := -DZIMAGE diff --git a/arch/arm/boot/compressed/atags_to_fdt.c b/arch/arm/boot/compressed/atags_to_fdt.c index 64c49747f8a3..8452753efebe 100644 --- a/arch/arm/boot/compressed/atags_to_fdt.c +++ b/arch/arm/boot/compressed/atags_to_fdt.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/libfdt_env.h> #include <asm/setup.h> #include <libfdt.h> diff --git a/arch/arm/boot/compressed/efi-header.S b/arch/arm/boot/compressed/efi-header.S index 62286da318e7..c0e7a745103e 100644 --- a/arch/arm/boot/compressed/efi-header.S +++ b/arch/arm/boot/compressed/efi-header.S @@ -60,7 +60,7 @@ optional_header: .long __pecoff_code_size @ SizeOfCode .long __pecoff_data_size @ SizeOfInitializedData .long 0 @ SizeOfUninitializedData - .long efi_entry - start @ AddressOfEntryPoint + .long efi_pe_entry - start @ AddressOfEntryPoint .long start_offset @ BaseOfCode .long __pecoff_data_start - start @ BaseOfData diff --git a/arch/arm/boot/compressed/fdt.c b/arch/arm/boot/compressed/fdt.c new file mode 100644 index 000000000000..f8ea7a201ab1 --- /dev/null +++ b/arch/arm/boot/compressed/fdt.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../../../lib/fdt.c" diff --git a/arch/arm/boot/compressed/fdt_ro.c b/arch/arm/boot/compressed/fdt_ro.c new file mode 100644 index 000000000000..93970a4ad5ae --- /dev/null +++ b/arch/arm/boot/compressed/fdt_ro.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../../../lib/fdt_ro.c" diff --git a/arch/arm/boot/compressed/fdt_rw.c b/arch/arm/boot/compressed/fdt_rw.c new file mode 100644 index 000000000000..f7c6b8b7e01c --- /dev/null +++ b/arch/arm/boot/compressed/fdt_rw.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../../../lib/fdt_rw.c" diff --git a/arch/arm/boot/compressed/fdt_wip.c b/arch/arm/boot/compressed/fdt_wip.c new file mode 100644 index 000000000000..048d2c7a088d --- /dev/null +++ b/arch/arm/boot/compressed/fdt_wip.c @@ -0,0 +1,2 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include "../../../../lib/fdt_wip.c" diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index e8e1c866e413..c79db44ba128 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -287,28 +287,22 @@ not_angel: */ mov r0, pc cmp r0, r4 - ldrcc r0, LC0+28 + ldrcc r0, .Lheadroom addcc r0, r0, pc cmpcc r4, r0 orrcc r4, r4, #1 @ remember we skipped cache_on blcs cache_on -restart: adr r0, LC0 - ldmia r0, {r1, r2, r3, r6, r11, r12} - ldr sp, [r0, #24] - - /* - * We might be running at a different address. We need - * to fix up various pointers. - */ - sub r0, r0, r1 @ calculate the delta offset - add r6, r6, r0 @ _edata +restart: adr r0, LC1 + ldr sp, [r0] + ldr r6, [r0, #4] + add sp, sp, r0 + add r6, r6, r0 get_inflated_image_size r9, r10, lr #ifndef CONFIG_ZBOOT_ROM /* malloc space is above the relocated stack (64k max) */ - add sp, sp, r0 add r10, sp, #0x10000 #else /* @@ -322,9 +316,6 @@ restart: adr r0, LC0 mov r5, #0 @ init dtb size to 0 #ifdef CONFIG_ARM_APPENDED_DTB /* - * r0 = delta - * r2 = BSS start - * r3 = BSS end * r4 = final kernel address (possibly with LSB set) * r5 = appended dtb size (still unknown) * r6 = _edata @@ -332,8 +323,6 @@ restart: adr r0, LC0 * r8 = atags/device tree pointer * r9 = size of decompressed image * r10 = end of this image, including bss/stack/malloc space if non XIP - * r11 = GOT start - * r12 = GOT end * sp = stack pointer * * if there are device trees (dtb) appended to zImage, advance r10 so that the @@ -381,7 +370,6 @@ restart: adr r0, LC0 /* temporarily relocate the stack past the DTB work space */ add sp, sp, r5 - stmfd sp!, {r0-r3, ip, lr} mov r0, r8 mov r1, r6 mov r2, r5 @@ -400,7 +388,6 @@ restart: adr r0, LC0 mov r2, r5 bleq atags_to_fdt - ldmfd sp!, {r0-r3, ip, lr} sub sp, sp, r5 #endif @@ -537,6 +524,10 @@ dtb_check_done: mov pc, r0 wont_overwrite: + adr r0, LC0 + ldmia r0, {r1, r2, r3, r11, r12} + sub r0, r0, r1 @ calculate the delta offset + /* * If delta is zero, we are running at the address we were linked at. * r0 = delta @@ -660,13 +651,18 @@ not_relocated: mov r0, #0 LC0: .word LC0 @ r1 .word __bss_start @ r2 .word _end @ r3 - .word _edata @ r6 .word _got_start @ r11 .word _got_end @ ip - .word .L_user_stack_end @ sp - .word _end - restart + 16384 + 1024*1024 .size LC0, . - LC0 + .type LC1, #object +LC1: .word .L_user_stack_end - LC1 @ sp + .word _edata - LC1 @ r6 + .size LC1, . - LC1 + +.Lheadroom: + .word _end - restart + 16384 + 1024*1024 + .Linflated_image_size_offset: .long (input_data_end - 4) - . @@ -1434,38 +1430,26 @@ reloc_code_end: #ifdef CONFIG_EFI_STUB ENTRY(efi_enter_kernel) - mov r7, r0 @ preserve image base - mov r4, r1 @ preserve DT pointer + mov r4, r0 @ preserve image base + mov r8, r1 @ preserve DT pointer - mov r0, r4 @ DT start - add r1, r4, r2 @ DT end - bl cache_clean_flush + mrc p15, 0, r0, c1, c0, 0 @ read SCTLR + tst r0, #0x1 @ MMU enabled? + orreq r4, r4, #1 @ set LSB if not - mov r0, r7 @ relocated zImage - ldr r1, =_edata @ size of zImage - add r1, r1, r0 @ end of zImage + mov r0, r8 @ DT start + add r1, r8, r2 @ DT end bl cache_clean_flush - @ The PE/COFF loader might not have cleaned the code we are - @ running beyond the PoU, and so calling cache_off below from - @ inside the PE/COFF loader allocated region is unsafe unless - @ we explicitly clean it to the PoC. - ARM( adrl r0, call_cache_fn ) - THUMB( adr r0, call_cache_fn ) @ region of code we will - adr r1, 0f @ run with MMU off - bl cache_clean_flush - bl cache_off + adr r0, 0f @ switch to our stack + ldr sp, [r0] + add sp, sp, r0 - @ Set parameters for booting zImage according to boot protocol - @ put FDT address in r2, it was returned by efi_entry() - @ r1 is the machine type, and r0 needs to be 0 - mov r0, #0 - mov r1, #0xFFFFFFFF - mov r2, r4 - add r7, r7, #(__efi_start - start) - mov pc, r7 @ no mode switch + mov r5, #0 @ appended DTB size + mov r7, #0xFFFFFFFF @ machine ID + b wont_overwrite ENDPROC(efi_enter_kernel) -0: +0: .long .L_user_stack_end - . #endif .align diff --git a/arch/arm/boot/compressed/libfdt_env.h b/arch/arm/boot/compressed/libfdt_env.h deleted file mode 100644 index 6a0f1f524466..000000000000 --- a/arch/arm/boot/compressed/libfdt_env.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ARM_LIBFDT_ENV_H -#define _ARM_LIBFDT_ENV_H - -#include <linux/limits.h> -#include <linux/types.h> -#include <linux/string.h> -#include <asm/byteorder.h> - -#define INT32_MAX S32_MAX -#define UINT32_MAX U32_MAX - -typedef __be16 fdt16_t; -typedef __be32 fdt32_t; -typedef __be64 fdt64_t; - -#define fdt16_to_cpu(x) be16_to_cpu(x) -#define cpu_to_fdt16(x) cpu_to_be16(x) -#define fdt32_to_cpu(x) be32_to_cpu(x) -#define cpu_to_fdt32(x) cpu_to_be32(x) -#define fdt64_to_cpu(x) be64_to_cpu(x) -#define cpu_to_fdt64(x) cpu_to_be64(x) - -#endif diff --git a/arch/arm/boot/compressed/vmlinux.lds.S b/arch/arm/boot/compressed/vmlinux.lds.S index f82b5962d97e..09ac33f52814 100644 --- a/arch/arm/boot/compressed/vmlinux.lds.S +++ b/arch/arm/boot/compressed/vmlinux.lds.S @@ -63,9 +63,11 @@ SECTIONS _etext = .; .got.plt : { *(.got.plt) } +#ifndef CONFIG_EFI_STUB _got_start = .; .got : { *(.got) } _got_end = .; +#endif /* ensure the zImage file size is always a multiple of 64 bits */ /* (without a dummy byte, ld just ignores the empty section) */ @@ -74,11 +76,14 @@ SECTIONS #ifdef CONFIG_EFI_STUB .data : ALIGN(4096) { __pecoff_data_start = .; + _got_start = .; + *(.got) + _got_end = .; /* * The EFI stub always executes from RAM, and runs strictly before the * decompressor, so we can make an exception for its r/w data, and keep it */ - *(.data.efistub) + *(.data.efistub .bss.efistub) __pecoff_data_end = .; /* diff --git a/arch/arm/boot/dts/am335x-guardian.dts b/arch/arm/boot/dts/am335x-guardian.dts index 81e0f63e94d3..0ebe9e2c150e 100644 --- a/arch/arm/boot/dts/am335x-guardian.dts +++ b/arch/arm/boot/dts/am335x-guardian.dts @@ -105,6 +105,7 @@ ti,timers = <&timer7>; pinctrl-names = "default"; pinctrl-0 = <&dmtimer7_pins>; + ti,clock-source = <0x01>; }; vmmcsd_fixed: regulator-3v3 { diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index a1fd3e63e86e..92466b9eb6ba 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -156,6 +156,7 @@ pinctrl-0 = <&pwm_pins>; ti,timers = <&timer11>; #pwm-cells = <3>; + ti,clock-source = <0x01>; }; /* HS USB Host PHY on PORT 1 */ diff --git a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi index f7b82ced4080..381f0e82bb70 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi +++ b/arch/arm/boot/dts/logicpd-torpedo-baseboard.dtsi @@ -65,6 +65,7 @@ pinctrl-0 = <&pwm_pins>; ti,timers = <&timer10>; #pwm-cells = <3>; + ti,clock-source = <0x01>; }; }; diff --git a/arch/arm/boot/dts/omap3-gta04.dtsi b/arch/arm/boot/dts/omap3-gta04.dtsi index 409a758c99f1..ecc45862b4f3 100644 --- a/arch/arm/boot/dts/omap3-gta04.dtsi +++ b/arch/arm/boot/dts/omap3-gta04.dtsi @@ -150,6 +150,7 @@ compatible = "ti,omap-dmtimer-pwm"; ti,timers = <&timer11>; #pwm-cells = <3>; + ti,clock-source = <0x01>; }; hsusb2_phy: hsusb2_phy { diff --git a/arch/arm/configs/omap2plus_defconfig b/arch/arm/configs/omap2plus_defconfig index 8b83d4a5d309..fe383f5a92fb 100644 --- a/arch/arm/configs/omap2plus_defconfig +++ b/arch/arm/configs/omap2plus_defconfig @@ -81,7 +81,7 @@ CONFIG_PARTITION_ADVANCED=y CONFIG_BINFMT_MISC=y CONFIG_CMA=y CONFIG_ZSMALLOC=m -CONFIG_PGTABLE_MAPPING=y +CONFIG_ZSMALLOC_PGTABLE_MAPPING=y CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y diff --git a/arch/arm/crypto/sha1_glue.c b/arch/arm/crypto/sha1_glue.c index c80b0ebfd02f..4e954b3f7ecd 100644 --- a/arch/arm/crypto/sha1_glue.c +++ b/arch/arm/crypto/sha1_glue.c @@ -14,7 +14,6 @@ #include <crypto/internal/hash.h> #include <linux/init.h> #include <linux/module.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha1_base.h> diff --git a/arch/arm/crypto/sha1_neon_glue.c b/arch/arm/crypto/sha1_neon_glue.c index 2c3627334335..0071e5e4411a 100644 --- a/arch/arm/crypto/sha1_neon_glue.c +++ b/arch/arm/crypto/sha1_neon_glue.c @@ -18,7 +18,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha1_base.h> diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c index 215497f011f2..b8a4f79020cf 100644 --- a/arch/arm/crypto/sha256_glue.c +++ b/arch/arm/crypto/sha256_glue.c @@ -15,7 +15,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <linux/string.h> #include <crypto/sha.h> diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c index 38645e415196..79820b9e2541 100644 --- a/arch/arm/crypto/sha256_neon_glue.c +++ b/arch/arm/crypto/sha256_neon_glue.c @@ -11,7 +11,6 @@ #include <crypto/internal/hash.h> #include <crypto/internal/simd.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <linux/string.h> #include <crypto/sha.h> diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index 3546d294d55f..feac2c8b86f2 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -269,10 +269,9 @@ .endif ;\ .popsection #define ALT_UP_B(label) \ - .equ up_b_offset, label - 9998b ;\ .pushsection ".alt.smp.init", "a" ;\ .long 9998b ;\ - W(b) . + up_b_offset ;\ + W(b) . + (label - 9998b) ;\ .popsection #else #define ALT_SMP(instr...) diff --git a/arch/arm/include/asm/checksum.h b/arch/arm/include/asm/checksum.h index 20043e0ebb07..ed6073fee338 100644 --- a/arch/arm/include/asm/checksum.h +++ b/arch/arm/include/asm/checksum.h @@ -40,6 +40,20 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum); __wsum csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr); +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +static inline +__wsum csum_and_copy_from_user (const void __user *src, void *dst, + int len, __wsum sum, int *err_ptr) +{ + if (access_ok(src, len)) + return csum_partial_copy_from_user(src, dst, len, sum, err_ptr); + + if (len) + *err_ptr = -EFAULT; + + return sum; +} + /* * Fold a partial checksum without adding pseudo headers */ diff --git a/arch/arm/include/asm/efi.h b/arch/arm/include/asm/efi.h index 5ac46e2860bc..9383f236e795 100644 --- a/arch/arm/include/asm/efi.h +++ b/arch/arm/include/asm/efi.h @@ -50,14 +50,6 @@ void efi_virtmap_unload(void); /* arch specific definitions used by the stub code */ -#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__) -#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__) -#define efi_is_native() (true) - -#define efi_table_attr(inst, attr) (inst->attr) - -#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) - struct screen_info *alloc_screen_info(void); void free_screen_info(struct screen_info *si); diff --git a/arch/arm/include/asm/floppy.h b/arch/arm/include/asm/floppy.h index 79fa327238e8..e1cb04ed5008 100644 --- a/arch/arm/include/asm/floppy.h +++ b/arch/arm/include/asm/floppy.h @@ -9,20 +9,20 @@ #ifndef __ASM_ARM_FLOPPY_H #define __ASM_ARM_FLOPPY_H -#define fd_outb(val,port) \ +#define fd_outb(val, base, reg) \ do { \ int new_val = (val); \ - if (((port) & 7) == FD_DOR) { \ + if ((reg) == FD_DOR) { \ if (new_val & 0xf0) \ new_val = (new_val & 0x0c) | \ floppy_selects[new_val & 3]; \ else \ new_val &= 0x0c; \ } \ - outb(new_val, (port)); \ + outb(new_val, (base) + (reg)); \ } while(0) -#define fd_inb(port) inb((port)) +#define fd_inb(base, reg) inb((base) + (reg)) #define fd_request_irq() request_irq(IRQ_FLOPPYDISK,floppy_interrupt,\ 0,"floppy",NULL) #define fd_free_irq() free_irq(IRQ_FLOPPYDISK,NULL) diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index 98bdea51089d..82e96ac83684 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -7,7 +7,6 @@ #include <linux/export.h> #include <linux/sched.h> #include <linux/string.h> -#include <linux/cryptohash.h> #include <linux/delay.h> #include <linux/in6.h> #include <linux/syscalls.h> diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index deef17f34bd2..af0a8500a24e 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -55,6 +55,13 @@ void *module_alloc(unsigned long size) } #endif +bool module_init_section(const char *name) +{ + return strstarts(name, ".init") || + strstarts(name, ".ARM.extab.init") || + strstarts(name, ".ARM.exidx.init"); +} + bool module_exit_section(const char *name) { return strstarts(name, ".exit") || @@ -409,8 +416,17 @@ module_arch_cleanup(struct module *mod) #ifdef CONFIG_ARM_UNWIND int i; - for (i = 0; i < ARM_SEC_MAX; i++) - if (mod->arch.unwind[i]) - unwind_table_del(mod->arch.unwind[i]); + for (i = 0; i < ARM_SEC_MAX; i++) { + unwind_table_del(mod->arch.unwind[i]); + mod->arch.unwind[i] = NULL; + } +#endif +} + +void __weak module_arch_freeing_init(struct module *mod) +{ +#ifdef CONFIG_ARM_UNWIND + unwind_table_del(mod->arch.unwind[ARM_SEC_INIT]); + mod->arch.unwind[ARM_SEC_INIT] = NULL; #endif } diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 17bd32b22371..0203e545bbc8 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -253,20 +253,15 @@ asmlinkage long sys_oabi_epoll_ctl(int epfd, int op, int fd, { struct oabi_epoll_event user; struct epoll_event kernel; - mm_segment_t fs; - long ret; - if (op == EPOLL_CTL_DEL) - return sys_epoll_ctl(epfd, op, fd, NULL); - if (copy_from_user(&user, event, sizeof(user))) + if (ep_op_has_event(op) && + copy_from_user(&user, event, sizeof(user))) return -EFAULT; + kernel.events = user.events; kernel.data = user.data; - fs = get_fs(); - set_fs(KERNEL_DS); - ret = sys_epoll_ctl(epfd, op, fd, &kernel); - set_fs(fs); - return ret; + + return do_epoll_ctl(epfd, op, fd, &kernel, false); } asmlinkage long sys_oabi_epoll_wait(int epfd, diff --git a/arch/arm/mach-sa1100/shannon.c b/arch/arm/mach-sa1100/shannon.c index 5bc82e2671c6..351f891b4842 100644 --- a/arch/arm/mach-sa1100/shannon.c +++ b/arch/arm/mach-sa1100/shannon.c @@ -104,6 +104,14 @@ static struct fixed_voltage_config shannon_cf_vcc_pdata __initdata = { .enabled_at_boot = 1, }; +static struct gpiod_lookup_table shannon_display_gpio_table = { + .dev_id = "sa11x0-fb", + .table = { + GPIO_LOOKUP("gpio", 22, "shannon-lcden", GPIO_ACTIVE_HIGH), + { }, + }, +}; + static void __init shannon_init(void) { sa11x0_register_fixed_regulator(0, &shannon_cf_vcc_pdata, @@ -113,6 +121,7 @@ static void __init shannon_init(void) sa11x0_register_pcmcia(0, &shannon_pcmcia0_gpio_table); sa11x0_register_pcmcia(1, &shannon_pcmcia1_gpio_table); sa11x0_ppc_configure_mcp(); + gpiod_add_lookup_table(&shannon_display_gpio_table); sa11x0_register_lcd(&shannon_lcd_info); sa11x0_register_mtd(&shannon_flash_data, &shannon_flash_resource, 1); sa11x0_register_mcp(&shannon_mcp_data); diff --git a/arch/arm/mm/proc-macros.S b/arch/arm/mm/proc-macros.S index 5461d589a1e2..60ac7c5999a9 100644 --- a/arch/arm/mm/proc-macros.S +++ b/arch/arm/mm/proc-macros.S @@ -5,6 +5,7 @@ * VMA_VM_FLAGS * VM_EXEC */ +#include <linux/const.h> #include <asm/asm-offsets.h> #include <asm/thread_info.h> @@ -30,7 +31,7 @@ * act_mm - get current->active_mm */ .macro act_mm, rd - bic \rd, sp, #8128 + bic \rd, sp, #(THREAD_SIZE - 1) & ~63 bic \rd, \rd, #63 ldr \rd, [\rd, #TI_TASK] .if (TSK_ACTIVE_MM > IMM12_MASK) diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 4d1cf74a2caa..d5cae5ffede0 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -451,3 +451,4 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 5d513f461957..552d36cacc05 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -9,6 +9,7 @@ config ARM64 select ACPI_MCFG if (ACPI && PCI) select ACPI_SPCR_TABLE if ACPI select ACPI_PPTT if ACPI + select ARCH_BINFMT_ELF_STATE select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_DMA_PREP_COHERENT @@ -33,6 +34,7 @@ config ARM64 select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TEARDOWN_DMA_OPS if IOMMU_SUPPORT select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAVE_ELF_PROT select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK if !PREEMPTION select ARCH_INLINE_READ_LOCK_BH if !PREEMPTION @@ -62,9 +64,12 @@ config ARM64 select ARCH_INLINE_SPIN_UNLOCK_IRQRESTORE if !PREEMPTION select ARCH_KEEP_MEMBLOCK select ARCH_USE_CMPXCHG_LOCKREF + select ARCH_USE_GNU_PROPERTY select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_USE_SYM_ANNOTATIONS select ARCH_SUPPORTS_MEMORY_FAILURE + select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG) select ARCH_SUPPORTS_NUMA_BALANCING @@ -525,13 +530,13 @@ config ARM64_ERRATUM_1418040 If unsure, say Y. -config ARM64_WORKAROUND_SPECULATIVE_AT_VHE +config ARM64_WORKAROUND_SPECULATIVE_AT bool config ARM64_ERRATUM_1165522 - bool "Cortex-A76: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + bool "Cortex-A76: 1165522: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y - select ARM64_WORKAROUND_SPECULATIVE_AT_VHE + select ARM64_WORKAROUND_SPECULATIVE_AT help This option adds a workaround for ARM Cortex-A76 erratum 1165522. @@ -541,10 +546,23 @@ config ARM64_ERRATUM_1165522 If unsure, say Y. +config ARM64_ERRATUM_1319367 + bool "Cortex-A57/A72: 1319537: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + default y + select ARM64_WORKAROUND_SPECULATIVE_AT + help + This option adds work arounds for ARM Cortex-A57 erratum 1319537 + and A72 erratum 1319367 + + Cortex-A57 and A72 cores could end-up with corrupted TLBs by + speculating an AT instruction during a guest context switch. + + If unsure, say Y. + config ARM64_ERRATUM_1530923 - bool "Cortex-A55: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + bool "Cortex-A55: 1530923: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" default y - select ARM64_WORKAROUND_SPECULATIVE_AT_VHE + select ARM64_WORKAROUND_SPECULATIVE_AT help This option adds a workaround for ARM Cortex-A55 erratum 1530923. @@ -554,6 +572,9 @@ config ARM64_ERRATUM_1530923 If unsure, say Y. +config ARM64_WORKAROUND_REPEAT_TLBI + bool + config ARM64_ERRATUM_1286807 bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" default y @@ -570,22 +591,6 @@ config ARM64_ERRATUM_1286807 invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation. -config ARM64_WORKAROUND_SPECULATIVE_AT_NVHE - bool - -config ARM64_ERRATUM_1319367 - bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" - default y - select ARM64_WORKAROUND_SPECULATIVE_AT_NVHE - help - This option adds work arounds for ARM Cortex-A57 erratum 1319537 - and A72 erratum 1319367 - - Cortex-A57 and A72 cores could end-up with corrupted TLBs by - speculating an AT instruction during a guest context switch. - - If unsure, say Y. - config ARM64_ERRATUM_1463225 bool "Cortex-A76: Software Step might prevent interrupt recognition" default y @@ -695,6 +700,35 @@ config CAVIUM_TX2_ERRATUM_219 If unsure, say Y. +config FUJITSU_ERRATUM_010001 + bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly" + default y + help + This option adds a workaround for Fujitsu-A64FX erratum E#010001. + On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory + accesses may cause undefined fault (Data abort, DFSC=0b111111). + This fault occurs under a specific hardware condition when a + load/store instruction performs an address translation using: + case-1 TTBR0_EL1 with TCR_EL1.NFD0 == 1. + case-2 TTBR0_EL2 with TCR_EL2.NFD0 == 1. + case-3 TTBR1_EL1 with TCR_EL1.NFD1 == 1. + case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1. + + The workaround is to ensure these bits are clear in TCR_ELx. + The workaround only affects the Fujitsu-A64FX. + + If unsure, say Y. + +config HISILICON_ERRATUM_161600802 + bool "Hip07 161600802: Erroneous redistributor VLPI base" + default y + help + The HiSilicon Hip07 SoC uses the wrong redistributor base + when issued ITS commands such as VMOVP and VMAPP, and requires + a 128kB offset to be applied to the target address in this commands. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y @@ -706,9 +740,6 @@ config QCOM_FALKOR_ERRATUM_1003 is unchanged. Work around the erratum by invalidating the walk cache entries for the trampoline before entering the kernel proper. -config ARM64_WORKAROUND_REPEAT_TLBI - bool - config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" default y @@ -730,25 +761,6 @@ config QCOM_QDF2400_ERRATUM_0065 If unsure, say Y. -config SOCIONEXT_SYNQUACER_PREITS - bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" - default y - help - Socionext Synquacer SoCs implement a separate h/w block to generate - MSI doorbell writes with non-zero values for the device ID. - - If unsure, say Y. - -config HISILICON_ERRATUM_161600802 - bool "Hip07 161600802: Erroneous redistributor VLPI base" - default y - help - The HiSilicon Hip07 SoC uses the wrong redistributor base - when issued ITS commands such as VMOVP and VMAPP, and requires - a 128kB offset to be applied to the target address in this commands. - - If unsure, say Y. - config QCOM_FALKOR_ERRATUM_E1041 bool "Falkor E1041: Speculative instruction fetches might cause errant memory access" default y @@ -759,22 +771,12 @@ config QCOM_FALKOR_ERRATUM_E1041 If unsure, say Y. -config FUJITSU_ERRATUM_010001 - bool "Fujitsu-A64FX erratum E#010001: Undefined fault may occur wrongly" +config SOCIONEXT_SYNQUACER_PREITS + bool "Socionext Synquacer: Workaround for GICv3 pre-ITS" default y help - This option adds a workaround for Fujitsu-A64FX erratum E#010001. - On some variants of the Fujitsu-A64FX cores ver(1.0, 1.1), memory - accesses may cause undefined fault (Data abort, DFSC=0b111111). - This fault occurs under a specific hardware condition when a - load/store instruction performs an address translation using: - case-1 TTBR0_EL1 with TCR_EL1.NFD0 == 1. - case-2 TTBR0_EL2 with TCR_EL2.NFD0 == 1. - case-3 TTBR1_EL1 with TCR_EL1.NFD1 == 1. - case-4 TTBR1_EL2 with TCR_EL2.NFD1 == 1. - - The workaround is to ensure these bits are clear in TCR_ELx. - The workaround only affects the Fujitsu-A64FX. + Socionext Synquacer SoCs implement a separate h/w block to generate + MSI doorbell writes with non-zero values for the device ID. If unsure, say Y. @@ -1026,6 +1028,10 @@ config ARCH_HAS_CACHE_LINE_SIZE config ARCH_ENABLE_SPLIT_PMD_PTLOCK def_bool y if PGTABLE_LEVELS > 2 +# Supported by clang >= 7.0 +config CC_HAVE_SHADOW_CALL_STACK + def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) + config SECCOMP bool "Enable seccomp to safely compute untrusted bytecode" ---help--- @@ -1585,6 +1591,48 @@ endmenu menu "ARMv8.5 architectural features" +config ARM64_BTI + bool "Branch Target Identification support" + default y + help + Branch Target Identification (part of the ARMv8.5 Extensions) + provides a mechanism to limit the set of locations to which computed + branch instructions such as BR or BLR can jump. + + To make use of BTI on CPUs that support it, say Y. + + BTI is intended to provide complementary protection to other control + flow integrity protection mechanisms, such as the Pointer + authentication mechanism provided as part of the ARMv8.3 Extensions. + For this reason, it does not make sense to enable this option without + also enabling support for pointer authentication. Thus, when + enabling this option you should also select ARM64_PTR_AUTH=y. + + Userspace binaries must also be specifically compiled to make use of + this mechanism. If you say N here or the hardware does not support + BTI, such binaries can still run, but you get no additional + enforcement of branch destinations. + +config ARM64_BTI_KERNEL + bool "Use Branch Target Identification for kernel" + default y + depends on ARM64_BTI + depends on ARM64_PTR_AUTH + depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI + # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697 + depends on !CC_IS_GCC || GCC_VERSION >= 100100 + depends on !(CC_IS_CLANG && GCOV_KERNEL) + depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS) + help + Build the kernel with Branch Target Identification annotations + and enable enforcement of this for kernel code. When this option + is enabled and the system supports BTI all kernel code including + modular code must have BTI enabled. + +config CC_HAS_BRANCH_PROT_PAC_RET_BTI + # GCC 9 or later, clang 8 or later + def_bool $(cc-option,-mbranch-protection=pac-ret+leaf+bti) + config ARM64_E0PD bool "Enable support for E0PD" default y @@ -1786,7 +1834,7 @@ config EFI select EFI_PARAMS_FROM_FDT select EFI_RUNTIME_WRAPPERS select EFI_STUB - select EFI_ARMSTUB + select EFI_GENERIC_STUB default y help This option provides support for runtime services provided diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 85e4149cc5d5..650e1185c190 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -12,7 +12,6 @@ LDFLAGS_vmlinux :=--no-undefined -X CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) -GZFLAGS :=-9 ifeq ($(CONFIG_RELOCATABLE), y) # Pass --no-apply-dynamic-relocs to restore pre-binutils-2.27 behaviour @@ -71,7 +70,14 @@ branch-prot-flags-y += $(call cc-option,-mbranch-protection=none) ifeq ($(CONFIG_ARM64_PTR_AUTH),y) branch-prot-flags-$(CONFIG_CC_HAS_SIGN_RETURN_ADDRESS) := -msign-return-address=all +# We enable additional protection for leaf functions as there is some +# narrow potential for ROP protection benefits and no substantial +# performance impact has been observed. +ifeq ($(CONFIG_ARM64_BTI_KERNEL),y) +branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET_BTI) := -mbranch-protection=pac-ret+leaf+bti +else branch-prot-flags-$(CONFIG_CC_HAS_BRANCH_PROT_PAC_RET) := -mbranch-protection=pac-ret+leaf +endif # -march=armv8.3-a enables the non-nops instructions for PAC, to avoid the # compiler to generate them and consequently to break the single image contract # we pass it only to the assembler. This option is utilized only in case of non @@ -81,6 +87,10 @@ endif KBUILD_CFLAGS += $(branch-prot-flags-y) +ifeq ($(CONFIG_SHADOW_CALL_STACK), y) +KBUILD_CFLAGS += -ffixed-x18 +endif + ifeq ($(CONFIG_CPU_BIG_ENDIAN), y) KBUILD_CPPFLAGS += -mbig-endian CHECKFLAGS += -D__AARCH64EB__ @@ -118,7 +128,7 @@ TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \ int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \ rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}") else -TEXT_OFFSET := 0x00080000 +TEXT_OFFSET := 0x0 endif ifeq ($(CONFIG_KASAN_SW_TAGS), y) @@ -131,7 +141,7 @@ KBUILD_CFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_CPPFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) KBUILD_AFLAGS += -DKASAN_SHADOW_SCALE_SHIFT=$(KASAN_SHADOW_SCALE_SHIFT) -export TEXT_OFFSET GZFLAGS +export TEXT_OFFSET core-y += arch/arm64/ libs-y := arch/arm64/lib/ $(libs-y) diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index ed5409c6abf4..395bbf64b2ab 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -158,7 +158,6 @@ static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm, unsigned int key_len) { struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); - SHASH_DESC_ON_STACK(desc, ctx->hash); u8 digest[SHA256_DIGEST_SIZE]; int ret; @@ -166,8 +165,7 @@ static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm, if (ret) return ret; - desc->tfm = ctx->hash; - crypto_shash_digest(desc, in_key, key_len, digest); + crypto_shash_tfm_digest(ctx->hash, in_key, key_len, digest); return aes_expandkey(&ctx->key2, digest, sizeof(digest)); } diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S index 5a95c2628fbf..111d9c9abddd 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -66,7 +66,7 @@ #include <asm/assembler.h> .text - .cpu generic+crypto + .arch armv8-a+crypto init_crc .req w19 buf .req x20 diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c index ddf4a0d85c1c..77bc6e72abae 100644 --- a/arch/arm64/crypto/sha256-glue.c +++ b/arch/arm64/crypto/sha256-glue.c @@ -12,7 +12,6 @@ #include <crypto/internal/simd.h> #include <crypto/sha.h> #include <crypto/sha256_base.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <linux/string.h> diff --git a/arch/arm64/crypto/sha512-glue.c b/arch/arm64/crypto/sha512-glue.c index 78d3083de6b7..370ccb29602f 100644 --- a/arch/arm64/crypto/sha512-glue.c +++ b/arch/arm64/crypto/sha512-glue.c @@ -6,7 +6,6 @@ */ #include <crypto/internal/hash.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <linux/string.h> #include <crypto/sha.h> diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index ce2a8486992b..52dead2a8640 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -39,25 +39,58 @@ alternative_if ARM64_HAS_GENERIC_AUTH alternative_else_nop_endif .endm - .macro ptrauth_keys_install_kernel tsk, sync, tmp1, tmp2, tmp3 -alternative_if ARM64_HAS_ADDRESS_AUTH + .macro __ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 mov \tmp1, #THREAD_KEYS_KERNEL add \tmp1, \tsk, \tmp1 ldp \tmp2, \tmp3, [\tmp1, #PTRAUTH_KERNEL_KEY_APIA] msr_s SYS_APIAKEYLO_EL1, \tmp2 msr_s SYS_APIAKEYHI_EL1, \tmp3 - .if \sync == 1 + .endm + + .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 +alternative_if ARM64_HAS_ADDRESS_AUTH + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 +alternative_else_nop_endif + .endm + + .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3 +alternative_if ARM64_HAS_ADDRESS_AUTH + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 isb - .endif alternative_else_nop_endif .endm + .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 + mrs \tmp1, id_aa64isar1_el1 + ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + cbz \tmp1, .Lno_addr_auth\@ + mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ + SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) + mrs \tmp2, sctlr_el1 + orr \tmp2, \tmp2, \tmp1 + msr sctlr_el1, \tmp2 + __ptrauth_keys_install_kernel_nosync \tsk, \tmp1, \tmp2, \tmp3 + isb +.Lno_addr_auth\@: + .endm + + .macro ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 +alternative_if_not ARM64_HAS_ADDRESS_AUTH + b .Lno_addr_auth\@ +alternative_else_nop_endif + __ptrauth_keys_init_cpu \tsk, \tmp1, \tmp2, \tmp3 +.Lno_addr_auth\@: + .endm + #else /* CONFIG_ARM64_PTR_AUTH */ .macro ptrauth_keys_install_user tsk, tmp1, tmp2, tmp3 .endm - .macro ptrauth_keys_install_kernel tsk, sync, tmp1, tmp2, tmp3 + .macro ptrauth_keys_install_kernel_nosync tsk, tmp1, tmp2, tmp3 + .endm + + .macro ptrauth_keys_install_kernel tsk, tmp1, tmp2, tmp3 .endm #endif /* CONFIG_ARM64_PTR_AUTH */ diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 0bff325117b4..54d181177656 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -736,4 +736,54 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .Lyield_out_\@ : .endm +/* + * This macro emits a program property note section identifying + * architecture features which require special handling, mainly for + * use in assembly files included in the VDSO. + */ + +#define NT_GNU_PROPERTY_TYPE_0 5 +#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000 + +#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0) +#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1) + +#ifdef CONFIG_ARM64_BTI_KERNEL +#define GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT \ + ((GNU_PROPERTY_AARCH64_FEATURE_1_BTI | \ + GNU_PROPERTY_AARCH64_FEATURE_1_PAC)) +#endif + +#ifdef GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT +.macro emit_aarch64_feature_1_and, feat=GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT + .pushsection .note.gnu.property, "a" + .align 3 + .long 2f - 1f + .long 6f - 3f + .long NT_GNU_PROPERTY_TYPE_0 +1: .string "GNU" +2: + .align 3 +3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND + .long 5f - 4f +4: + /* + * This is described with an array of char in the Linux API + * spec but the text and all other usage (including binutils, + * clang and GCC) treat this as a 32 bit value so no swizzling + * is required for big endian. + */ + .long \feat +5: + .align 3 +6: + .popsection +.endm + +#else +.macro emit_aarch64_feature_1_and, feat=0 +.endm + +#endif /* GNU_PROPERTY_AARCH64_FEATURE_1_DEFAULT */ + #endif /* __ASM_ASSEMBLER_H */ diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index e6cca3d4acf7..ce50c1f1f1ea 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -79,7 +79,7 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * IPI all online CPUs so that they undergo a context synchronization * event and are forced to refetch the new instructions. */ -#ifdef CONFIG_KGDB + /* * KGDB performs cache maintenance with interrupts disabled, so we * will deadlock trying to IPI the secondary CPUs. In theory, we can @@ -89,9 +89,9 @@ static inline void flush_icache_range(unsigned long start, unsigned long end) * the patching operation, so we don't need extra IPIs here anyway. * In which case, add a KGDB-specific bodge and return early. */ - if (kgdb_connected && irqs_disabled()) + if (in_dbg_master()) return; -#endif + kick_all_cpus_sync(); } diff --git a/arch/arm64/include/asm/compiler.h b/arch/arm64/include/asm/compiler.h index eece20d2c55f..51a7ce87cdfe 100644 --- a/arch/arm64/include/asm/compiler.h +++ b/arch/arm64/include/asm/compiler.h @@ -2,8 +2,6 @@ #ifndef __ASM_COMPILER_H #define __ASM_COMPILER_H -#if defined(CONFIG_ARM64_PTR_AUTH) - /* * The EL0/EL1 pointer bits used by a pointer authentication code. * This is dependent on TBI0/TBI1 being enabled, or bits 63:56 would also apply. @@ -19,6 +17,4 @@ #define __builtin_return_address(val) \ (void *)(ptrauth_clear_pac((unsigned long)__builtin_return_address(val))) -#endif /* CONFIG_ARM64_PTR_AUTH */ - #endif /* __ASM_COMPILER_H */ diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h index b4a40535a3d8..7faae6ff3ab4 100644 --- a/arch/arm64/include/asm/cpu.h +++ b/arch/arm64/include/asm/cpu.h @@ -33,6 +33,7 @@ struct cpuinfo_arm64 { u64 reg_id_aa64zfr0; u32 reg_id_dfr0; + u32 reg_id_dfr1; u32 reg_id_isar0; u32 reg_id_isar1; u32 reg_id_isar2; @@ -44,8 +45,11 @@ struct cpuinfo_arm64 { u32 reg_id_mmfr1; u32 reg_id_mmfr2; u32 reg_id_mmfr3; + u32 reg_id_mmfr4; + u32 reg_id_mmfr5; u32 reg_id_pfr0; u32 reg_id_pfr1; + u32 reg_id_pfr2; u32 reg_mvfr0; u32 reg_mvfr1; diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index 8eb5a088ae65..d7b3bb0cb180 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -44,7 +44,7 @@ #define ARM64_SSBS 34 #define ARM64_WORKAROUND_1418040 35 #define ARM64_HAS_SB 36 -#define ARM64_WORKAROUND_SPECULATIVE_AT_VHE 37 +#define ARM64_WORKAROUND_SPECULATIVE_AT 37 #define ARM64_HAS_ADDRESS_AUTH_ARCH 38 #define ARM64_HAS_ADDRESS_AUTH_IMP_DEF 39 #define ARM64_HAS_GENERIC_AUTH_ARCH 40 @@ -55,13 +55,14 @@ #define ARM64_WORKAROUND_CAVIUM_TX2_219_TVM 45 #define ARM64_WORKAROUND_CAVIUM_TX2_219_PRFM 46 #define ARM64_WORKAROUND_1542419 47 -#define ARM64_WORKAROUND_SPECULATIVE_AT_NVHE 48 -#define ARM64_HAS_E0PD 49 -#define ARM64_HAS_RNG 50 -#define ARM64_HAS_AMU_EXTN 51 -#define ARM64_HAS_ADDRESS_AUTH 52 -#define ARM64_HAS_GENERIC_AUTH 53 +#define ARM64_HAS_E0PD 48 +#define ARM64_HAS_RNG 49 +#define ARM64_HAS_AMU_EXTN 50 +#define ARM64_HAS_ADDRESS_AUTH 51 +#define ARM64_HAS_GENERIC_AUTH 52 +#define ARM64_HAS_32BIT_EL1 53 +#define ARM64_BTI 54 -#define ARM64_NCAPS 54 +#define ARM64_NCAPS 55 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index afe08251ff95..5d1f4ae42799 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -551,6 +551,13 @@ static inline bool id_aa64mmfr0_mixed_endian_el0(u64 mmfr0) cpuid_feature_extract_unsigned_field(mmfr0, ID_AA64MMFR0_BIGENDEL0_SHIFT) == 0x1; } +static inline bool id_aa64pfr0_32bit_el1(u64 pfr0) +{ + u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT); + + return val == ID_AA64PFR0_EL1_32BIT_64BIT; +} + static inline bool id_aa64pfr0_32bit_el0(u64 pfr0) { u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT); @@ -680,6 +687,11 @@ static inline bool system_has_prio_mask_debugging(void) system_uses_irq_prio_masking(); } +static inline bool system_supports_bti(void) +{ + return IS_ENABLED(CONFIG_ARM64_BTI) && cpus_have_const_cap(ARM64_BTI); +} + #define ARM64_BP_HARDEN_UNKNOWN -1 #define ARM64_BP_HARDEN_WA_NEEDED 0 #define ARM64_BP_HARDEN_NOT_REQUIRED 1 @@ -745,6 +757,24 @@ static inline bool cpu_has_hw_af(void) extern bool cpu_has_amu_feat(int cpu); #endif +static inline unsigned int get_vmid_bits(u64 mmfr1) +{ + int vmid_bits; + + vmid_bits = cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_VMIDBITS_SHIFT); + if (vmid_bits == ID_AA64MMFR1_VMIDBITS_16) + return 16; + + /* + * Return the default here even if any reserved + * value is fetched from the system register. + */ + return 8; +} + +u32 get_kvm_ipa_limit(void); + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 7619f473155f..e5ceea213e39 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -125,5 +125,7 @@ static inline int reinstall_suspended_bps(struct pt_regs *regs) int aarch32_break_handler(struct pt_regs *regs); +void debug_traps_init(void); + #endif /* __ASSEMBLY */ #endif /* __ASM_DEBUG_MONITORS_H */ diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 45e821222774..d4ab3f73e7a3 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -86,14 +86,6 @@ static inline unsigned long efi_get_max_initrd_addr(unsigned long dram_base, return (image_addr & ~(SZ_1G - 1UL)) + (1UL << (VA_BITS_MIN - 1)); } -#define efi_bs_call(func, ...) efi_system_table()->boottime->func(__VA_ARGS__) -#define efi_rt_call(func, ...) efi_system_table()->runtime->func(__VA_ARGS__) -#define efi_is_native() (true) - -#define efi_table_attr(inst, attr) (inst->attr) - -#define efi_call_proto(inst, func, ...) inst->func(inst, ##__VA_ARGS__) - #define alloc_screen_info(x...) &screen_info static inline void free_screen_info(struct screen_info *si) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index b618017205a3..4f00d50585a4 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -114,7 +114,11 @@ #ifndef __ASSEMBLY__ +#include <uapi/linux/elf.h> #include <linux/bug.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/types.h> #include <asm/processor.h> /* for signal_minsigstksz, used by ARCH_DLINFO */ typedef unsigned long elf_greg_t; @@ -224,6 +228,52 @@ extern int aarch32_setup_additional_pages(struct linux_binprm *bprm, #endif /* CONFIG_COMPAT */ +struct arch_elf_state { + int flags; +}; + +#define ARM64_ELF_BTI (1 << 0) + +#define INIT_ARCH_ELF_STATE { \ + .flags = 0, \ +} + +static inline int arch_parse_elf_property(u32 type, const void *data, + size_t datasz, bool compat, + struct arch_elf_state *arch) +{ + /* No known properties for AArch32 yet */ + if (IS_ENABLED(CONFIG_COMPAT) && compat) + return 0; + + if (type == GNU_PROPERTY_AARCH64_FEATURE_1_AND) { + const u32 *p = data; + + if (datasz != sizeof(*p)) + return -ENOEXEC; + + if (system_supports_bti() && + (*p & GNU_PROPERTY_AARCH64_FEATURE_1_BTI)) + arch->flags |= ARM64_ELF_BTI; + } + + return 0; +} + +static inline int arch_elf_pt_proc(void *ehdr, void *phdr, + struct file *f, bool is_interp, + struct arch_elf_state *state) +{ + return 0; +} + +static inline int arch_check_elf(void *ehdr, bool has_interp, + void *interp_ehdr, + struct arch_elf_state *state) +{ + return 0; +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 6a395a7e6707..035003acfa87 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -22,7 +22,7 @@ #define ESR_ELx_EC_PAC (0x09) /* EL2 and above */ /* Unallocated EC: 0x0A - 0x0B */ #define ESR_ELx_EC_CP14_64 (0x0C) -/* Unallocated EC: 0x0d */ +#define ESR_ELx_EC_BTI (0x0D) #define ESR_ELx_EC_ILL (0x0E) /* Unallocated EC: 0x0F - 0x10 */ #define ESR_ELx_EC_SVC32 (0x11) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index 7a6e81ca23a8..7577a754d443 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -34,6 +34,7 @@ static inline u32 disr_to_esr(u64 disr) asmlinkage void enter_from_user_mode(void); void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); void do_undefinstr(struct pt_regs *regs); +void do_bti(struct pt_regs *regs); asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, struct pt_regs *regs); diff --git a/arch/arm64/include/asm/hardirq.h b/arch/arm64/include/asm/hardirq.h index 87ad961f3c97..985493af704b 100644 --- a/arch/arm64/include/asm/hardirq.h +++ b/arch/arm64/include/asm/hardirq.h @@ -32,30 +32,70 @@ u64 smp_irq_stat_cpu(unsigned int cpu); struct nmi_ctx { u64 hcr; + unsigned int cnt; }; DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); -#define arch_nmi_enter() \ - do { \ - if (is_kernel_in_hyp_mode()) { \ - struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ - nmi_ctx->hcr = read_sysreg(hcr_el2); \ - if (!(nmi_ctx->hcr & HCR_TGE)) { \ - write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ - isb(); \ - } \ - } \ - } while (0) +#define arch_nmi_enter() \ +do { \ + struct nmi_ctx *___ctx; \ + u64 ___hcr; \ + \ + if (!is_kernel_in_hyp_mode()) \ + break; \ + \ + ___ctx = this_cpu_ptr(&nmi_contexts); \ + if (___ctx->cnt) { \ + ___ctx->cnt++; \ + break; \ + } \ + \ + ___hcr = read_sysreg(hcr_el2); \ + if (!(___hcr & HCR_TGE)) { \ + write_sysreg(___hcr | HCR_TGE, hcr_el2); \ + isb(); \ + } \ + /* \ + * Make sure the sysreg write is performed before ___ctx->cnt \ + * is set to 1. NMIs that see cnt == 1 will rely on us. \ + */ \ + barrier(); \ + ___ctx->cnt = 1; \ + /* \ + * Make sure ___ctx->cnt is set before we save ___hcr. We \ + * don't want ___ctx->hcr to be overwritten. \ + */ \ + barrier(); \ + ___ctx->hcr = ___hcr; \ +} while (0) -#define arch_nmi_exit() \ - do { \ - if (is_kernel_in_hyp_mode()) { \ - struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ - if (!(nmi_ctx->hcr & HCR_TGE)) \ - write_sysreg(nmi_ctx->hcr, hcr_el2); \ - } \ - } while (0) +#define arch_nmi_exit() \ +do { \ + struct nmi_ctx *___ctx; \ + u64 ___hcr; \ + \ + if (!is_kernel_in_hyp_mode()) \ + break; \ + \ + ___ctx = this_cpu_ptr(&nmi_contexts); \ + ___hcr = ___ctx->hcr; \ + /* \ + * Make sure we read ___ctx->hcr before we release \ + * ___ctx->cnt as it makes ___ctx->hcr updatable again. \ + */ \ + barrier(); \ + ___ctx->cnt--; \ + /* \ + * Make sure ___ctx->cnt release is visible before we \ + * restore the sysreg. Otherwise a new NMI occurring \ + * right after write_sysreg() can be fooled and think \ + * we secured things for it. \ + */ \ + barrier(); \ + if (!___ctx->cnt && !(___hcr & HCR_TGE)) \ + write_sysreg(___hcr, hcr_el2); \ +} while (0) static inline void ack_bad_irq(unsigned int irq) { diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 0f00265248b5..d683bcbf1e7c 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -94,6 +94,7 @@ #define KERNEL_HWCAP_BF16 __khwcap2_feature(BF16) #define KERNEL_HWCAP_DGH __khwcap2_feature(DGH) #define KERNEL_HWCAP_RNG __khwcap2_feature(RNG) +#define KERNEL_HWCAP_BTI __khwcap2_feature(BTI) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index bb313dde58a4..0bc46149e491 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -39,13 +39,37 @@ enum aarch64_insn_encoding_class { * system instructions */ }; -enum aarch64_insn_hint_op { +enum aarch64_insn_hint_cr_op { AARCH64_INSN_HINT_NOP = 0x0 << 5, AARCH64_INSN_HINT_YIELD = 0x1 << 5, AARCH64_INSN_HINT_WFE = 0x2 << 5, AARCH64_INSN_HINT_WFI = 0x3 << 5, AARCH64_INSN_HINT_SEV = 0x4 << 5, AARCH64_INSN_HINT_SEVL = 0x5 << 5, + + AARCH64_INSN_HINT_XPACLRI = 0x07 << 5, + AARCH64_INSN_HINT_PACIA_1716 = 0x08 << 5, + AARCH64_INSN_HINT_PACIB_1716 = 0x0A << 5, + AARCH64_INSN_HINT_AUTIA_1716 = 0x0C << 5, + AARCH64_INSN_HINT_AUTIB_1716 = 0x0E << 5, + AARCH64_INSN_HINT_PACIAZ = 0x18 << 5, + AARCH64_INSN_HINT_PACIASP = 0x19 << 5, + AARCH64_INSN_HINT_PACIBZ = 0x1A << 5, + AARCH64_INSN_HINT_PACIBSP = 0x1B << 5, + AARCH64_INSN_HINT_AUTIAZ = 0x1C << 5, + AARCH64_INSN_HINT_AUTIASP = 0x1D << 5, + AARCH64_INSN_HINT_AUTIBZ = 0x1E << 5, + AARCH64_INSN_HINT_AUTIBSP = 0x1F << 5, + + AARCH64_INSN_HINT_ESB = 0x10 << 5, + AARCH64_INSN_HINT_PSB = 0x11 << 5, + AARCH64_INSN_HINT_TSB = 0x12 << 5, + AARCH64_INSN_HINT_CSDB = 0x14 << 5, + + AARCH64_INSN_HINT_BTI = 0x20 << 5, + AARCH64_INSN_HINT_BTIC = 0x22 << 5, + AARCH64_INSN_HINT_BTIJ = 0x24 << 5, + AARCH64_INSN_HINT_BTIJC = 0x26 << 5, }; enum aarch64_insn_imm_type { @@ -344,7 +368,7 @@ __AARCH64_INSN_FUNCS(msr_reg, 0xFFF00000, 0xD5100000) #undef __AARCH64_INSN_FUNCS -bool aarch64_insn_is_nop(u32 insn); +bool aarch64_insn_is_steppable_hint(u32 insn); bool aarch64_insn_is_branch_imm(u32 insn); static inline bool aarch64_insn_is_adr_adrp(u32 insn) @@ -370,7 +394,7 @@ u32 aarch64_insn_gen_comp_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_branch_type type); u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, enum aarch64_insn_condition cond); -u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_op op); +u32 aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op); u32 aarch64_insn_gen_nop(void); u32 aarch64_insn_gen_branch_reg(enum aarch64_insn_register reg, enum aarch64_insn_branch_type type); diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7c7eeeaab9fa..0c9b5fc4ba0a 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -64,12 +64,14 @@ extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern void __kvm_tlb_flush_local_vmid(struct kvm_vcpu *vcpu); -extern void __kvm_timer_set_cntvoff(u32 cntvoff_low, u32 cntvoff_high); +extern void __kvm_timer_set_cntvoff(u64 cntvoff); extern int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu); extern int __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu); +extern void __kvm_enable_ssbs(void); + extern u64 __vgic_v3_get_ich_vtr_el2(void); extern u64 __vgic_v3_read_vmcr(void); extern void __vgic_v3_write_vmcr(u32 vmcr); diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index a30b4eec7cb4..6ea53e6e8b26 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -507,10 +507,12 @@ static inline unsigned long vcpu_data_host_to_guest(struct kvm_vcpu *vcpu, static __always_inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) { - if (vcpu_mode_is_32bit(vcpu)) + if (vcpu_mode_is_32bit(vcpu)) { kvm_skip_instr32(vcpu, is_wide_instr); - else + } else { *vcpu_pc(vcpu) += 4; + *vcpu_cpsr(vcpu) &= ~PSR_BTYPE_MASK; + } /* advance the singlestep state machine */ *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS; diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 32c8a675e5a4..abbdf9703e20 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -46,6 +46,9 @@ #define KVM_REQ_RECORD_STEAL KVM_ARCH_REQ(3) #define KVM_REQ_RELOAD_GICv4 KVM_ARCH_REQ(4) +#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \ + KVM_DIRTY_LOG_INITIALLY_SET) + DECLARE_STATIC_KEY_FALSE(userspace_irqchip_in_use); extern unsigned int kvm_sve_max_vl; @@ -112,12 +115,8 @@ struct kvm_vcpu_fault_info { u64 disr_el1; /* Deferred [SError] Status Register */ }; -/* - * 0 is reserved as an invalid value. - * Order should be kept in sync with the save/restore code. - */ enum vcpu_sysreg { - __INVALID_SYSREG__, + __INVALID_SYSREG__, /* 0 is reserved as an invalid value */ MPIDR_EL1, /* MultiProcessor Affinity Register */ CSSELR_EL1, /* Cache Size Selection Register */ SCTLR_EL1, /* System Control Register */ @@ -415,6 +414,8 @@ struct kvm_vm_stat { struct kvm_vcpu_stat { u64 halt_successful_poll; u64 halt_attempted_poll; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 halt_poll_invalid; u64 halt_wakeup; u64 hvc_exit_stat; @@ -530,39 +531,6 @@ static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt) cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr(); } -void __kvm_enable_ssbs(void); - -static inline void __cpu_init_hyp_mode(phys_addr_t pgd_ptr, - unsigned long hyp_stack_ptr, - unsigned long vector_ptr) -{ - /* - * Calculate the raw per-cpu offset without a translation from the - * kernel's mapping to the linear mapping, and store it in tpidr_el2 - * so that we can use adr_l to access per-cpu variables in EL2. - */ - u64 tpidr_el2 = ((u64)this_cpu_ptr(&kvm_host_data) - - (u64)kvm_ksym_ref(kvm_host_data)); - - /* - * Call initialization code, and switch to the full blown HYP code. - * If the cpucaps haven't been finalized yet, something has gone very - * wrong, and hyp will crash and burn when it uses any - * cpus_have_const_cap() wrapper. - */ - BUG_ON(!system_capabilities_finalized()); - __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); - - /* - * Disabling SSBD on a non-VHE system requires us to enable SSBS - * at EL2. - */ - if (!has_vhe() && this_cpu_has_cap(ARM64_SSBS) && - arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { - kvm_call_hyp(__kvm_enable_ssbs); - } -} - static inline bool kvm_arch_requires_vhe(void) { /* @@ -573,10 +541,6 @@ static inline bool kvm_arch_requires_vhe(void) if (system_supports_sve()) return true; - /* Some implementations have defects that confine them to VHE */ - if (cpus_have_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) - return true; - return false; } @@ -598,8 +562,6 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); -static inline void __cpu_init_stage2(void) {} - /* Guest/host FPSIMD coordination helpers */ int kvm_arch_vcpu_run_map_fp(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load_fp(struct kvm_vcpu *vcpu); @@ -670,7 +632,7 @@ static inline int kvm_arm_have_ssbd(void) void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); -void kvm_set_ipa_limit(void); +int kvm_set_ipa_limit(void); #define __KVM_HAVE_ARCH_VM_ALLOC struct kvm *kvm_arch_alloc_vm(void); diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index fe57f60f06a8..ce3080834bfa 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -10,10 +10,9 @@ #include <linux/compiler.h> #include <linux/kvm_host.h> #include <asm/alternative.h> -#include <asm/kvm_mmu.h> #include <asm/sysreg.h> -#define __hyp_text __section(.hyp.text) notrace +#define __hyp_text __section(.hyp.text) notrace __noscs #define read_sysreg_elx(r,nvh,vh) \ ({ \ @@ -56,12 +55,12 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); -void __vgic_v3_save_state(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); -void __vgic_v3_activate_traps(struct kvm_vcpu *vcpu); -void __vgic_v3_deactivate_traps(struct kvm_vcpu *vcpu); -void __vgic_v3_save_aprs(struct kvm_vcpu *vcpu); -void __vgic_v3_restore_aprs(struct kvm_vcpu *vcpu); +void __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if); +void __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if); int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_enable_traps(struct kvm_vcpu *vcpu); @@ -88,22 +87,5 @@ void deactivate_traps_vhe_put(void); u64 __guest_enter(struct kvm_vcpu *vcpu, struct kvm_cpu_context *host_ctxt); void __noreturn __hyp_do_panic(unsigned long, ...); -/* - * Must be called from hyp code running at EL2 with an updated VTTBR - * and interrupts disabled. - */ -static __always_inline void __hyp_text __load_guest_stage2(struct kvm *kvm) -{ - write_sysreg(kvm->arch.vtcr, vtcr_el2); - write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); - - /* - * ARM errata 1165522 and 1530923 require the actual execution of the - * above before we can switch to the EL1/EL0 translation regime used by - * the guest. - */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); -} - #endif /* __ARM64_KVM_HYP_H__ */ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 30b0e8d6b895..324c8483d2b9 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -363,8 +363,6 @@ static inline void __kvm_flush_dcache_pud(pud_t pud) } } -#define kvm_virt_to_phys(x) __pa_symbol(x) - void kvm_set_way_flush(struct kvm_vcpu *vcpu); void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled); @@ -416,7 +414,7 @@ static inline unsigned int kvm_get_vmid_bits(void) { int reg = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); - return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; + return get_vmid_bits(reg); } /* @@ -473,7 +471,7 @@ static inline int kvm_write_guest_lock(struct kvm *kvm, gpa_t gpa, extern void *__kvm_bp_vect_base; extern int __kvm_harden_el2_vector_slot; -/* This is only called on a VHE system */ +/* This is called on both VHE and !VHE systems */ static inline void *kvm_get_hyp_vector(void) { struct bp_hardening_data *data = arm64_get_bp_hardening_data(); @@ -604,5 +602,22 @@ static __always_inline u64 kvm_get_vttbr(struct kvm *kvm) return kvm_phys_to_vttbr(baddr) | vmid_field | cnp; } +/* + * Must be called from hyp code running at EL2 with an updated VTTBR + * and interrupts disabled. + */ +static __always_inline void __load_guest_stage2(struct kvm *kvm) +{ + write_sysreg(kvm->arch.vtcr, vtcr_el2); + write_sysreg(kvm_get_vttbr(kvm), vttbr_el2); + + /* + * ARM errata 1165522 and 1530923 require the actual execution of the + * above before we can switch to the EL1/EL0 translation regime used by + * the guest. + */ + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); +} + #endif /* __ASSEMBLY__ */ #endif /* __ARM64_KVM_MMU_H__ */ diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index ebee3113a62f..81fefd2a1d02 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -4,6 +4,52 @@ #define __ALIGN .align 2 #define __ALIGN_STR ".align 2" +#if defined(CONFIG_ARM64_BTI_KERNEL) && defined(__aarch64__) + +/* + * Since current versions of gas reject the BTI instruction unless we + * set the architecture version to v8.5 we use the hint instruction + * instead. + */ +#define BTI_C hint 34 ; +#define BTI_J hint 36 ; + +/* + * When using in-kernel BTI we need to ensure that PCS-conformant assembly + * functions have suitable annotations. Override SYM_FUNC_START to insert + * a BTI landing pad at the start of everything. + */ +#define SYM_FUNC_START(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_NOALIGN(name) \ + SYM_START(name, SYM_L_GLOBAL, SYM_A_NONE) \ + BTI_C + +#define SYM_FUNC_START_LOCAL(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_LOCAL_NOALIGN(name) \ + SYM_START(name, SYM_L_LOCAL, SYM_A_NONE) \ + BTI_C + +#define SYM_FUNC_START_WEAK(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_ALIGN) \ + BTI_C + +#define SYM_FUNC_START_WEAK_NOALIGN(name) \ + SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ + BTI_C + +#define SYM_INNER_LABEL(name, linkage) \ + .type name SYM_T_NONE ASM_NL \ + SYM_ENTRY(name, linkage, SYM_A_NONE) \ + BTI_J + +#endif + /* * Annotate a function as position independent, i.e., safe to be called before * the kernel virtual mapping is activated. diff --git a/arch/arm64/include/asm/mman.h b/arch/arm64/include/asm/mman.h new file mode 100644 index 000000000000..081ec8de9ea6 --- /dev/null +++ b/arch/arm64/include/asm/mman.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_MMAN_H__ +#define __ASM_MMAN_H__ + +#include <linux/compiler.h> +#include <linux/types.h> +#include <uapi/asm/mman.h> + +static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot, + unsigned long pkey __always_unused) +{ + if (system_supports_bti() && (prot & PROT_BTI)) + return VM_ARM64_BTI; + + return 0; +} +#define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey) + +static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags) +{ + return (vm_flags & VM_ARM64_BTI) ? __pgprot(PTE_GP) : __pgprot(0); +} +#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags) + +static inline bool arch_validate_prot(unsigned long prot, + unsigned long addr __always_unused) +{ + unsigned long supported = PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM; + + if (system_supports_bti()) + supported |= PROT_BTI; + + return (prot & ~supported) == 0; +} +#define arch_validate_prot(prot, addr) arch_validate_prot(prot, addr) + +#endif /* ! __ASM_MMAN_H__ */ diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 6bf5e650da78..9c91a8f93a0e 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -151,6 +151,7 @@ #define PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */ #define PTE_AF (_AT(pteval_t, 1) << 10) /* Access Flag */ #define PTE_NG (_AT(pteval_t, 1) << 11) /* nG */ +#define PTE_GP (_AT(pteval_t, 1) << 50) /* BTI guarded */ #define PTE_DBM (_AT(pteval_t, 1) << 51) /* Dirty Bit Management */ #define PTE_CONT (_AT(pteval_t, 1) << 52) /* Contiguous range */ #define PTE_PXN (_AT(pteval_t, 1) << 53) /* Privileged XN */ @@ -190,7 +191,6 @@ * Memory Attribute override for Stage-2 (MemAttr[3:0]) */ #define PTE_S2_MEMATTR(t) (_AT(pteval_t, (t)) << 2) -#define PTE_S2_MEMATTR_MASK (_AT(pteval_t, 0xf) << 2) /* * EL2/HYP PTE/PMD definitions diff --git a/arch/arm64/include/asm/pgtable-prot.h b/arch/arm64/include/asm/pgtable-prot.h index 1305e28225fc..2e7e0f452301 100644 --- a/arch/arm64/include/asm/pgtable-prot.h +++ b/arch/arm64/include/asm/pgtable-prot.h @@ -21,6 +21,7 @@ #ifndef __ASSEMBLY__ +#include <asm/cpufeature.h> #include <asm/pgtable-types.h> extern bool arm64_use_ng_mappings; @@ -31,6 +32,16 @@ extern bool arm64_use_ng_mappings; #define PTE_MAYBE_NG (arm64_use_ng_mappings ? PTE_NG : 0) #define PMD_MAYBE_NG (arm64_use_ng_mappings ? PMD_SECT_NG : 0) +/* + * If we have userspace only BTI we don't want to mark kernel pages + * guarded even if the system does support BTI. + */ +#ifdef CONFIG_ARM64_BTI_KERNEL +#define PTE_MAYBE_GP (system_supports_bti() ? PTE_GP : 0) +#else +#define PTE_MAYBE_GP 0 +#endif + #define PROT_DEFAULT (_PROT_DEFAULT | PTE_MAYBE_NG) #define PROT_SECT_DEFAULT (_PROT_SECT_DEFAULT | PMD_MAYBE_NG) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 538c85e62f86..dae0466d19d6 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -407,6 +407,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd) #define __pgprot_modify(prot,mask,bits) \ __pgprot((pgprot_val(prot) & ~(mask)) | (bits)) +#define pgprot_nx(prot) \ + __pgprot_modify(prot, 0, PTE_PXN) + /* * Mark the prot value as uncacheable and unbufferable. */ @@ -457,6 +460,7 @@ extern pgd_t init_pg_dir[PTRS_PER_PGD]; extern pgd_t init_pg_end[]; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; +extern pgd_t idmap_pg_end[]; extern pgd_t tramp_pg_dir[PTRS_PER_PGD]; extern void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd); @@ -508,7 +512,7 @@ static inline void pte_unmap(pte_t *pte) { } #define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr)) #define pte_clear_fixmap() clear_fixmap(FIX_PTE) -#define pmd_page(pmd) pfn_to_page(__phys_to_pfn(__pmd_to_phys(pmd))) +#define pmd_page(pmd) phys_to_page(__pmd_to_phys(pmd)) /* use ONLY for statically allocated translation tables */ #define pte_offset_kimg(dir,addr) ((pte_t *)__phys_to_kimg(pte_offset_phys((dir), (addr)))) @@ -566,7 +570,7 @@ static inline phys_addr_t pud_page_paddr(pud_t pud) #define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr)) #define pmd_clear_fixmap() clear_fixmap(FIX_PMD) -#define pud_page(pud) pfn_to_page(__phys_to_pfn(__pud_to_phys(pud))) +#define pud_page(pud) phys_to_page(__pud_to_phys(pud)) /* use ONLY for statically allocated translation tables */ #define pmd_offset_kimg(dir,addr) ((pmd_t *)__phys_to_kimg(pmd_offset_phys((dir), (addr)))) @@ -624,7 +628,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) #define pud_set_fixmap_offset(pgd, addr) pud_set_fixmap(pud_offset_phys(pgd, addr)) #define pud_clear_fixmap() clear_fixmap(FIX_PUD) -#define pgd_page(pgd) pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) +#define pgd_page(pgd) phys_to_page(__pgd_to_phys(pgd)) /* use ONLY for statically allocated translation tables */ #define pud_offset_kimg(dir,addr) ((pud_t *)__phys_to_kimg(pud_offset_phys((dir), (addr)))) @@ -660,7 +664,7 @@ static inline phys_addr_t pgd_page_paddr(pgd_t pgd) static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | - PTE_PROT_NONE | PTE_VALID | PTE_WRITE; + PTE_PROT_NONE | PTE_VALID | PTE_WRITE | PTE_GP; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) pte = pte_mkdirty(pte); diff --git a/arch/arm64/include/asm/ptrace.h b/arch/arm64/include/asm/ptrace.h index bf57308fcd63..953b6a1ce549 100644 --- a/arch/arm64/include/asm/ptrace.h +++ b/arch/arm64/include/asm/ptrace.h @@ -35,6 +35,7 @@ #define GIC_PRIO_PSR_I_SET (1 << 4) /* Additional SPSR bits not exposed in the UABI */ +#define PSR_MODE_THREAD_BIT (1 << 0) #define PSR_IL_BIT (1 << 20) /* AArch32-specific ptrace requests */ diff --git a/arch/arm64/include/asm/scs.h b/arch/arm64/include/asm/scs.h new file mode 100644 index 000000000000..eaa2cd92e4c1 --- /dev/null +++ b/arch/arm64/include/asm/scs.h @@ -0,0 +1,29 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_SCS_H +#define _ASM_SCS_H + +#ifdef __ASSEMBLY__ + +#include <asm/asm-offsets.h> + +#ifdef CONFIG_SHADOW_CALL_STACK + scs_sp .req x18 + + .macro scs_load tsk, tmp + ldr scs_sp, [\tsk, #TSK_TI_SCS_SP] + .endm + + .macro scs_save tsk, tmp + str scs_sp, [\tsk, #TSK_TI_SCS_SP] + .endm +#else + .macro scs_load tsk, tmp + .endm + + .macro scs_save tsk, tmp + .endm +#endif /* CONFIG_SHADOW_CALL_STACK */ + +#endif /* __ASSEMBLY __ */ + +#endif /* _ASM_SCS_H */ diff --git a/arch/arm64/include/asm/smp.h b/arch/arm64/include/asm/smp.h index 40d5ba029615..ea268d88b6f7 100644 --- a/arch/arm64/include/asm/smp.h +++ b/arch/arm64/include/asm/smp.h @@ -23,14 +23,6 @@ #define CPU_STUCK_REASON_52_BIT_VA (UL(1) << CPU_STUCK_REASON_SHIFT) #define CPU_STUCK_REASON_NO_GRAN (UL(2) << CPU_STUCK_REASON_SHIFT) -/* Possible options for __cpu_setup */ -/* Option to setup primary cpu */ -#define ARM64_CPU_BOOT_PRIMARY (1) -/* Option to setup secondary cpus */ -#define ARM64_CPU_BOOT_SECONDARY (2) -/* Option to setup cpus for different cpu run time services */ -#define ARM64_CPU_RUNTIME (3) - #ifndef __ASSEMBLY__ #include <asm/percpu.h> @@ -96,9 +88,6 @@ asmlinkage void secondary_start_kernel(void); struct secondary_data { void *stack; struct task_struct *task; -#ifdef CONFIG_ARM64_PTR_AUTH - struct ptrauth_keys_kernel ptrauth_key; -#endif long status; }; diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 4d9b1f48dc39..5017b531a415 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -68,12 +68,10 @@ extern void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk); DECLARE_PER_CPU(unsigned long *, irq_stack_ptr); -static inline bool on_irq_stack(unsigned long sp, +static inline bool on_stack(unsigned long sp, unsigned long low, + unsigned long high, enum stack_type type, struct stack_info *info) { - unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); - unsigned long high = low + IRQ_STACK_SIZE; - if (!low) return false; @@ -83,12 +81,20 @@ static inline bool on_irq_stack(unsigned long sp, if (info) { info->low = low; info->high = high; - info->type = STACK_TYPE_IRQ; + info->type = type; } - return true; } +static inline bool on_irq_stack(unsigned long sp, + struct stack_info *info) +{ + unsigned long low = (unsigned long)raw_cpu_read(irq_stack_ptr); + unsigned long high = low + IRQ_STACK_SIZE; + + return on_stack(sp, low, high, STACK_TYPE_IRQ, info); +} + static inline bool on_task_stack(const struct task_struct *tsk, unsigned long sp, struct stack_info *info) @@ -96,16 +102,7 @@ static inline bool on_task_stack(const struct task_struct *tsk, unsigned long low = (unsigned long)task_stack_page(tsk); unsigned long high = low + THREAD_SIZE; - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_TASK; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_TASK, info); } #ifdef CONFIG_VMAP_STACK @@ -117,16 +114,7 @@ static inline bool on_overflow_stack(unsigned long sp, unsigned long low = (unsigned long)raw_cpu_ptr(overflow_stack); unsigned long high = low + OVERFLOW_STACK_SIZE; - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_OVERFLOW; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_OVERFLOW, info); } #else static inline bool on_overflow_stack(unsigned long sp, diff --git a/arch/arm64/include/asm/suspend.h b/arch/arm64/include/asm/suspend.h index 8939c87c4dce..0cde2f473971 100644 --- a/arch/arm64/include/asm/suspend.h +++ b/arch/arm64/include/asm/suspend.h @@ -2,7 +2,7 @@ #ifndef __ASM_SUSPEND_H #define __ASM_SUSPEND_H -#define NR_CTX_REGS 12 +#define NR_CTX_REGS 13 #define NR_CALLEE_SAVED_REGS 12 /* diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index c4ac0ac25a00..463175f80341 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -105,6 +105,10 @@ #define SYS_DC_CSW sys_insn(1, 0, 7, 10, 2) #define SYS_DC_CISW sys_insn(1, 0, 7, 14, 2) +/* + * System registers, organised loosely by encoding but grouped together + * where the architected name contains an index. e.g. ID_MMFR<n>_EL1. + */ #define SYS_OSDTRRX_EL1 sys_reg(2, 0, 0, 0, 2) #define SYS_MDCCINT_EL1 sys_reg(2, 0, 0, 2, 0) #define SYS_MDSCR_EL1 sys_reg(2, 0, 0, 2, 2) @@ -134,12 +138,16 @@ #define SYS_ID_PFR0_EL1 sys_reg(3, 0, 0, 1, 0) #define SYS_ID_PFR1_EL1 sys_reg(3, 0, 0, 1, 1) +#define SYS_ID_PFR2_EL1 sys_reg(3, 0, 0, 3, 4) #define SYS_ID_DFR0_EL1 sys_reg(3, 0, 0, 1, 2) +#define SYS_ID_DFR1_EL1 sys_reg(3, 0, 0, 3, 5) #define SYS_ID_AFR0_EL1 sys_reg(3, 0, 0, 1, 3) #define SYS_ID_MMFR0_EL1 sys_reg(3, 0, 0, 1, 4) #define SYS_ID_MMFR1_EL1 sys_reg(3, 0, 0, 1, 5) #define SYS_ID_MMFR2_EL1 sys_reg(3, 0, 0, 1, 6) #define SYS_ID_MMFR3_EL1 sys_reg(3, 0, 0, 1, 7) +#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) +#define SYS_ID_MMFR5_EL1 sys_reg(3, 0, 0, 3, 6) #define SYS_ID_ISAR0_EL1 sys_reg(3, 0, 0, 2, 0) #define SYS_ID_ISAR1_EL1 sys_reg(3, 0, 0, 2, 1) @@ -147,7 +155,6 @@ #define SYS_ID_ISAR3_EL1 sys_reg(3, 0, 0, 2, 3) #define SYS_ID_ISAR4_EL1 sys_reg(3, 0, 0, 2, 4) #define SYS_ID_ISAR5_EL1 sys_reg(3, 0, 0, 2, 5) -#define SYS_ID_MMFR4_EL1 sys_reg(3, 0, 0, 2, 6) #define SYS_ID_ISAR6_EL1 sys_reg(3, 0, 0, 2, 7) #define SYS_MVFR0_EL1 sys_reg(3, 0, 0, 3, 0) @@ -552,6 +559,8 @@ #endif /* SCTLR_EL1 specific flags. */ +#define SCTLR_EL1_BT1 (BIT(36)) +#define SCTLR_EL1_BT0 (BIT(35)) #define SCTLR_EL1_UCI (BIT(26)) #define SCTLR_EL1_E0E (BIT(24)) #define SCTLR_EL1_SPAN (BIT(23)) @@ -594,6 +603,7 @@ /* id_aa64isar0 */ #define ID_AA64ISAR0_RNDR_SHIFT 60 +#define ID_AA64ISAR0_TLB_SHIFT 56 #define ID_AA64ISAR0_TS_SHIFT 52 #define ID_AA64ISAR0_FHM_SHIFT 48 #define ID_AA64ISAR0_DP_SHIFT 44 @@ -637,6 +647,8 @@ #define ID_AA64PFR0_CSV2_SHIFT 56 #define ID_AA64PFR0_DIT_SHIFT 48 #define ID_AA64PFR0_AMU_SHIFT 44 +#define ID_AA64PFR0_MPAM_SHIFT 40 +#define ID_AA64PFR0_SEL2_SHIFT 36 #define ID_AA64PFR0_SVE_SHIFT 32 #define ID_AA64PFR0_RAS_SHIFT 28 #define ID_AA64PFR0_GIC_SHIFT 24 @@ -655,15 +667,21 @@ #define ID_AA64PFR0_ASIMD_NI 0xf #define ID_AA64PFR0_ASIMD_SUPPORTED 0x0 #define ID_AA64PFR0_EL1_64BIT_ONLY 0x1 +#define ID_AA64PFR0_EL1_32BIT_64BIT 0x2 #define ID_AA64PFR0_EL0_64BIT_ONLY 0x1 #define ID_AA64PFR0_EL0_32BIT_64BIT 0x2 /* id_aa64pfr1 */ +#define ID_AA64PFR1_MPAMFRAC_SHIFT 16 +#define ID_AA64PFR1_RASFRAC_SHIFT 12 +#define ID_AA64PFR1_MTE_SHIFT 8 #define ID_AA64PFR1_SSBS_SHIFT 4 +#define ID_AA64PFR1_BT_SHIFT 0 #define ID_AA64PFR1_SSBS_PSTATE_NI 0 #define ID_AA64PFR1_SSBS_PSTATE_ONLY 1 #define ID_AA64PFR1_SSBS_PSTATE_INSNS 2 +#define ID_AA64PFR1_BT_BTI 0x1 /* id_aa64zfr0 */ #define ID_AA64ZFR0_F64MM_SHIFT 56 @@ -688,6 +706,9 @@ #define ID_AA64ZFR0_SVEVER_SVE2 0x1 /* id_aa64mmfr0 */ +#define ID_AA64MMFR0_TGRAN4_2_SHIFT 40 +#define ID_AA64MMFR0_TGRAN64_2_SHIFT 36 +#define ID_AA64MMFR0_TGRAN16_2_SHIFT 32 #define ID_AA64MMFR0_TGRAN4_SHIFT 28 #define ID_AA64MMFR0_TGRAN64_SHIFT 24 #define ID_AA64MMFR0_TGRAN16_SHIFT 20 @@ -752,6 +773,25 @@ #define ID_DFR0_PERFMON_8_1 0x4 +#define ID_ISAR4_SWP_FRAC_SHIFT 28 +#define ID_ISAR4_PSR_M_SHIFT 24 +#define ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT 20 +#define ID_ISAR4_BARRIER_SHIFT 16 +#define ID_ISAR4_SMC_SHIFT 12 +#define ID_ISAR4_WRITEBACK_SHIFT 8 +#define ID_ISAR4_WITHSHIFTS_SHIFT 4 +#define ID_ISAR4_UNPRIV_SHIFT 0 + +#define ID_DFR1_MTPMU_SHIFT 0 + +#define ID_ISAR0_DIVIDE_SHIFT 24 +#define ID_ISAR0_DEBUG_SHIFT 20 +#define ID_ISAR0_COPROC_SHIFT 16 +#define ID_ISAR0_CMPBRANCH_SHIFT 12 +#define ID_ISAR0_BITFIELD_SHIFT 8 +#define ID_ISAR0_BITCOUNT_SHIFT 4 +#define ID_ISAR0_SWAP_SHIFT 0 + #define ID_ISAR5_RDM_SHIFT 24 #define ID_ISAR5_CRC32_SHIFT 16 #define ID_ISAR5_SHA2_SHIFT 12 @@ -767,6 +807,22 @@ #define ID_ISAR6_DP_SHIFT 4 #define ID_ISAR6_JSCVT_SHIFT 0 +#define ID_MMFR4_EVT_SHIFT 28 +#define ID_MMFR4_CCIDX_SHIFT 24 +#define ID_MMFR4_LSM_SHIFT 20 +#define ID_MMFR4_HPDS_SHIFT 16 +#define ID_MMFR4_CNP_SHIFT 12 +#define ID_MMFR4_XNX_SHIFT 8 +#define ID_MMFR4_SPECSEI_SHIFT 0 + +#define ID_MMFR5_ETS_SHIFT 0 + +#define ID_PFR0_DIT_SHIFT 24 +#define ID_PFR0_CSV2_SHIFT 16 + +#define ID_PFR2_SSBS_SHIFT 4 +#define ID_PFR2_CSV3_SHIFT 0 + #define MVFR0_FPROUND_SHIFT 28 #define MVFR0_FPSHVEC_SHIFT 24 #define MVFR0_FPSQRT_SHIFT 20 @@ -785,17 +841,14 @@ #define MVFR1_FPDNAN_SHIFT 4 #define MVFR1_FPFTZ_SHIFT 0 - -#define ID_AA64MMFR0_TGRAN4_SHIFT 28 -#define ID_AA64MMFR0_TGRAN64_SHIFT 24 -#define ID_AA64MMFR0_TGRAN16_SHIFT 20 - -#define ID_AA64MMFR0_TGRAN4_NI 0xf -#define ID_AA64MMFR0_TGRAN4_SUPPORTED 0x0 -#define ID_AA64MMFR0_TGRAN64_NI 0xf -#define ID_AA64MMFR0_TGRAN64_SUPPORTED 0x0 -#define ID_AA64MMFR0_TGRAN16_NI 0x0 -#define ID_AA64MMFR0_TGRAN16_SUPPORTED 0x1 +#define ID_PFR1_GIC_SHIFT 28 +#define ID_PFR1_VIRT_FRAC_SHIFT 24 +#define ID_PFR1_SEC_FRAC_SHIFT 20 +#define ID_PFR1_GENTIMER_SHIFT 16 +#define ID_PFR1_VIRTUALIZATION_SHIFT 12 +#define ID_PFR1_MPROGMOD_SHIFT 8 +#define ID_PFR1_SECURITY_SHIFT 4 +#define ID_PFR1_PROGMOD_SHIFT 0 #if defined(CONFIG_ARM64_4K_PAGES) #define ID_AA64MMFR0_TGRAN_SHIFT ID_AA64MMFR0_TGRAN4_SHIFT diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 512174a8e789..6ea8b6a26ae9 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -41,6 +41,10 @@ struct thread_info { #endif } preempt; }; +#ifdef CONFIG_SHADOW_CALL_STACK + void *scs_base; + void *scs_sp; +#endif }; #define thread_saved_pc(tsk) \ @@ -100,11 +104,20 @@ void arch_release_task_struct(struct task_struct *tsk); _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ _TIF_SYSCALL_EMU) +#ifdef CONFIG_SHADOW_CALL_STACK +#define INIT_SCS \ + .scs_base = init_shadow_call_stack, \ + .scs_sp = init_shadow_call_stack, +#else +#define INIT_SCS +#endif + #define INIT_THREAD_INFO(tsk) \ { \ .flags = _TIF_FOREIGN_FPSTATE, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ + INIT_SCS \ } #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 803039d504de..3b859596840d 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -38,7 +38,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 439 +#define __NR_compat_syscalls 440 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index c1c61635f89c..6d95d0c8bf2f 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -883,6 +883,8 @@ __SYSCALL(__NR_clone3, sys_clone3) __SYSCALL(__NR_openat2, sys_openat2) #define __NR_pidfd_getfd 438 __SYSCALL(__NR_pidfd_getfd, sys_pidfd_getfd) +#define __NR_faccessat2 439 +__SYSCALL(__NR_faccessat2, sys_faccessat2) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/asm/virt.h b/arch/arm64/include/asm/virt.h index 61fd26752adc..5051b388c654 100644 --- a/arch/arm64/include/asm/virt.h +++ b/arch/arm64/include/asm/virt.h @@ -85,7 +85,7 @@ static inline bool is_kernel_in_hyp_mode(void) static __always_inline bool has_vhe(void) { - if (cpus_have_const_cap(ARM64_HAS_VIRT_HOST_EXTN)) + if (cpus_have_final_cap(ARM64_HAS_VIRT_HOST_EXTN)) return true; return false; diff --git a/arch/arm64/include/asm/vmap_stack.h b/arch/arm64/include/asm/vmap_stack.h index 0a12115d9638..0cc6636e3f15 100644 --- a/arch/arm64/include/asm/vmap_stack.h +++ b/arch/arm64/include/asm/vmap_stack.h @@ -19,10 +19,8 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node) { BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK)); - return __vmalloc_node_range(stack_size, THREAD_ALIGN, - VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, PAGE_KERNEL, 0, node, - __builtin_return_address(0)); + return __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node, + __builtin_return_address(0)); } #endif /* __ASM_VMAP_STACK_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index 7752d93bb50f..2d6ba1c2592e 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -73,5 +73,6 @@ #define HWCAP2_BF16 (1 << 14) #define HWCAP2_DGH (1 << 15) #define HWCAP2_RNG (1 << 16) +#define HWCAP2_BTI (1 << 17) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/include/uapi/asm/mman.h b/arch/arm64/include/uapi/asm/mman.h new file mode 100644 index 000000000000..6fdd71eb644f --- /dev/null +++ b/arch/arm64/include/uapi/asm/mman.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI__ASM_MMAN_H +#define _UAPI__ASM_MMAN_H + +#include <asm-generic/mman.h> + +#define PROT_BTI 0x10 /* BTI guarded page */ + +#endif /* ! _UAPI__ASM_MMAN_H */ diff --git a/arch/arm64/include/uapi/asm/ptrace.h b/arch/arm64/include/uapi/asm/ptrace.h index d1bb5b69f1ce..42cbe34d95ce 100644 --- a/arch/arm64/include/uapi/asm/ptrace.h +++ b/arch/arm64/include/uapi/asm/ptrace.h @@ -46,6 +46,7 @@ #define PSR_I_BIT 0x00000080 #define PSR_A_BIT 0x00000100 #define PSR_D_BIT 0x00000200 +#define PSR_BTYPE_MASK 0x00000c00 #define PSR_SSBS_BIT 0x00001000 #define PSR_PAN_BIT 0x00400000 #define PSR_UAO_BIT 0x00800000 @@ -55,6 +56,8 @@ #define PSR_Z_BIT 0x40000000 #define PSR_N_BIT 0x80000000 +#define PSR_BTYPE_SHIFT 10 + /* * Groups of PSR bits */ @@ -63,6 +66,12 @@ #define PSR_x 0x0000ff00 /* Extension */ #define PSR_c 0x000000ff /* Control */ +/* Convenience names for the values of PSTATE.BTYPE */ +#define PSR_BTYPE_NONE (0b00 << PSR_BTYPE_SHIFT) +#define PSR_BTYPE_JC (0b01 << PSR_BTYPE_SHIFT) +#define PSR_BTYPE_C (0b10 << PSR_BTYPE_SHIFT) +#define PSR_BTYPE_J (0b11 << PSR_BTYPE_SHIFT) + /* syscall emulation path in ptrace */ #define PTRACE_SYSEMU 31 #define PTRACE_SYSEMU_SINGLESTEP 32 diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 4e5b8ee31442..151f28521f1e 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -63,6 +63,7 @@ obj-$(CONFIG_CRASH_CORE) += crash_core.o obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-$(CONFIG_ARM64_PTR_AUTH) += pointer_auth.o +obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o obj-y += vdso/ probes/ obj-$(CONFIG_COMPAT_VDSO) += vdso32/ diff --git a/arch/arm64/kernel/acpi.c b/arch/arm64/kernel/acpi.c index a100483b47c4..46ec402e97ed 100644 --- a/arch/arm64/kernel/acpi.c +++ b/arch/arm64/kernel/acpi.c @@ -19,6 +19,7 @@ #include <linux/init.h> #include <linux/irq.h> #include <linux/irqdomain.h> +#include <linux/irq_work.h> #include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/smp.h> @@ -269,6 +270,7 @@ pgprot_t __acpi_get_mem_attribute(phys_addr_t addr) int apei_claim_sea(struct pt_regs *regs) { int err = -ENOENT; + bool return_to_irqs_enabled; unsigned long current_flags; if (!IS_ENABLED(CONFIG_ACPI_APEI_GHES)) @@ -276,6 +278,12 @@ int apei_claim_sea(struct pt_regs *regs) current_flags = local_daif_save_flags(); + /* current_flags isn't useful here as daif doesn't tell us about pNMI */ + return_to_irqs_enabled = !irqs_disabled_flags(arch_local_save_flags()); + + if (regs) + return_to_irqs_enabled = interrupts_enabled(regs); + /* * SEA can interrupt SError, mask it and describe this as an NMI so * that APEI defers the handling. @@ -284,6 +292,23 @@ int apei_claim_sea(struct pt_regs *regs) nmi_enter(); err = ghes_notify_sea(); nmi_exit(); + + /* + * APEI NMI-like notifications are deferred to irq_work. Unless + * we interrupted irqs-masked code, we can do that now. + */ + if (!err) { + if (return_to_irqs_enabled) { + local_daif_restore(DAIF_PROCCTX_NOIRQ); + __irq_enter(); + irq_work_run(); + __irq_exit(); + } else { + pr_warn_ratelimited("APEI work queued but not completed"); + err = -EINPROGRESS; + } + } + local_daif_restore(current_flags); return err; diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 9981a0a5a87f..0577e2142284 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -34,6 +34,10 @@ int main(void) #ifdef CONFIG_ARM64_SW_TTBR0_PAN DEFINE(TSK_TI_TTBR0, offsetof(struct task_struct, thread_info.ttbr0)); #endif +#ifdef CONFIG_SHADOW_CALL_STACK + DEFINE(TSK_TI_SCS_BASE, offsetof(struct task_struct, thread_info.scs_base)); + DEFINE(TSK_TI_SCS_SP, offsetof(struct task_struct, thread_info.scs_sp)); +#endif DEFINE(TSK_STACK, offsetof(struct task_struct, stack)); #ifdef CONFIG_STACKPROTECTOR DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary)); @@ -92,11 +96,8 @@ int main(void) BLANK(); DEFINE(CPU_BOOT_STACK, offsetof(struct secondary_data, stack)); DEFINE(CPU_BOOT_TASK, offsetof(struct secondary_data, task)); -#ifdef CONFIG_ARM64_PTR_AUTH - DEFINE(CPU_BOOT_PTRAUTH_KEY, offsetof(struct secondary_data, ptrauth_key)); -#endif BLANK(); -#ifdef CONFIG_KVM_ARM_HOST +#ifdef CONFIG_KVM DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); diff --git a/arch/arm64/kernel/cpu-reset.S b/arch/arm64/kernel/cpu-reset.S index 38087b4c0432..4a18055b2ff9 100644 --- a/arch/arm64/kernel/cpu-reset.S +++ b/arch/arm64/kernel/cpu-reset.S @@ -29,7 +29,7 @@ * branch to what would be the reset vector. It must be executed with the * flat identity mapping. */ -ENTRY(__cpu_soft_restart) +SYM_CODE_START(__cpu_soft_restart) /* Clear sctlr_el1 flags. */ mrs x12, sctlr_el1 mov_q x13, SCTLR_ELx_FLAGS @@ -47,6 +47,6 @@ ENTRY(__cpu_soft_restart) mov x1, x3 // arg1 mov x2, x4 // arg2 br x8 -ENDPROC(__cpu_soft_restart) +SYM_CODE_END(__cpu_soft_restart) .popsection diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index df56d2295d16..ad06d6802d2e 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -234,7 +234,7 @@ static int detect_harden_bp_fw(void) smccc_end = NULL; break; -#if IS_ENABLED(CONFIG_KVM_ARM_HOST) +#if IS_ENABLED(CONFIG_KVM) case SMCCC_CONDUIT_SMC: cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc; @@ -635,7 +635,7 @@ has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, return is_midr_in_range(midr, &range) && has_dic; } -#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367) +#if defined(CONFIG_HARDEN_EL2_VECTORS) static const struct midr_range ca57_a72[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), @@ -757,12 +757,16 @@ static const struct arm64_cpu_capabilities erratum_843419_list[] = { }; #endif -#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE -static const struct midr_range erratum_speculative_at_vhe_list[] = { +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT +static const struct midr_range erratum_speculative_at_list[] = { #ifdef CONFIG_ARM64_ERRATUM_1165522 /* Cortex A76 r0p0 to r2p0 */ MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 2, 0), #endif +#ifdef CONFIG_ARM64_ERRATUM_1319367 + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), +#endif #ifdef CONFIG_ARM64_ERRATUM_1530923 /* Cortex A55 r0p0 to r2p0 */ MIDR_RANGE(MIDR_CORTEX_A55, 0, 0, 2, 0), @@ -774,7 +778,7 @@ static const struct midr_range erratum_speculative_at_vhe_list[] = { const struct arm64_cpu_capabilities arm64_errata[] = { #ifdef CONFIG_ARM64_WORKAROUND_CLEAN_CACHE { - .desc = "ARM errata 826319, 827319, 824069, 819472", + .desc = "ARM errata 826319, 827319, 824069, or 819472", .capability = ARM64_WORKAROUND_CLEAN_CACHE, ERRATA_MIDR_RANGE_LIST(workaround_clean_cache), .cpu_enable = cpu_enable_cache_maint_trap, @@ -856,7 +860,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm erratum 1009, ARM erratum 1286807", + .desc = "Qualcomm erratum 1009, or ARM erratum 1286807", .capability = ARM64_WORKAROUND_REPEAT_TLBI, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = cpucap_multi_entry_cap_matches, @@ -897,11 +901,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { ERRATA_MIDR_RANGE_LIST(erratum_1418040_list), }, #endif -#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT_VHE +#ifdef CONFIG_ARM64_WORKAROUND_SPECULATIVE_AT { - .desc = "ARM errata 1165522, 1530923", - .capability = ARM64_WORKAROUND_SPECULATIVE_AT_VHE, - ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_vhe_list), + .desc = "ARM errata 1165522, 1319367, or 1530923", + .capability = ARM64_WORKAROUND_SPECULATIVE_AT, + ERRATA_MIDR_RANGE_LIST(erratum_speculative_at_list), }, #endif #ifdef CONFIG_ARM64_ERRATUM_1463225 @@ -935,13 +939,6 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .cpu_enable = cpu_enable_trap_ctr_access, }, #endif -#ifdef CONFIG_ARM64_ERRATUM_1319367 - { - .desc = "ARM erratum 1319367", - .capability = ARM64_WORKAROUND_SPECULATIVE_AT_NVHE, - ERRATA_MIDR_RANGE_LIST(ca57_a72), - }, -#endif { } }; diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9fac745aa7bb..4ae41670c2e6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -3,6 +3,61 @@ * Contains CPU feature definitions * * Copyright (C) 2015 ARM Ltd. + * + * A note for the weary kernel hacker: the code here is confusing and hard to + * follow! That's partly because it's solving a nasty problem, but also because + * there's a little bit of over-abstraction that tends to obscure what's going + * on behind a maze of helper functions and macros. + * + * The basic problem is that hardware folks have started gluing together CPUs + * with distinct architectural features; in some cases even creating SoCs where + * user-visible instructions are available only on a subset of the available + * cores. We try to address this by snapshotting the feature registers of the + * boot CPU and comparing these with the feature registers of each secondary + * CPU when bringing them up. If there is a mismatch, then we update the + * snapshot state to indicate the lowest-common denominator of the feature, + * known as the "safe" value. This snapshot state can be queried to view the + * "sanitised" value of a feature register. + * + * The sanitised register values are used to decide which capabilities we + * have in the system. These may be in the form of traditional "hwcaps" + * advertised to userspace or internal "cpucaps" which are used to configure + * things like alternative patching and static keys. While a feature mismatch + * may result in a TAINT_CPU_OUT_OF_SPEC kernel taint, a capability mismatch + * may prevent a CPU from being onlined at all. + * + * Some implementation details worth remembering: + * + * - Mismatched features are *always* sanitised to a "safe" value, which + * usually indicates that the feature is not supported. + * + * - A mismatched feature marked with FTR_STRICT will cause a "SANITY CHECK" + * warning when onlining an offending CPU and the kernel will be tainted + * with TAINT_CPU_OUT_OF_SPEC. + * + * - Features marked as FTR_VISIBLE have their sanitised value visible to + * userspace. FTR_VISIBLE features in registers that are only visible + * to EL0 by trapping *must* have a corresponding HWCAP so that late + * onlining of CPUs cannot lead to features disappearing at runtime. + * + * - A "feature" is typically a 4-bit register field. A "capability" is the + * high-level description derived from the sanitised field value. + * + * - Read the Arm ARM (DDI 0487F.a) section D13.1.3 ("Principles of the ID + * scheme for fields in ID registers") to understand when feature fields + * may be signed or unsigned (FTR_SIGNED and FTR_UNSIGNED accordingly). + * + * - KVM exposes its own view of the feature registers to guest operating + * systems regardless of FTR_VISIBLE. This is typically driven from the + * sanitised register values to allow virtual CPUs to be migrated between + * arbitrary physical CPUs, but some features not present on the host are + * also advertised and emulated. Look at sys_reg_descs[] for the gory + * details. + * + * - If the arm64_ftr_bits[] for a register has a missing field, then this + * field is treated as STRICT RES0, including for read_sanitised_ftr_reg(). + * This is stronger than FTR_HIDDEN and can be used to hide features from + * KVM guests. */ #define pr_fmt(fmt) "CPU features: " fmt @@ -124,6 +179,7 @@ static bool __system_matches_cap(unsigned int n); */ static const struct arm64_ftr_bits ftr_id_aa64isar0[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_RNDR_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TLB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_TS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_FHM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR0_DP_SHIFT, 4, 0), @@ -166,22 +222,27 @@ static const struct arm64_ftr_bits ftr_id_aa64pfr0[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_DIT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_AMU_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_MPAM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SEL2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_SVE), FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_SVE_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_RAS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_GIC_SHIFT, 4, 0), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_ASIMD_SHIFT, 4, ID_AA64PFR0_ASIMD_NI), S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI), - /* Linux doesn't care about the EL3 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY), ARM64_FTR_END, }; static const struct arm64_ftr_bits ftr_id_aa64pfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_MPAMFRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_RASFRAC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_SSBS_SHIFT, 4, ID_AA64PFR1_SSBS_PSTATE_NI), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_BTI), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR1_BT_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -209,6 +270,24 @@ static const struct arm64_ftr_bits ftr_id_aa64zfr0[] = { static const struct arm64_ftr_bits ftr_id_aa64mmfr0[] = { /* + * Page size not being supported at Stage-2 is not fatal. You + * just give up KVM if PAGE_SIZE isn't supported there. Go fix + * your favourite nesting hypervisor. + * + * There is a small corner case where the hypervisor explicitly + * advertises a given granule size at Stage-2 (value 2) on some + * vCPUs, and uses the fallback to Stage-1 (value 0) for other + * vCPUs. Although this is not forbidden by the architecture, it + * indicates that the hypervisor is being silly (or buggy). + * + * We make no effort to cope with this and pretend that if these + * fields are inconsistent across vCPUs, then it isn't worth + * trying to bring KVM up. + */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN4_2_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN64_2_SHIFT, 4, 1), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64MMFR0_TGRAN16_2_SHIFT, 4, 1), + /* * We already refuse to boot CPUs that don't support our configured * page size, so we can only detect mismatches for a page size other * than the one we're currently using. Unfortunately, SoCs like this @@ -247,7 +326,7 @@ static const struct arm64_ftr_bits ftr_id_aa64mmfr2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_FWB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_AT_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LVA_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_IESB_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_LSM_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_UAO_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64MMFR2_CNP_SHIFT, 4, 0), @@ -289,7 +368,7 @@ static const struct arm64_ftr_bits ftr_id_mmfr0[] = { }; static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, 36, 28, 0), + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 36, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64DFR0_PMSVER_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_CTX_CMPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64DFR0_WRPS_SHIFT, 4, 0), @@ -316,6 +395,16 @@ static const struct arm64_ftr_bits ftr_dczid[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_isar0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DIVIDE_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_DEBUG_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_COPROC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_CMPBRANCH_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITFIELD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_BITCOUNT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR0_SWAP_SHIFT, 4, 0), + ARM64_FTR_END, +}; static const struct arm64_ftr_bits ftr_id_isar5[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR5_RDM_SHIFT, 4, 0), @@ -328,7 +417,37 @@ static const struct arm64_ftr_bits ftr_id_isar5[] = { }; static const struct arm64_ftr_bits ftr_id_mmfr4[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_EVT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CCIDX_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_LSM_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_HPDS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_CNP_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR4_XNX_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* ac2 */ + /* + * SpecSEI = 1 indicates that the PE might generate an SError on an + * external abort on speculative read. It is safe to assume that an + * SError might be generated than it will not be. Hence it has been + * classified as FTR_HIGHER_SAFE. + */ + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_MMFR4_SPECSEI_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static const struct arm64_ftr_bits ftr_id_isar4[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SWP_FRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_PSR_M_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SYNCH_PRIM_FRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_BARRIER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_SMC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WRITEBACK_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_WITHSHIFTS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_ISAR4_UNPRIV_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static const struct arm64_ftr_bits ftr_id_mmfr5[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_MMFR5_ETS_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -344,6 +463,8 @@ static const struct arm64_ftr_bits ftr_id_isar6[] = { }; static const struct arm64_ftr_bits ftr_id_pfr0[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR0_DIT_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR0_CSV2_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 12, 4, 0), /* State3 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 8, 4, 0), /* State2 */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 4, 4, 0), /* State1 */ @@ -351,8 +472,26 @@ static const struct arm64_ftr_bits ftr_id_pfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_pfr1[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GIC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRT_FRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SEC_FRAC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_GENTIMER_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_VIRTUALIZATION_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_MPROGMOD_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_SECURITY_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR1_PROGMOD_SHIFT, 4, 0), + ARM64_FTR_END, +}; + +static const struct arm64_ftr_bits ftr_id_pfr2[] = { + ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_PFR2_SSBS_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_PFR2_CSV3_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_id_dfr0[] = { - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), + /* [31:28] TraceFilt */ S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 24, 4, 0xf), /* PerfMon */ ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 20, 4, 0), ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 16, 4, 0), @@ -363,6 +502,11 @@ static const struct arm64_ftr_bits ftr_id_dfr0[] = { ARM64_FTR_END, }; +static const struct arm64_ftr_bits ftr_id_dfr1[] = { + S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, ID_DFR1_MTPMU_SHIFT, 4, 0), + ARM64_FTR_END, +}; + static const struct arm64_ftr_bits ftr_zcr[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ZCR_ELx_LEN_SHIFT, ZCR_ELx_LEN_SIZE, 0), /* LEN */ @@ -373,7 +517,7 @@ static const struct arm64_ftr_bits ftr_zcr[] = { * Common ftr bits for a 32bit register with all hidden, strict * attributes, with 4bit feature fields and a default safe value of * 0. Covers the following 32bit registers: - * id_isar[0-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] + * id_isar[1-4], id_mmfr[1-3], id_pfr1, mvfr[0-1] */ static const struct arm64_ftr_bits ftr_generic_32bits[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_LOWER_SAFE, 28, 4, 0), @@ -411,7 +555,7 @@ static const struct __ftr_reg_entry { /* Op1 = 0, CRn = 0, CRm = 1 */ ARM64_FTR_REG(SYS_ID_PFR0_EL1, ftr_id_pfr0), - ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_PFR1_EL1, ftr_id_pfr1), ARM64_FTR_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0), ARM64_FTR_REG(SYS_ID_MMFR0_EL1, ftr_id_mmfr0), ARM64_FTR_REG(SYS_ID_MMFR1_EL1, ftr_generic_32bits), @@ -419,11 +563,11 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_ID_MMFR3_EL1, ftr_generic_32bits), /* Op1 = 0, CRn = 0, CRm = 2 */ - ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR0_EL1, ftr_id_isar0), ARM64_FTR_REG(SYS_ID_ISAR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR2_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_ID_ISAR3_EL1, ftr_generic_32bits), - ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_generic_32bits), + ARM64_FTR_REG(SYS_ID_ISAR4_EL1, ftr_id_isar4), ARM64_FTR_REG(SYS_ID_ISAR5_EL1, ftr_id_isar5), ARM64_FTR_REG(SYS_ID_MMFR4_EL1, ftr_id_mmfr4), ARM64_FTR_REG(SYS_ID_ISAR6_EL1, ftr_id_isar6), @@ -432,6 +576,9 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG(SYS_MVFR0_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_MVFR1_EL1, ftr_generic_32bits), ARM64_FTR_REG(SYS_MVFR2_EL1, ftr_mvfr2), + ARM64_FTR_REG(SYS_ID_PFR2_EL1, ftr_id_pfr2), + ARM64_FTR_REG(SYS_ID_DFR1_EL1, ftr_id_dfr1), + ARM64_FTR_REG(SYS_ID_MMFR5_EL1, ftr_id_mmfr5), /* Op1 = 0, CRn = 0, CRm = 4 */ ARM64_FTR_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0), @@ -468,16 +615,16 @@ static int search_cmp_ftr_reg(const void *id, const void *regp) } /* - * get_arm64_ftr_reg - Lookup a feature register entry using its - * sys_reg() encoding. With the array arm64_ftr_regs sorted in the - * ascending order of sys_id , we use binary search to find a matching + * get_arm64_ftr_reg_nowarn - Looks up a feature register entry using + * its sys_reg() encoding. With the array arm64_ftr_regs sorted in the + * ascending order of sys_id, we use binary search to find a matching * entry. * * returns - Upon success, matching ftr_reg entry for id. * - NULL on failure. It is upto the caller to decide * the impact of a failure. */ -static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) +static struct arm64_ftr_reg *get_arm64_ftr_reg_nowarn(u32 sys_id) { const struct __ftr_reg_entry *ret; @@ -491,6 +638,27 @@ static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) return NULL; } +/* + * get_arm64_ftr_reg - Looks up a feature register entry using + * its sys_reg() encoding. This calls get_arm64_ftr_reg_nowarn(). + * + * returns - Upon success, matching ftr_reg entry for id. + * - NULL on failure but with an WARN_ON(). + */ +static struct arm64_ftr_reg *get_arm64_ftr_reg(u32 sys_id) +{ + struct arm64_ftr_reg *reg; + + reg = get_arm64_ftr_reg_nowarn(sys_id); + + /* + * Requesting a non-existent register search is an error. Warn + * and let the caller handle it. + */ + WARN_ON(!reg); + return reg; +} + static u64 arm64_ftr_set_value(const struct arm64_ftr_bits *ftrp, s64 reg, s64 ftr_val) { @@ -552,7 +720,8 @@ static void __init init_cpu_ftr_reg(u32 sys_reg, u64 new) const struct arm64_ftr_bits *ftrp; struct arm64_ftr_reg *reg = get_arm64_ftr_reg(sys_reg); - BUG_ON(!reg); + if (!reg) + return; for (ftrp = reg->ftr_bits; ftrp->width; ftrp++) { u64 ftr_mask = arm64_ftr_mask(ftrp); @@ -625,6 +794,7 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { init_cpu_ftr_reg(SYS_ID_DFR0_EL1, info->reg_id_dfr0); + init_cpu_ftr_reg(SYS_ID_DFR1_EL1, info->reg_id_dfr1); init_cpu_ftr_reg(SYS_ID_ISAR0_EL1, info->reg_id_isar0); init_cpu_ftr_reg(SYS_ID_ISAR1_EL1, info->reg_id_isar1); init_cpu_ftr_reg(SYS_ID_ISAR2_EL1, info->reg_id_isar2); @@ -636,8 +806,11 @@ void __init init_cpu_features(struct cpuinfo_arm64 *info) init_cpu_ftr_reg(SYS_ID_MMFR1_EL1, info->reg_id_mmfr1); init_cpu_ftr_reg(SYS_ID_MMFR2_EL1, info->reg_id_mmfr2); init_cpu_ftr_reg(SYS_ID_MMFR3_EL1, info->reg_id_mmfr3); + init_cpu_ftr_reg(SYS_ID_MMFR4_EL1, info->reg_id_mmfr4); + init_cpu_ftr_reg(SYS_ID_MMFR5_EL1, info->reg_id_mmfr5); init_cpu_ftr_reg(SYS_ID_PFR0_EL1, info->reg_id_pfr0); init_cpu_ftr_reg(SYS_ID_PFR1_EL1, info->reg_id_pfr1); + init_cpu_ftr_reg(SYS_ID_PFR2_EL1, info->reg_id_pfr2); init_cpu_ftr_reg(SYS_MVFR0_EL1, info->reg_mvfr0); init_cpu_ftr_reg(SYS_MVFR1_EL1, info->reg_mvfr1); init_cpu_ftr_reg(SYS_MVFR2_EL1, info->reg_mvfr2); @@ -682,7 +855,9 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); - BUG_ON(!regp); + if (!regp) + return 0; + update_cpu_ftr_reg(regp, val); if ((boot & regp->strict_mask) == (val & regp->strict_mask)) return 0; @@ -691,6 +866,104 @@ static int check_update_ftr_reg(u32 sys_id, int cpu, u64 val, u64 boot) return 1; } +static void relax_cpu_ftr_reg(u32 sys_id, int field) +{ + const struct arm64_ftr_bits *ftrp; + struct arm64_ftr_reg *regp = get_arm64_ftr_reg(sys_id); + + if (!regp) + return; + + for (ftrp = regp->ftr_bits; ftrp->width; ftrp++) { + if (ftrp->shift == field) { + regp->strict_mask &= ~arm64_ftr_mask(ftrp); + break; + } + } + + /* Bogus field? */ + WARN_ON(!ftrp->width); +} + +static int update_32bit_cpu_features(int cpu, struct cpuinfo_arm64 *info, + struct cpuinfo_arm64 *boot) +{ + int taint = 0; + u64 pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); + + /* + * If we don't have AArch32 at all then skip the checks entirely + * as the register values may be UNKNOWN and we're not going to be + * using them for anything. + */ + if (!id_aa64pfr0_32bit_el0(pfr0)) + return taint; + + /* + * If we don't have AArch32 at EL1, then relax the strictness of + * EL1-dependent register fields to avoid spurious sanity check fails. + */ + if (!id_aa64pfr0_32bit_el1(pfr0)) { + relax_cpu_ftr_reg(SYS_ID_ISAR4_EL1, ID_ISAR4_SMC_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRT_FRAC_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SEC_FRAC_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_VIRTUALIZATION_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_SECURITY_SHIFT); + relax_cpu_ftr_reg(SYS_ID_PFR1_EL1, ID_PFR1_PROGMOD_SHIFT); + } + + taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, + info->reg_id_dfr0, boot->reg_id_dfr0); + taint |= check_update_ftr_reg(SYS_ID_DFR1_EL1, cpu, + info->reg_id_dfr1, boot->reg_id_dfr1); + taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, + info->reg_id_isar0, boot->reg_id_isar0); + taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, + info->reg_id_isar1, boot->reg_id_isar1); + taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, + info->reg_id_isar2, boot->reg_id_isar2); + taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, + info->reg_id_isar3, boot->reg_id_isar3); + taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, + info->reg_id_isar4, boot->reg_id_isar4); + taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, + info->reg_id_isar5, boot->reg_id_isar5); + taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu, + info->reg_id_isar6, boot->reg_id_isar6); + + /* + * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and + * ACTLR formats could differ across CPUs and therefore would have to + * be trapped for virtualization anyway. + */ + taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, + info->reg_id_mmfr0, boot->reg_id_mmfr0); + taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, + info->reg_id_mmfr1, boot->reg_id_mmfr1); + taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, + info->reg_id_mmfr2, boot->reg_id_mmfr2); + taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, + info->reg_id_mmfr3, boot->reg_id_mmfr3); + taint |= check_update_ftr_reg(SYS_ID_MMFR4_EL1, cpu, + info->reg_id_mmfr4, boot->reg_id_mmfr4); + taint |= check_update_ftr_reg(SYS_ID_MMFR5_EL1, cpu, + info->reg_id_mmfr5, boot->reg_id_mmfr5); + taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, + info->reg_id_pfr0, boot->reg_id_pfr0); + taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, + info->reg_id_pfr1, boot->reg_id_pfr1); + taint |= check_update_ftr_reg(SYS_ID_PFR2_EL1, cpu, + info->reg_id_pfr2, boot->reg_id_pfr2); + taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, + info->reg_mvfr0, boot->reg_mvfr0); + taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, + info->reg_mvfr1, boot->reg_mvfr1); + taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, + info->reg_mvfr2, boot->reg_mvfr2); + + return taint; +} + /* * Update system wide CPU feature registers with the values from a * non-boot CPU. Also performs SANITY checks to make sure that there @@ -753,9 +1026,6 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64MMFR2_EL1, cpu, info->reg_id_aa64mmfr2, boot->reg_id_aa64mmfr2); - /* - * EL3 is not our concern. - */ taint |= check_update_ftr_reg(SYS_ID_AA64PFR0_EL1, cpu, info->reg_id_aa64pfr0, boot->reg_id_aa64pfr0); taint |= check_update_ftr_reg(SYS_ID_AA64PFR1_EL1, cpu, @@ -764,55 +1034,6 @@ void update_cpu_features(int cpu, taint |= check_update_ftr_reg(SYS_ID_AA64ZFR0_EL1, cpu, info->reg_id_aa64zfr0, boot->reg_id_aa64zfr0); - /* - * If we have AArch32, we care about 32-bit features for compat. - * If the system doesn't support AArch32, don't update them. - */ - if (id_aa64pfr0_32bit_el0(read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1)) && - id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { - - taint |= check_update_ftr_reg(SYS_ID_DFR0_EL1, cpu, - info->reg_id_dfr0, boot->reg_id_dfr0); - taint |= check_update_ftr_reg(SYS_ID_ISAR0_EL1, cpu, - info->reg_id_isar0, boot->reg_id_isar0); - taint |= check_update_ftr_reg(SYS_ID_ISAR1_EL1, cpu, - info->reg_id_isar1, boot->reg_id_isar1); - taint |= check_update_ftr_reg(SYS_ID_ISAR2_EL1, cpu, - info->reg_id_isar2, boot->reg_id_isar2); - taint |= check_update_ftr_reg(SYS_ID_ISAR3_EL1, cpu, - info->reg_id_isar3, boot->reg_id_isar3); - taint |= check_update_ftr_reg(SYS_ID_ISAR4_EL1, cpu, - info->reg_id_isar4, boot->reg_id_isar4); - taint |= check_update_ftr_reg(SYS_ID_ISAR5_EL1, cpu, - info->reg_id_isar5, boot->reg_id_isar5); - taint |= check_update_ftr_reg(SYS_ID_ISAR6_EL1, cpu, - info->reg_id_isar6, boot->reg_id_isar6); - - /* - * Regardless of the value of the AuxReg field, the AIFSR, ADFSR, and - * ACTLR formats could differ across CPUs and therefore would have to - * be trapped for virtualization anyway. - */ - taint |= check_update_ftr_reg(SYS_ID_MMFR0_EL1, cpu, - info->reg_id_mmfr0, boot->reg_id_mmfr0); - taint |= check_update_ftr_reg(SYS_ID_MMFR1_EL1, cpu, - info->reg_id_mmfr1, boot->reg_id_mmfr1); - taint |= check_update_ftr_reg(SYS_ID_MMFR2_EL1, cpu, - info->reg_id_mmfr2, boot->reg_id_mmfr2); - taint |= check_update_ftr_reg(SYS_ID_MMFR3_EL1, cpu, - info->reg_id_mmfr3, boot->reg_id_mmfr3); - taint |= check_update_ftr_reg(SYS_ID_PFR0_EL1, cpu, - info->reg_id_pfr0, boot->reg_id_pfr0); - taint |= check_update_ftr_reg(SYS_ID_PFR1_EL1, cpu, - info->reg_id_pfr1, boot->reg_id_pfr1); - taint |= check_update_ftr_reg(SYS_MVFR0_EL1, cpu, - info->reg_mvfr0, boot->reg_mvfr0); - taint |= check_update_ftr_reg(SYS_MVFR1_EL1, cpu, - info->reg_mvfr1, boot->reg_mvfr1); - taint |= check_update_ftr_reg(SYS_MVFR2_EL1, cpu, - info->reg_mvfr2, boot->reg_mvfr2); - } - if (id_aa64pfr0_sve(info->reg_id_aa64pfr0)) { taint |= check_update_ftr_reg(SYS_ZCR_EL1, cpu, info->reg_zcr, boot->reg_zcr); @@ -824,6 +1045,12 @@ void update_cpu_features(int cpu, } /* + * This relies on a sanitised view of the AArch64 ID registers + * (e.g. SYS_ID_AA64PFR0_EL1), so we call it last. + */ + taint |= update_32bit_cpu_features(cpu, info, boot); + + /* * Mismatched CPU features are a recipe for disaster. Don't even * pretend to support them. */ @@ -837,8 +1064,8 @@ u64 read_sanitised_ftr_reg(u32 id) { struct arm64_ftr_reg *regp = get_arm64_ftr_reg(id); - /* We shouldn't get a request for an unsupported register */ - BUG_ON(!regp); + if (!regp) + return 0; return regp->sys_val; } @@ -854,11 +1081,15 @@ static u64 __read_sysreg_by_encoding(u32 sys_id) switch (sys_id) { read_sysreg_case(SYS_ID_PFR0_EL1); read_sysreg_case(SYS_ID_PFR1_EL1); + read_sysreg_case(SYS_ID_PFR2_EL1); read_sysreg_case(SYS_ID_DFR0_EL1); + read_sysreg_case(SYS_ID_DFR1_EL1); read_sysreg_case(SYS_ID_MMFR0_EL1); read_sysreg_case(SYS_ID_MMFR1_EL1); read_sysreg_case(SYS_ID_MMFR2_EL1); read_sysreg_case(SYS_ID_MMFR3_EL1); + read_sysreg_case(SYS_ID_MMFR4_EL1); + read_sysreg_case(SYS_ID_MMFR5_EL1); read_sysreg_case(SYS_ID_ISAR0_EL1); read_sysreg_case(SYS_ID_ISAR1_EL1); read_sysreg_case(SYS_ID_ISAR2_EL1); @@ -1409,6 +1640,21 @@ static bool can_use_gic_priorities(const struct arm64_cpu_capabilities *entry, } #endif +#ifdef CONFIG_ARM64_BTI +static void bti_enable(const struct arm64_cpu_capabilities *__unused) +{ + /* + * Use of X16/X17 for tail-calls and trampolines that jump to + * function entry points using BR is a requirement for + * marking binaries with GNU_PROPERTY_AARCH64_FEATURE_1_BTI. + * So, be strict and forbid other BRs using other registers to + * jump onto a PACIxSP instruction: + */ + sysreg_clear_set(sctlr_el1, 0, SCTLR_EL1_BT0 | SCTLR_EL1_BT1); + isb(); +} +#endif /* CONFIG_ARM64_BTI */ + /* Internal helper functions to match cpu capability type */ static bool cpucap_late_cpu_optional(const struct arm64_cpu_capabilities *cap) @@ -1511,6 +1757,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64PFR0_EL0_SHIFT, .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT, }, +#ifdef CONFIG_KVM + { + .desc = "32-bit EL1 Support", + .capability = ARM64_HAS_32BIT_EL1, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .matches = has_cpuid_feature, + .sys_reg = SYS_ID_AA64PFR0_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64PFR0_EL1_SHIFT, + .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT, + }, +#endif { .desc = "Kernel page table isolation (KPTI)", .capability = ARM64_UNMAP_KERNEL_AT_EL0, @@ -1779,6 +2037,23 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .min_field_value = 1, }, #endif +#ifdef CONFIG_ARM64_BTI + { + .desc = "Branch Target Identification", + .capability = ARM64_BTI, +#ifdef CONFIG_ARM64_BTI_KERNEL + .type = ARM64_CPUCAP_STRICT_BOOT_CPU_FEATURE, +#else + .type = ARM64_CPUCAP_SYSTEM_FEATURE, +#endif + .matches = has_cpuid_feature, + .cpu_enable = bti_enable, + .sys_reg = SYS_ID_AA64PFR1_EL1, + .field_pos = ID_AA64PFR1_BT_SHIFT, + .min_field_value = ID_AA64PFR1_BT_BTI, + .sign = FTR_UNSIGNED, + }, +#endif {}, }; @@ -1888,6 +2163,9 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), +#ifdef CONFIG_ARM64_BTI + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), +#endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), @@ -2181,6 +2459,36 @@ static void verify_sve_features(void) /* Add checks on other ZCR bits here if necessary */ } +static void verify_hyp_capabilities(void) +{ + u64 safe_mmfr1, mmfr0, mmfr1; + int parange, ipa_max; + unsigned int safe_vmid_bits, vmid_bits; + + if (!IS_ENABLED(CONFIG_KVM) || !IS_ENABLED(CONFIG_KVM_ARM_HOST)) + return; + + safe_mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); + mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + + /* Verify VMID bits */ + safe_vmid_bits = get_vmid_bits(safe_mmfr1); + vmid_bits = get_vmid_bits(mmfr1); + if (vmid_bits < safe_vmid_bits) { + pr_crit("CPU%d: VMID width mismatch\n", smp_processor_id()); + cpu_die_early(); + } + + /* Verify IPA range */ + parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_PARANGE_SHIFT); + ipa_max = id_aa64mmfr0_parange_to_phys_shift(parange); + if (ipa_max < get_kvm_ipa_limit()) { + pr_crit("CPU%d: IPA range mismatch\n", smp_processor_id()); + cpu_die_early(); + } +} /* * Run through the enabled system capabilities and enable() it on this CPU. @@ -2206,6 +2514,9 @@ static void verify_local_cpu_capabilities(void) if (system_supports_sve()) verify_sve_features(); + + if (is_hyp_mode_available()) + verify_hyp_capabilities(); } void check_local_cpu_capabilities(void) @@ -2394,7 +2705,7 @@ static int emulate_sys_reg(u32 id, u64 *valp) if (sys_reg_CRm(id) == 0) return emulate_id_reg(id, valp); - regp = get_arm64_ftr_reg(id); + regp = get_arm64_ftr_reg_nowarn(id); if (regp) *valp = arm64_ftr_reg_user_value(regp); else diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 86136075ae41..86637466daa8 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -92,6 +92,7 @@ static const char *const hwcap_str[] = { "bf16", "dgh", "rng", + "bti", NULL }; @@ -311,6 +312,8 @@ static int __init cpuinfo_regs_init(void) } return 0; } +device_initcall(cpuinfo_regs_init); + static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) { unsigned int cpu = smp_processor_id(); @@ -362,6 +365,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) /* Update the 32bit ID registers only if AArch32 is implemented */ if (id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0)) { info->reg_id_dfr0 = read_cpuid(ID_DFR0_EL1); + info->reg_id_dfr1 = read_cpuid(ID_DFR1_EL1); info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); @@ -373,8 +377,11 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_mmfr4 = read_cpuid(ID_MMFR4_EL1); + info->reg_id_mmfr5 = read_cpuid(ID_MMFR5_EL1); info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + info->reg_id_pfr2 = read_cpuid(ID_PFR2_EL1); info->reg_mvfr0 = read_cpuid(MVFR0_EL1); info->reg_mvfr1 = read_cpuid(MVFR1_EL1); @@ -403,5 +410,3 @@ void __init cpuinfo_store_boot_cpu(void) boot_cpu_data = *info; init_cpu_features(&boot_cpu_data); } - -device_initcall(cpuinfo_regs_init); diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index ca4c3e12d8c5..1f646b07e3e9 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -5,6 +5,7 @@ */ #include <linux/crash_core.h> +#include <asm/cpufeature.h> #include <asm/memory.h> void arch_crash_save_vmcoreinfo(void) @@ -16,4 +17,7 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", PHYS_OFFSET); vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset()); + vmcoreinfo_append_str("NUMBER(KERNELPACMASK)=0x%llx\n", + system_supports_address_auth() ? + ptrauth_kernel_pac_mask() : 0); } diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 48222a4760c2..15e80c876d46 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -376,15 +376,13 @@ int aarch32_break_handler(struct pt_regs *regs) } NOKPROBE_SYMBOL(aarch32_break_handler); -static int __init debug_traps_init(void) +void __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_TRACE, "single-step handler"); hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, TRAP_BRKPT, "ptrace BRK handler"); - return 0; } -arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S index 1a03618df0df..0073b24b5d25 100644 --- a/arch/arm64/kernel/efi-entry.S +++ b/arch/arm64/kernel/efi-entry.S @@ -14,12 +14,12 @@ SYM_CODE_START(efi_enter_kernel) /* - * efi_entry() will have copied the kernel image if necessary and we + * efi_pe_entry() will have copied the kernel image if necessary and we * end up here with device tree address in x1 and the kernel entry * point stored in x0. Save those values in registers which are * callee preserved. */ - ldr w2, =stext_offset + ldr w2, =primary_entry_offset add x19, x0, x2 // relocated Image entrypoint mov x20, x1 // DTB address diff --git a/arch/arm64/kernel/efi-header.S b/arch/arm64/kernel/efi-header.S index 914999ccaf8a..df67c0f2a077 100644 --- a/arch/arm64/kernel/efi-header.S +++ b/arch/arm64/kernel/efi-header.S @@ -27,12 +27,12 @@ optional_header: .long __initdata_begin - efi_header_end // SizeOfCode .long __pecoff_data_size // SizeOfInitializedData .long 0 // SizeOfUninitializedData - .long __efistub_efi_entry - _head // AddressOfEntryPoint + .long __efistub_efi_pe_entry - _head // AddressOfEntryPoint .long efi_header_end - _head // BaseOfCode extra_header_fields: .quad 0 // ImageBase - .long SZ_4K // SectionAlignment + .long SEGMENT_ALIGN // SectionAlignment .long PECOFF_FILE_ALIGNMENT // FileAlignment .short 0 // MajorOperatingSystemVersion .short 0 // MinorOperatingSystemVersion diff --git a/arch/arm64/kernel/efi-rt-wrapper.S b/arch/arm64/kernel/efi-rt-wrapper.S index 3fc71106cb2b..75691a2641c1 100644 --- a/arch/arm64/kernel/efi-rt-wrapper.S +++ b/arch/arm64/kernel/efi-rt-wrapper.S @@ -5,7 +5,7 @@ #include <linux/linkage.h> -ENTRY(__efi_rt_asm_wrapper) +SYM_FUNC_START(__efi_rt_asm_wrapper) stp x29, x30, [sp, #-32]! mov x29, sp @@ -34,5 +34,14 @@ ENTRY(__efi_rt_asm_wrapper) ldp x29, x30, [sp], #32 b.ne 0f ret -0: b efi_handle_corrupted_x18 // tail call -ENDPROC(__efi_rt_asm_wrapper) +0: + /* + * With CONFIG_SHADOW_CALL_STACK, the kernel uses x18 to store a + * shadow stack pointer, which we need to restore before returning to + * potentially instrumented code. This is safe because the wrapper is + * called with preemption disabled and a separate shadow stack is used + * for interrupts. + */ + mov x18, x2 + b efi_handle_corrupted_x18 // tail call +SYM_FUNC_END(__efi_rt_asm_wrapper) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index c839b5bf1904..3dbdf9752b11 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -94,7 +94,7 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) break; default: el1_inv(regs, esr); - }; + } } NOKPROBE_SYMBOL(el1_sync_handler); @@ -188,6 +188,14 @@ static void notrace el0_undef(struct pt_regs *regs) } NOKPROBE_SYMBOL(el0_undef); +static void notrace el0_bti(struct pt_regs *regs) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_bti(regs); +} +NOKPROBE_SYMBOL(el0_bti); + static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) { user_exit_irqoff(); @@ -255,6 +263,9 @@ asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) case ESR_ELx_EC_UNKNOWN: el0_undef(regs); break; + case ESR_ELx_EC_BTI: + el0_bti(regs); + break; case ESR_ELx_EC_BREAKPT_LOW: case ESR_ELx_EC_SOFTSTP_LOW: case ESR_ELx_EC_WATCHPT_LOW: diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 0f24eae8f3cc..f880dd63ddc3 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -16,34 +16,34 @@ * * x0 - pointer to struct fpsimd_state */ -ENTRY(fpsimd_save_state) +SYM_FUNC_START(fpsimd_save_state) fpsimd_save x0, 8 ret -ENDPROC(fpsimd_save_state) +SYM_FUNC_END(fpsimd_save_state) /* * Load the FP registers. * * x0 - pointer to struct fpsimd_state */ -ENTRY(fpsimd_load_state) +SYM_FUNC_START(fpsimd_load_state) fpsimd_restore x0, 8 ret -ENDPROC(fpsimd_load_state) +SYM_FUNC_END(fpsimd_load_state) #ifdef CONFIG_ARM64_SVE -ENTRY(sve_save_state) +SYM_FUNC_START(sve_save_state) sve_save 0, x1, 2 ret -ENDPROC(sve_save_state) +SYM_FUNC_END(sve_save_state) -ENTRY(sve_load_state) +SYM_FUNC_START(sve_load_state) sve_load 0, x1, x2, 3, x4 ret -ENDPROC(sve_load_state) +SYM_FUNC_END(sve_load_state) -ENTRY(sve_get_vl) +SYM_FUNC_START(sve_get_vl) _sve_rdvl 0, 1 ret -ENDPROC(sve_get_vl) +SYM_FUNC_END(sve_get_vl) #endif /* CONFIG_ARM64_SVE */ diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 833d48c9acb5..a338f40e64d3 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -23,8 +23,9 @@ * * ... where <entry> is either ftrace_caller or ftrace_regs_caller. * - * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are - * live, and x9-x18 are safe to clobber. + * Each instrumented function follows the AAPCS, so here x0-x8 and x18-x30 are + * live (x18 holds the Shadow Call Stack pointer), and x9-x17 are safe to + * clobber. * * We save the callsite's context into a pt_regs before invoking any ftrace * callbacks. So that we can get a sensible backtrace, we create a stack record diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ddcde093c433..5304d193c79d 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -23,6 +23,7 @@ #include <asm/mmu.h> #include <asm/processor.h> #include <asm/ptrace.h> +#include <asm/scs.h> #include <asm/thread_info.h> #include <asm/asm-uaccess.h> #include <asm/unistd.h> @@ -178,7 +179,9 @@ alternative_cb_end apply_ssbd 1, x22, x23 - ptrauth_keys_install_kernel tsk, 1, x20, x22, x23 + ptrauth_keys_install_kernel tsk, x20, x22, x23 + + scs_load tsk, x20 .else add x21, sp, #S_FRAME_SIZE get_current_task tsk @@ -343,6 +346,8 @@ alternative_else_nop_endif msr cntkctl_el1, x1 4: #endif + scs_save tsk, x0 + /* No kernel C function calls after this as user keys are set. */ ptrauth_keys_install_user tsk, x0, x1, x2 @@ -388,6 +393,9 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 .macro irq_stack_entry mov x19, sp // preserve the original sp +#ifdef CONFIG_SHADOW_CALL_STACK + mov x24, scs_sp // preserve the original shadow stack +#endif /* * Compare sp with the base of the task stack. @@ -405,15 +413,25 @@ alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0 /* switch to the irq stack */ mov sp, x26 + +#ifdef CONFIG_SHADOW_CALL_STACK + /* also switch to the irq shadow stack */ + adr_this_cpu scs_sp, irq_shadow_call_stack, x26 +#endif + 9998: .endm /* - * x19 should be preserved between irq_stack_entry and - * irq_stack_exit. + * The callee-saved regs (x19-x29) should be preserved between + * irq_stack_entry and irq_stack_exit, but note that kernel_entry + * uses x20-x23 to store data for later use. */ .macro irq_stack_exit mov sp, x19 +#ifdef CONFIG_SHADOW_CALL_STACK + mov scs_sp, x24 +#endif .endm /* GPRs used by entry code */ @@ -728,20 +746,9 @@ el0_error_naked: SYM_CODE_END(el0_error) /* - * Ok, we need to do extra processing, enter the slow path. - */ -work_pending: - mov x0, sp // 'regs' - bl do_notify_resume -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_on // enabled while in userspace -#endif - ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step - b finish_ret_to_user -/* * "slow" syscall return path. */ -ret_to_user: +SYM_CODE_START_LOCAL(ret_to_user) disable_daif gic_prio_kentry_setup tmp=x3 ldr x1, [tsk, #TSK_TI_FLAGS] @@ -753,7 +760,19 @@ finish_ret_to_user: bl stackleak_erase #endif kernel_exit 0 -ENDPROC(ret_to_user) + +/* + * Ok, we need to do extra processing, enter the slow path. + */ +work_pending: + mov x0, sp // 'regs' + bl do_notify_resume +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_on // enabled while in userspace +#endif + ldr x1, [tsk, #TSK_TI_FLAGS] // re-check for single-step + b finish_ret_to_user +SYM_CODE_END(ret_to_user) .popsection // .entry.text @@ -900,7 +919,9 @@ SYM_FUNC_START(cpu_switch_to) ldr lr, [x8] mov sp, x9 msr sp_el0, x1 - ptrauth_keys_install_kernel x1, 1, x8, x9, x10 + ptrauth_keys_install_kernel x1, x8, x9, x10 + scs_save x0, x8 + scs_load x1, x8 ret SYM_FUNC_END(cpu_switch_to) NOKPROBE(cpu_switch_to) @@ -1029,13 +1050,16 @@ SYM_CODE_START(__sdei_asm_handler) mov x19, x1 +#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK) + ldrb w4, [x19, #SDEI_EVENT_PRIORITY] +#endif + #ifdef CONFIG_VMAP_STACK /* * entry.S may have been using sp as a scratch register, find whether * this is a normal or critical event and switch to the appropriate * stack for this CPU. */ - ldrb w4, [x19, #SDEI_EVENT_PRIORITY] cbnz w4, 1f ldr_this_cpu dst=x5, sym=sdei_stack_normal_ptr, tmp=x6 b 2f @@ -1045,6 +1069,15 @@ SYM_CODE_START(__sdei_asm_handler) mov sp, x5 #endif +#ifdef CONFIG_SHADOW_CALL_STACK + /* Use a separate shadow call stack for normal and critical events */ + cbnz w4, 3f + adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_normal, tmp=x6 + b 4f +3: adr_this_cpu dst=scs_sp, sym=sdei_shadow_call_stack_critical, tmp=x6 +4: +#endif + /* * We may have interrupted userspace, or a guest, or exit-from or * return-to either of these. We can't trust sp_el0, restore it. diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 57a91032b4c2..632702146813 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -13,6 +13,7 @@ #include <linux/init.h> #include <linux/irqchip/arm-gic-v3.h> +#include <asm/asm_pointer_auth.h> #include <asm/assembler.h> #include <asm/boot.h> #include <asm/ptrace.h> @@ -27,6 +28,7 @@ #include <asm/pgtable-hwdef.h> #include <asm/pgtable.h> #include <asm/page.h> +#include <asm/scs.h> #include <asm/smp.h> #include <asm/sysreg.h> #include <asm/thread_info.h> @@ -70,9 +72,9 @@ _head: * its opcode forms the magic "MZ" signature required by UEFI. */ add x13, x18, #0x16 - b stext + b primary_entry #else - b stext // branch to kernel start, magic + b primary_entry // branch to kernel start, magic .long 0 // reserved #endif le64sym _kernel_offset_le // Image load offset from start of RAM, little-endian @@ -98,14 +100,13 @@ pe_header: * primary lowlevel boot path: * * Register Scope Purpose - * x21 stext() .. start_kernel() FDT pointer passed at boot in x0 - * x23 stext() .. start_kernel() physical misalignment/KASLR offset - * x28 __create_page_tables() callee preserved temp register - * x19/x20 __primary_switch() callee preserved temp registers - * x24 __primary_switch() .. relocate_kernel() - * current RELR displacement + * x21 primary_entry() .. start_kernel() FDT pointer passed at boot in x0 + * x23 primary_entry() .. start_kernel() physical misalignment/KASLR offset + * x28 __create_page_tables() callee preserved temp register + * x19/x20 __primary_switch() callee preserved temp registers + * x24 __primary_switch() .. relocate_kernel() current RELR displacement */ -SYM_CODE_START(stext) +SYM_CODE_START(primary_entry) bl preserve_boot_args bl el2_setup // Drop to EL1, w0=cpu_boot_mode adrp x23, __PHYS_OFFSET @@ -118,10 +119,9 @@ SYM_CODE_START(stext) * On return, the CPU will be ready for the MMU to be turned on and * the TCR will have been set. */ - mov x0, #ARM64_CPU_BOOT_PRIMARY bl __cpu_setup // initialise processor b __primary_switch -SYM_CODE_END(stext) +SYM_CODE_END(primary_entry) /* * Preserve the arguments passed by the bootloader in x0 .. x3 @@ -394,13 +394,19 @@ SYM_FUNC_START_LOCAL(__create_page_tables) /* * Since the page tables have been populated with non-cacheable - * accesses (MMU disabled), invalidate the idmap and swapper page - * tables again to remove any speculatively loaded cache lines. + * accesses (MMU disabled), invalidate those tables again to + * remove any speculatively loaded cache lines. */ + dmb sy + adrp x0, idmap_pg_dir + adrp x1, idmap_pg_end + sub x1, x1, x0 + bl __inval_dcache_area + + adrp x0, init_pg_dir adrp x1, init_pg_end sub x1, x1, x0 - dmb sy bl __inval_dcache_area ret x28 @@ -417,6 +423,10 @@ SYM_FUNC_START_LOCAL(__primary_switched) adr_l x5, init_task msr sp_el0, x5 // Save thread_info +#ifdef CONFIG_ARM64_PTR_AUTH + __ptrauth_keys_init_cpu x5, x6, x7, x8 +#endif + adr_l x8, vectors // load VBAR_EL1 with virtual msr vbar_el1, x8 // vector table address isb @@ -424,6 +434,10 @@ SYM_FUNC_START_LOCAL(__primary_switched) stp xzr, x30, [sp, #-16]! mov x29, sp +#ifdef CONFIG_SHADOW_CALL_STACK + adr_l scs_sp, init_shadow_call_stack // Set shadow call stack +#endif + str_l x21, __fdt_pointer, x5 // Save FDT pointer ldr_l x4, kimage_vaddr // Save the offset between @@ -717,7 +731,6 @@ SYM_FUNC_START_LOCAL(secondary_startup) * Common entry point for secondary CPUs. */ bl __cpu_secondary_check52bitva - mov x0, #ARM64_CPU_BOOT_SECONDARY bl __cpu_setup // initialise processor adrp x1, swapper_pg_dir bl __enable_mmu @@ -737,8 +750,14 @@ SYM_FUNC_START_LOCAL(__secondary_switched) ldr x2, [x0, #CPU_BOOT_TASK] cbz x2, __secondary_too_slow msr sp_el0, x2 + scs_load x2, x3 mov x29, #0 mov x30, #0 + +#ifdef CONFIG_ARM64_PTR_AUTH + ptrauth_keys_init_cpu x2, x3, x4, x5 +#endif + b secondary_start_kernel SYM_FUNC_END(__secondary_switched) diff --git a/arch/arm64/kernel/hibernate-asm.S b/arch/arm64/kernel/hibernate-asm.S index 6532105b3e32..8ccca660034e 100644 --- a/arch/arm64/kernel/hibernate-asm.S +++ b/arch/arm64/kernel/hibernate-asm.S @@ -65,7 +65,7 @@ * x5: physical address of a zero page that remains zero after resume */ .pushsection ".hibernate_exit.text", "ax" -ENTRY(swsusp_arch_suspend_exit) +SYM_CODE_START(swsusp_arch_suspend_exit) /* * We execute from ttbr0, change ttbr1 to our copied linear map tables * with a break-before-make via the zero page @@ -110,7 +110,7 @@ ENTRY(swsusp_arch_suspend_exit) cbz x24, 3f /* Do we need to re-initialise EL2? */ hvc #0 3: ret -ENDPROC(swsusp_arch_suspend_exit) +SYM_CODE_END(swsusp_arch_suspend_exit) /* * Restore the hyp stub. @@ -119,15 +119,15 @@ ENDPROC(swsusp_arch_suspend_exit) * * x24: The physical address of __hyp_stub_vectors */ -el1_sync: +SYM_CODE_START_LOCAL(el1_sync) msr vbar_el2, x24 eret -ENDPROC(el1_sync) +SYM_CODE_END(el1_sync) .macro invalid_vector label -\label: +SYM_CODE_START_LOCAL(\label) b \label -ENDPROC(\label) +SYM_CODE_END(\label) .endm invalid_vector el2_sync_invalid @@ -141,7 +141,7 @@ ENDPROC(\label) /* el2 vectors - switch el2 here while we restore the memory image. */ .align 11 -ENTRY(hibernate_el2_vectors) +SYM_CODE_START(hibernate_el2_vectors) ventry el2_sync_invalid // Synchronous EL2t ventry el2_irq_invalid // IRQ EL2t ventry el2_fiq_invalid // FIQ EL2t @@ -161,6 +161,6 @@ ENTRY(hibernate_el2_vectors) ventry el1_irq_invalid // IRQ 32-bit EL1 ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error_invalid // Error 32-bit EL1 -END(hibernate_el2_vectors) +SYM_CODE_END(hibernate_el2_vectors) .popsection diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index e473ead806ed..160f5881a0b7 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -21,7 +21,7 @@ .align 11 -ENTRY(__hyp_stub_vectors) +SYM_CODE_START(__hyp_stub_vectors) ventry el2_sync_invalid // Synchronous EL2t ventry el2_irq_invalid // IRQ EL2t ventry el2_fiq_invalid // FIQ EL2t @@ -41,11 +41,11 @@ ENTRY(__hyp_stub_vectors) ventry el1_irq_invalid // IRQ 32-bit EL1 ventry el1_fiq_invalid // FIQ 32-bit EL1 ventry el1_error_invalid // Error 32-bit EL1 -ENDPROC(__hyp_stub_vectors) +SYM_CODE_END(__hyp_stub_vectors) .align 11 -el1_sync: +SYM_CODE_START_LOCAL(el1_sync) cmp x0, #HVC_SET_VECTORS b.ne 2f msr vbar_el2, x1 @@ -68,12 +68,12 @@ el1_sync: 9: mov x0, xzr eret -ENDPROC(el1_sync) +SYM_CODE_END(el1_sync) .macro invalid_vector label -\label: +SYM_CODE_START_LOCAL(\label) b \label -ENDPROC(\label) +SYM_CODE_END(\label) .endm invalid_vector el2_sync_invalid @@ -106,15 +106,15 @@ ENDPROC(\label) * initialisation entry point. */ -ENTRY(__hyp_set_vectors) +SYM_FUNC_START(__hyp_set_vectors) mov x1, x0 mov x0, #HVC_SET_VECTORS hvc #0 ret -ENDPROC(__hyp_set_vectors) +SYM_FUNC_END(__hyp_set_vectors) -ENTRY(__hyp_reset_vectors) +SYM_FUNC_START(__hyp_reset_vectors) mov x0, #HVC_RESET_VECTORS hvc #0 ret -ENDPROC(__hyp_reset_vectors) +SYM_FUNC_END(__hyp_reset_vectors) diff --git a/arch/arm64/kernel/image-vars.h b/arch/arm64/kernel/image-vars.h index 7f06ad93fc95..be0a63ffed23 100644 --- a/arch/arm64/kernel/image-vars.h +++ b/arch/arm64/kernel/image-vars.h @@ -13,7 +13,7 @@ #ifdef CONFIG_EFI __efistub_kernel_size = _edata - _text; -__efistub_stext_offset = stext - _text; +__efistub_primary_entry_offset = primary_entry - _text; /* diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index 4a9e773a177f..684d871ae38d 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -51,21 +51,33 @@ enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; } -/* NOP is an alias of HINT */ -bool __kprobes aarch64_insn_is_nop(u32 insn) +bool __kprobes aarch64_insn_is_steppable_hint(u32 insn) { if (!aarch64_insn_is_hint(insn)) return false; switch (insn & 0xFE0) { - case AARCH64_INSN_HINT_YIELD: - case AARCH64_INSN_HINT_WFE: - case AARCH64_INSN_HINT_WFI: - case AARCH64_INSN_HINT_SEV: - case AARCH64_INSN_HINT_SEVL: - return false; - default: + case AARCH64_INSN_HINT_XPACLRI: + case AARCH64_INSN_HINT_PACIA_1716: + case AARCH64_INSN_HINT_PACIB_1716: + case AARCH64_INSN_HINT_AUTIA_1716: + case AARCH64_INSN_HINT_AUTIB_1716: + case AARCH64_INSN_HINT_PACIAZ: + case AARCH64_INSN_HINT_PACIASP: + case AARCH64_INSN_HINT_PACIBZ: + case AARCH64_INSN_HINT_PACIBSP: + case AARCH64_INSN_HINT_AUTIAZ: + case AARCH64_INSN_HINT_AUTIASP: + case AARCH64_INSN_HINT_AUTIBZ: + case AARCH64_INSN_HINT_AUTIBSP: + case AARCH64_INSN_HINT_BTI: + case AARCH64_INSN_HINT_BTIC: + case AARCH64_INSN_HINT_BTIJ: + case AARCH64_INSN_HINT_BTIJC: + case AARCH64_INSN_HINT_NOP: return true; + default: + return false; } } @@ -574,7 +586,7 @@ u32 aarch64_insn_gen_cond_branch_imm(unsigned long pc, unsigned long addr, offset >> 2); } -u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_cr_op op) { return aarch64_insn_get_hint_value() | op; } @@ -1535,16 +1547,10 @@ static u32 aarch64_encode_immediate(u64 imm, u32 insn) { unsigned int immr, imms, n, ones, ror, esz, tmp; - u64 mask = ~0UL; - - /* Can't encode full zeroes or full ones */ - if (!imm || !~imm) - return AARCH64_BREAK_FAULT; + u64 mask; switch (variant) { case AARCH64_INSN_VARIANT_32BIT: - if (upper_32_bits(imm)) - return AARCH64_BREAK_FAULT; esz = 32; break; case AARCH64_INSN_VARIANT_64BIT: @@ -1556,6 +1562,12 @@ static u32 aarch64_encode_immediate(u64 imm, return AARCH64_BREAK_FAULT; } + mask = GENMASK(esz - 1, 0); + + /* Can't encode full zeroes, full ones, or value wider than the mask */ + if (!imm || imm == mask || imm & ~mask) + return AARCH64_BREAK_FAULT; + /* * Inverse of Replicate(). Try to spot a repeating pattern * with a pow2 stride. diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index b40c3b0def92..522e6f517ec0 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -138,12 +138,12 @@ static int setup_dtb(struct kimage *image, /* add rng-seed */ if (rng_is_initialized()) { - u8 rng_seed[RNG_SEED_SIZE]; - get_random_bytes(rng_seed, RNG_SEED_SIZE); - ret = fdt_setprop(dtb, off, FDT_PROP_RNG_SEED, rng_seed, - RNG_SEED_SIZE); + void *rng_seed; + ret = fdt_setprop_placeholder(dtb, off, FDT_PROP_RNG_SEED, + RNG_SEED_SIZE, &rng_seed); if (ret) goto out; + get_random_bytes(rng_seed, RNG_SEED_SIZE); } else { pr_notice("RNG is not initialised: omitting \"%s\" property\n", FDT_PROP_RNG_SEED); @@ -284,7 +284,7 @@ int load_other_segments(struct kimage *image, image->arch.elf_headers_sz = headers_sz; pr_debug("Loaded elf core header at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - image->arch.elf_headers_mem, headers_sz, headers_sz); + image->arch.elf_headers_mem, kbuf.bufsz, kbuf.memsz); } /* load initrd */ @@ -305,7 +305,7 @@ int load_other_segments(struct kimage *image, initrd_load_addr = kbuf.mem; pr_debug("Loaded initrd at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - initrd_load_addr, initrd_len, initrd_len); + initrd_load_addr, kbuf.bufsz, kbuf.memsz); } /* load dtb */ @@ -332,7 +332,7 @@ int load_other_segments(struct kimage *image, image->arch.dtb_mem = kbuf.mem; pr_debug("Loaded dtb at 0x%lx bufsz=0x%lx memsz=0x%lx\n", - kbuf.mem, dtb_len, dtb_len); + kbuf.mem, kbuf.bufsz, kbuf.memsz); return 0; diff --git a/arch/arm64/kernel/paravirt.c b/arch/arm64/kernel/paravirt.c index 1ef702b0be2d..295d66490584 100644 --- a/arch/arm64/kernel/paravirt.c +++ b/arch/arm64/kernel/paravirt.c @@ -120,7 +120,7 @@ static bool has_pv_steal_clock(void) struct arm_smccc_res res; /* To detect the presence of PV time support we require SMCCC 1.1+ */ - if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + if (arm_smccc_1_1_get_conduit() == SMCCC_CONDUIT_NONE) return false; arm_smccc_1_1_invoke(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, diff --git a/arch/arm64/kernel/probes/decode-insn.c b/arch/arm64/kernel/probes/decode-insn.c index b78fac9e546c..263d5fba4c8a 100644 --- a/arch/arm64/kernel/probes/decode-insn.c +++ b/arch/arm64/kernel/probes/decode-insn.c @@ -46,7 +46,7 @@ static bool __kprobes aarch64_insn_is_steppable(u32 insn) * except for the NOP case. */ if (aarch64_insn_is_hint(insn)) - return aarch64_insn_is_nop(insn); + return aarch64_insn_is_steppable_hint(insn); return true; } diff --git a/arch/arm64/kernel/probes/kprobes_trampoline.S b/arch/arm64/kernel/probes/kprobes_trampoline.S index 45dce03aaeaf..890ca72c5a51 100644 --- a/arch/arm64/kernel/probes/kprobes_trampoline.S +++ b/arch/arm64/kernel/probes/kprobes_trampoline.S @@ -61,7 +61,7 @@ ldp x28, x29, [sp, #S_X28] .endm -ENTRY(kretprobe_trampoline) +SYM_CODE_START(kretprobe_trampoline) sub sp, sp, #S_FRAME_SIZE save_all_base_regs @@ -79,4 +79,4 @@ ENTRY(kretprobe_trampoline) add sp, sp, #S_FRAME_SIZE ret -ENDPROC(kretprobe_trampoline) +SYM_CODE_END(kretprobe_trampoline) diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 56be4cbf771f..eade7807e819 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -11,6 +11,7 @@ #include <linux/compat.h> #include <linux/efi.h> +#include <linux/elf.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/sched/debug.h> @@ -18,6 +19,7 @@ #include <linux/sched/task_stack.h> #include <linux/kernel.h> #include <linux/lockdep.h> +#include <linux/mman.h> #include <linux/mm.h> #include <linux/stddef.h> #include <linux/sysctl.h> @@ -209,6 +211,15 @@ void machine_restart(char *cmd) while (1); } +#define bstr(suffix, str) [PSR_BTYPE_ ## suffix >> PSR_BTYPE_SHIFT] = str +static const char *const btypes[] = { + bstr(NONE, "--"), + bstr( JC, "jc"), + bstr( C, "-c"), + bstr( J , "j-") +}; +#undef bstr + static void print_pstate(struct pt_regs *regs) { u64 pstate = regs->pstate; @@ -227,7 +238,10 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_AA32_I_BIT ? 'I' : 'i', pstate & PSR_AA32_F_BIT ? 'F' : 'f'); } else { - printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO)\n", + const char *btype_str = btypes[(pstate & PSR_BTYPE_MASK) >> + PSR_BTYPE_SHIFT]; + + printk("pstate: %08llx (%c%c%c%c %c%c%c%c %cPAN %cUAO BTYPE=%s)\n", pstate, pstate & PSR_N_BIT ? 'N' : 'n', pstate & PSR_Z_BIT ? 'Z' : 'z', @@ -238,7 +252,8 @@ static void print_pstate(struct pt_regs *regs) pstate & PSR_I_BIT ? 'I' : 'i', pstate & PSR_F_BIT ? 'F' : 'f', pstate & PSR_PAN_BIT ? '+' : '-', - pstate & PSR_UAO_BIT ? '+' : '-'); + pstate & PSR_UAO_BIT ? '+' : '-', + btype_str); } } @@ -655,3 +670,25 @@ asmlinkage void __sched arm64_preempt_schedule_irq(void) if (system_capabilities_finalized()) preempt_schedule_irq(); } + +#ifdef CONFIG_BINFMT_ELF +int arch_elf_adjust_prot(int prot, const struct arch_elf_state *state, + bool has_interp, bool is_interp) +{ + /* + * For dynamically linked executables the interpreter is + * responsible for setting PROT_BTI on everything except + * itself. + */ + if (is_interp != has_interp) + return prot; + + if (!(state->flags & ARM64_ELF_BTI)) + return prot; + + if (prot & PROT_EXEC) + prot |= PROT_BTI; + + return prot; +} +#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index e7b01904f180..76790a5f2a0d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1875,7 +1875,7 @@ void syscall_trace_exit(struct pt_regs *regs) */ #define SPSR_EL1_AARCH64_RES0_BITS \ (GENMASK_ULL(63, 32) | GENMASK_ULL(27, 25) | GENMASK_ULL(23, 22) | \ - GENMASK_ULL(20, 13) | GENMASK_ULL(11, 10) | GENMASK_ULL(5, 5)) + GENMASK_ULL(20, 13) | GENMASK_ULL(5, 5)) #define SPSR_EL1_AARCH32_RES0_BITS \ (GENMASK_ULL(63, 32) | GENMASK_ULL(22, 22) | GENMASK_ULL(20, 20)) diff --git a/arch/arm64/kernel/reloc_test_syms.S b/arch/arm64/kernel/reloc_test_syms.S index 16a34f188f26..c50f45fa29fa 100644 --- a/arch/arm64/kernel/reloc_test_syms.S +++ b/arch/arm64/kernel/reloc_test_syms.S @@ -5,81 +5,81 @@ #include <linux/linkage.h> -ENTRY(absolute_data64) +SYM_FUNC_START(absolute_data64) ldr x0, 0f ret 0: .quad sym64_abs -ENDPROC(absolute_data64) +SYM_FUNC_END(absolute_data64) -ENTRY(absolute_data32) +SYM_FUNC_START(absolute_data32) ldr w0, 0f ret 0: .long sym32_abs -ENDPROC(absolute_data32) +SYM_FUNC_END(absolute_data32) -ENTRY(absolute_data16) +SYM_FUNC_START(absolute_data16) adr x0, 0f ldrh w0, [x0] ret 0: .short sym16_abs, 0 -ENDPROC(absolute_data16) +SYM_FUNC_END(absolute_data16) -ENTRY(signed_movw) +SYM_FUNC_START(signed_movw) movz x0, #:abs_g2_s:sym64_abs movk x0, #:abs_g1_nc:sym64_abs movk x0, #:abs_g0_nc:sym64_abs ret -ENDPROC(signed_movw) +SYM_FUNC_END(signed_movw) -ENTRY(unsigned_movw) +SYM_FUNC_START(unsigned_movw) movz x0, #:abs_g3:sym64_abs movk x0, #:abs_g2_nc:sym64_abs movk x0, #:abs_g1_nc:sym64_abs movk x0, #:abs_g0_nc:sym64_abs ret -ENDPROC(unsigned_movw) +SYM_FUNC_END(unsigned_movw) .align 12 .space 0xff8 -ENTRY(relative_adrp) +SYM_FUNC_START(relative_adrp) adrp x0, sym64_rel add x0, x0, #:lo12:sym64_rel ret -ENDPROC(relative_adrp) +SYM_FUNC_END(relative_adrp) .align 12 .space 0xffc -ENTRY(relative_adrp_far) +SYM_FUNC_START(relative_adrp_far) adrp x0, memstart_addr add x0, x0, #:lo12:memstart_addr ret -ENDPROC(relative_adrp_far) +SYM_FUNC_END(relative_adrp_far) -ENTRY(relative_adr) +SYM_FUNC_START(relative_adr) adr x0, sym64_rel ret -ENDPROC(relative_adr) +SYM_FUNC_END(relative_adr) -ENTRY(relative_data64) +SYM_FUNC_START(relative_data64) adr x1, 0f ldr x0, [x1] add x0, x0, x1 ret 0: .quad sym64_rel - . -ENDPROC(relative_data64) +SYM_FUNC_END(relative_data64) -ENTRY(relative_data32) +SYM_FUNC_START(relative_data32) adr x1, 0f ldr w0, [x1] add x0, x0, x1 ret 0: .long sym64_rel - . -ENDPROC(relative_data32) +SYM_FUNC_END(relative_data32) -ENTRY(relative_data16) +SYM_FUNC_START(relative_data16) adr x1, 0f ldrsh w0, [x1] add x0, x0, x1 ret 0: .short sym64_rel - ., 0 -ENDPROC(relative_data16) +SYM_FUNC_END(relative_data16) diff --git a/arch/arm64/kernel/relocate_kernel.S b/arch/arm64/kernel/relocate_kernel.S index c40ce496c78b..542d6edc6806 100644 --- a/arch/arm64/kernel/relocate_kernel.S +++ b/arch/arm64/kernel/relocate_kernel.S @@ -26,7 +26,7 @@ * control_code_page, a special page which has been set up to be preserved * during the copy operation. */ -ENTRY(arm64_relocate_new_kernel) +SYM_CODE_START(arm64_relocate_new_kernel) /* Setup the list loop variables. */ mov x18, x2 /* x18 = dtb address */ @@ -111,7 +111,7 @@ ENTRY(arm64_relocate_new_kernel) mov x3, xzr br x17 -ENDPROC(arm64_relocate_new_kernel) +SYM_CODE_END(arm64_relocate_new_kernel) .align 3 /* To keep the 64-bit values below naturally aligned. */ diff --git a/arch/arm64/kernel/scs.c b/arch/arm64/kernel/scs.c new file mode 100644 index 000000000000..e8f7ff45dd8f --- /dev/null +++ b/arch/arm64/kernel/scs.c @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Shadow Call Stack support. + * + * Copyright (C) 2019 Google LLC + */ + +#include <linux/percpu.h> +#include <linux/scs.h> + +DEFINE_SCS(irq_shadow_call_stack); + +#ifdef CONFIG_ARM_SDE_INTERFACE +DEFINE_SCS(sdei_shadow_call_stack_normal); +DEFINE_SCS(sdei_shadow_call_stack_critical); +#endif diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index d6259dac62b6..dab88260b137 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -95,19 +95,7 @@ static bool on_sdei_normal_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_normal_ptr); unsigned long high = low + SDEI_STACK_SIZE; - if (!low) - return false; - - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_SDEI_NORMAL; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_SDEI_NORMAL, info); } static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) @@ -115,19 +103,7 @@ static bool on_sdei_critical_stack(unsigned long sp, struct stack_info *info) unsigned long low = (unsigned long)raw_cpu_read(sdei_stack_critical_ptr); unsigned long high = low + SDEI_STACK_SIZE; - if (!low) - return false; - - if (sp < low || sp >= high) - return false; - - if (info) { - info->low = low; - info->high = high; - info->type = STACK_TYPE_SDEI_CRITICAL; - } - - return true; + return on_stack(sp, low, high, STACK_TYPE_SDEI_CRITICAL, info); } bool _on_sdei_stack(unsigned long sp, struct stack_info *info) @@ -251,22 +227,12 @@ asmlinkage __kprobes notrace unsigned long __sdei_handler(struct pt_regs *regs, struct sdei_registered_event *arg) { unsigned long ret; - bool do_nmi_exit = false; - /* - * nmi_enter() deals with printk() re-entrance and use of RCU when - * RCU believed this CPU was idle. Because critical events can - * interrupt normal events, we may already be in_nmi(). - */ - if (!in_nmi()) { - nmi_enter(); - do_nmi_exit = true; - } + nmi_enter(); ret = _sdei_handler(regs, arg); - if (do_nmi_exit) - nmi_exit(); + nmi_exit(); return ret; } diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 339882db5a91..801d56cdf701 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -732,6 +732,22 @@ static void setup_return(struct pt_regs *regs, struct k_sigaction *ka, regs->regs[29] = (unsigned long)&user->next_frame->fp; regs->pc = (unsigned long)ka->sa.sa_handler; + /* + * Signal delivery is a (wacky) indirect function call in + * userspace, so simulate the same setting of BTYPE as a BLR + * <register containing the signal handler entry point>. + * Signal delivery to a location in a PROT_BTI guarded page + * that is not a function entry point will now trigger a + * SIGILL in userspace. + * + * If the signal handler entry point is not in a PROT_BTI + * guarded page, this is harmless. + */ + if (system_supports_bti()) { + regs->pstate &= ~PSR_BTYPE_MASK; + regs->pstate |= PSR_BTYPE_C; + } + if (ka->sa.sa_flags & SA_RESTORER) sigtramp = ka->sa.sa_restorer; else diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index 7b2f2e650c44..ba40d57757d6 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -62,7 +62,7 @@ * * x0 = struct sleep_stack_data area */ -ENTRY(__cpu_suspend_enter) +SYM_FUNC_START(__cpu_suspend_enter) stp x29, lr, [x0, #SLEEP_STACK_DATA_CALLEE_REGS] stp x19, x20, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+16] stp x21, x22, [x0,#SLEEP_STACK_DATA_CALLEE_REGS+32] @@ -95,23 +95,22 @@ ENTRY(__cpu_suspend_enter) ldp x29, lr, [sp], #16 mov x0, #1 ret -ENDPROC(__cpu_suspend_enter) +SYM_FUNC_END(__cpu_suspend_enter) .pushsection ".idmap.text", "awx" -ENTRY(cpu_resume) +SYM_CODE_START(cpu_resume) bl el2_setup // if in EL2 drop to EL1 cleanly - mov x0, #ARM64_CPU_RUNTIME bl __cpu_setup /* enable the MMU early - so we can access sleep_save_stash by va */ adrp x1, swapper_pg_dir bl __enable_mmu ldr x8, =_cpu_resume br x8 -ENDPROC(cpu_resume) +SYM_CODE_END(cpu_resume) .ltorg .popsection -ENTRY(_cpu_resume) +SYM_FUNC_START(_cpu_resume) mrs x1, mpidr_el1 adr_l x8, mpidr_hash // x8 = struct mpidr_hash virt address @@ -147,4 +146,4 @@ ENTRY(_cpu_resume) ldp x29, lr, [x29] mov x0, #0 ret -ENDPROC(_cpu_resume) +SYM_FUNC_END(_cpu_resume) diff --git a/arch/arm64/kernel/smccc-call.S b/arch/arm64/kernel/smccc-call.S index 54655273d1e0..1f93809528a4 100644 --- a/arch/arm64/kernel/smccc-call.S +++ b/arch/arm64/kernel/smccc-call.S @@ -30,9 +30,9 @@ * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, * struct arm_smccc_quirk *quirk) */ -ENTRY(__arm_smccc_smc) +SYM_FUNC_START(__arm_smccc_smc) SMCCC smc -ENDPROC(__arm_smccc_smc) +SYM_FUNC_END(__arm_smccc_smc) EXPORT_SYMBOL(__arm_smccc_smc) /* @@ -41,7 +41,7 @@ EXPORT_SYMBOL(__arm_smccc_smc) * unsigned long a6, unsigned long a7, struct arm_smccc_res *res, * struct arm_smccc_quirk *quirk) */ -ENTRY(__arm_smccc_hvc) +SYM_FUNC_START(__arm_smccc_hvc) SMCCC hvc -ENDPROC(__arm_smccc_hvc) +SYM_FUNC_END(__arm_smccc_hvc) EXPORT_SYMBOL(__arm_smccc_hvc) diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index bb813d06114a..4b6f4999d06a 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -65,7 +65,7 @@ EXPORT_PER_CPU_SYMBOL(cpu_number); */ struct secondary_data secondary_data; /* Number of CPUs which aren't online, but looping in kernel text. */ -int cpus_stuck_in_kernel; +static int cpus_stuck_in_kernel; enum ipi_msg_type { IPI_RESCHEDULE, @@ -114,10 +114,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) */ secondary_data.task = idle; secondary_data.stack = task_stack_page(idle) + THREAD_SIZE; -#if defined(CONFIG_ARM64_PTR_AUTH) - secondary_data.ptrauth_key.apia.lo = idle->thread.keys_kernel.apia.lo; - secondary_data.ptrauth_key.apia.hi = idle->thread.keys_kernel.apia.hi; -#endif update_cpu_boot_status(CPU_MMU_OFF); __flush_dcache_area(&secondary_data, sizeof(secondary_data)); @@ -140,10 +136,6 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) pr_crit("CPU%u: failed to come online\n", cpu); secondary_data.task = NULL; secondary_data.stack = NULL; -#if defined(CONFIG_ARM64_PTR_AUTH) - secondary_data.ptrauth_key.apia.lo = 0; - secondary_data.ptrauth_key.apia.hi = 0; -#endif __flush_dcache_area(&secondary_data, sizeof(secondary_data)); status = READ_ONCE(secondary_data.status); if (status == CPU_MMU_OFF) @@ -430,7 +422,7 @@ static void __init hyp_mode_check(void) "CPU: CPUs started in inconsistent modes"); else pr_info("CPU: All CPU(s) started at EL1\n"); - if (IS_ENABLED(CONFIG_KVM_ARM_HOST)) + if (IS_ENABLED(CONFIG_KVM)) kvm_compute_layout(); } diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index a12c0c88d345..5f5b868292f5 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -98,6 +98,24 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, regs->orig_x0 = regs->regs[0]; regs->syscallno = scno; + /* + * BTI note: + * The architecture does not guarantee that SPSR.BTYPE is zero + * on taking an SVC, so we could return to userspace with a + * non-zero BTYPE after the syscall. + * + * This shouldn't matter except when userspace is explicitly + * doing something stupid, such as setting PROT_BTI on a page + * that lacks conforming BTI/PACIxSP instructions, falling + * through from one executable page to another with differing + * PROT_BTI, or messing with BTYPE via ptrace: in such cases, + * userspace should not be surprised if a SIGILL occurs on + * syscall return. + * + * So, don't touch regs->pstate & PSR_BTYPE_MASK here. + * (Similarly for HVC and SMC elsewhere.) + */ + cortex_a76_erratum_1463225_svc_handler(); local_daif_restore(DAIF_PROCCTX); user_exit(); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index cf402be5c573..d332590f5978 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -272,6 +272,61 @@ void arm64_notify_die(const char *str, struct pt_regs *regs, } } +#ifdef CONFIG_COMPAT +#define PSTATE_IT_1_0_SHIFT 25 +#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) +#define PSTATE_IT_7_2_SHIFT 10 +#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) + +static u32 compat_get_it_state(struct pt_regs *regs) +{ + u32 it, pstate = regs->pstate; + + it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; + it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; + + return it; +} + +static void compat_set_it_state(struct pt_regs *regs, u32 it) +{ + u32 pstate_it; + + pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; + pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; + + regs->pstate &= ~PSR_AA32_IT_MASK; + regs->pstate |= pstate_it; +} + +static void advance_itstate(struct pt_regs *regs) +{ + u32 it; + + /* ARM mode */ + if (!(regs->pstate & PSR_AA32_T_BIT) || + !(regs->pstate & PSR_AA32_IT_MASK)) + return; + + it = compat_get_it_state(regs); + + /* + * If this is the last instruction of the block, wipe the IT + * state. Otherwise advance it. + */ + if (!(it & 7)) + it = 0; + else + it = (it & 0xe0) | ((it << 1) & 0x1f); + + compat_set_it_state(regs, it); +} +#else +static void advance_itstate(struct pt_regs *regs) +{ +} +#endif + void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) { regs->pc += size; @@ -282,6 +337,11 @@ void arm64_skip_faulting_instruction(struct pt_regs *regs, unsigned long size) */ if (user_mode(regs)) user_fastforward_single_step(current); + + if (compat_user_mode(regs)) + advance_itstate(regs); + else + regs->pstate &= ~PSR_BTYPE_MASK; } static LIST_HEAD(undef_hook); @@ -411,6 +471,13 @@ void do_undefinstr(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_undefinstr); +void do_bti(struct pt_regs *regs) +{ + BUG_ON(!user_mode(regs)); + force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); +} +NOKPROBE_SYMBOL(do_bti); + #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ res = -EFAULT; \ @@ -566,34 +633,7 @@ static const struct sys64_hook sys64_hooks[] = { {}, }; - #ifdef CONFIG_COMPAT -#define PSTATE_IT_1_0_SHIFT 25 -#define PSTATE_IT_1_0_MASK (0x3 << PSTATE_IT_1_0_SHIFT) -#define PSTATE_IT_7_2_SHIFT 10 -#define PSTATE_IT_7_2_MASK (0x3f << PSTATE_IT_7_2_SHIFT) - -static u32 compat_get_it_state(struct pt_regs *regs) -{ - u32 it, pstate = regs->pstate; - - it = (pstate & PSTATE_IT_1_0_MASK) >> PSTATE_IT_1_0_SHIFT; - it |= ((pstate & PSTATE_IT_7_2_MASK) >> PSTATE_IT_7_2_SHIFT) << 2; - - return it; -} - -static void compat_set_it_state(struct pt_regs *regs, u32 it) -{ - u32 pstate_it; - - pstate_it = (it << PSTATE_IT_1_0_SHIFT) & PSTATE_IT_1_0_MASK; - pstate_it |= ((it >> 2) << PSTATE_IT_7_2_SHIFT) & PSTATE_IT_7_2_MASK; - - regs->pstate &= ~PSR_AA32_IT_MASK; - regs->pstate |= pstate_it; -} - static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) { int cond; @@ -614,42 +654,12 @@ static bool cp15_cond_valid(unsigned int esr, struct pt_regs *regs) return aarch32_opcode_cond_checks[cond](regs->pstate); } -static void advance_itstate(struct pt_regs *regs) -{ - u32 it; - - /* ARM mode */ - if (!(regs->pstate & PSR_AA32_T_BIT) || - !(regs->pstate & PSR_AA32_IT_MASK)) - return; - - it = compat_get_it_state(regs); - - /* - * If this is the last instruction of the block, wipe the IT - * state. Otherwise advance it. - */ - if (!(it & 7)) - it = 0; - else - it = (it & 0xe0) | ((it << 1) & 0x1f); - - compat_set_it_state(regs, it); -} - -static void arm64_compat_skip_faulting_instruction(struct pt_regs *regs, - unsigned int sz) -{ - advance_itstate(regs); - arm64_skip_faulting_instruction(regs, sz); -} - static void compat_cntfrq_read_handler(unsigned int esr, struct pt_regs *regs) { int reg = (esr & ESR_ELx_CP15_32_ISS_RT_MASK) >> ESR_ELx_CP15_32_ISS_RT_SHIFT; pt_regs_write_reg(regs, reg, arch_timer_get_rate()); - arm64_compat_skip_faulting_instruction(regs, 4); + arm64_skip_faulting_instruction(regs, 4); } static const struct sys64_hook cp15_32_hooks[] = { @@ -669,7 +679,7 @@ static void compat_cntvct_read_handler(unsigned int esr, struct pt_regs *regs) pt_regs_write_reg(regs, rt, lower_32_bits(val)); pt_regs_write_reg(regs, rt2, upper_32_bits(val)); - arm64_compat_skip_faulting_instruction(regs, 4); + arm64_skip_faulting_instruction(regs, 4); } static const struct sys64_hook cp15_64_hooks[] = { @@ -690,7 +700,7 @@ void do_cp15instr(unsigned int esr, struct pt_regs *regs) * There is no T16 variant of a CP access, so we * always advance PC by 4 bytes. */ - arm64_compat_skip_faulting_instruction(regs, 4); + arm64_skip_faulting_instruction(regs, 4); return; } @@ -753,6 +763,7 @@ static const char *esr_class_str[] = { [ESR_ELx_EC_CP10_ID] = "CP10 MRC/VMRS", [ESR_ELx_EC_PAC] = "PAC", [ESR_ELx_EC_CP14_64] = "CP14 MCRR/MRRC", + [ESR_ELx_EC_BTI] = "BTI", [ESR_ELx_EC_ILL] = "PSTATE.IL", [ESR_ELx_EC_SVC32] = "SVC (AArch32)", [ESR_ELx_EC_HVC32] = "HVC (AArch32)", @@ -906,17 +917,13 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr) asmlinkage void do_serror(struct pt_regs *regs, unsigned int esr) { - const bool was_in_nmi = in_nmi(); - - if (!was_in_nmi) - nmi_enter(); + nmi_enter(); /* non-RAS errors are not containable */ if (!arm64_is_ras_serror(esr) || arm64_is_fatal_ras_serror(regs, esr)) arm64_serror_panic(regs, esr); - if (!was_in_nmi) - nmi_exit(); + nmi_exit(); } asmlinkage void enter_from_user_mode(void) @@ -1047,11 +1054,11 @@ int __init early_brk64(unsigned long addr, unsigned int esr, return bug_handler(regs, esr) != DBG_HOOK_HANDLED; } -/* This registration must happen early, before debug_traps_init(). */ void __init trap_init(void) { register_kernel_break_hook(&bug_break_hook); #ifdef CONFIG_KASAN_SW_TAGS register_kernel_break_hook(&kasan_break_hook); #endif + debug_traps_init(); } diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 033a48f30dbb..d51a898fd60f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -33,20 +33,14 @@ extern char vdso_start[], vdso_end[]; extern char vdso32_start[], vdso32_end[]; #endif /* CONFIG_COMPAT_VDSO */ -/* vdso_lookup arch_index */ -enum arch_vdso_type { - ARM64_VDSO = 0, +enum vdso_abi { + VDSO_ABI_AA64, #ifdef CONFIG_COMPAT_VDSO - ARM64_VDSO32 = 1, + VDSO_ABI_AA32, #endif /* CONFIG_COMPAT_VDSO */ }; -#ifdef CONFIG_COMPAT_VDSO -#define VDSO_TYPES (ARM64_VDSO32 + 1) -#else -#define VDSO_TYPES (ARM64_VDSO + 1) -#endif /* CONFIG_COMPAT_VDSO */ -struct __vdso_abi { +struct vdso_abi_info { const char *name; const char *vdso_code_start; const char *vdso_code_end; @@ -57,14 +51,14 @@ struct __vdso_abi { struct vm_special_mapping *cm; }; -static struct __vdso_abi vdso_lookup[VDSO_TYPES] __ro_after_init = { - { +static struct vdso_abi_info vdso_info[] __ro_after_init = { + [VDSO_ABI_AA64] = { .name = "vdso", .vdso_code_start = vdso_start, .vdso_code_end = vdso_end, }, #ifdef CONFIG_COMPAT_VDSO - { + [VDSO_ABI_AA32] = { .name = "vdso32", .vdso_code_start = vdso32_start, .vdso_code_end = vdso32_end, @@ -81,13 +75,13 @@ static union { } vdso_data_store __page_aligned_data; struct vdso_data *vdso_data = vdso_data_store.data; -static int __vdso_remap(enum arch_vdso_type arch_index, +static int __vdso_remap(enum vdso_abi abi, const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { unsigned long new_size = new_vma->vm_end - new_vma->vm_start; - unsigned long vdso_size = vdso_lookup[arch_index].vdso_code_end - - vdso_lookup[arch_index].vdso_code_start; + unsigned long vdso_size = vdso_info[abi].vdso_code_end - + vdso_info[abi].vdso_code_start; if (vdso_size != new_size) return -EINVAL; @@ -97,24 +91,24 @@ static int __vdso_remap(enum arch_vdso_type arch_index, return 0; } -static int __vdso_init(enum arch_vdso_type arch_index) +static int __vdso_init(enum vdso_abi abi) { int i; struct page **vdso_pagelist; unsigned long pfn; - if (memcmp(vdso_lookup[arch_index].vdso_code_start, "\177ELF", 4)) { + if (memcmp(vdso_info[abi].vdso_code_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } - vdso_lookup[arch_index].vdso_pages = ( - vdso_lookup[arch_index].vdso_code_end - - vdso_lookup[arch_index].vdso_code_start) >> + vdso_info[abi].vdso_pages = ( + vdso_info[abi].vdso_code_end - + vdso_info[abi].vdso_code_start) >> PAGE_SHIFT; /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kcalloc(vdso_lookup[arch_index].vdso_pages + 1, + vdso_pagelist = kcalloc(vdso_info[abi].vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); if (vdso_pagelist == NULL) @@ -125,26 +119,27 @@ static int __vdso_init(enum arch_vdso_type arch_index) /* Grab the vDSO code pages. */ - pfn = sym_to_pfn(vdso_lookup[arch_index].vdso_code_start); + pfn = sym_to_pfn(vdso_info[abi].vdso_code_start); - for (i = 0; i < vdso_lookup[arch_index].vdso_pages; i++) + for (i = 0; i < vdso_info[abi].vdso_pages; i++) vdso_pagelist[i + 1] = pfn_to_page(pfn + i); - vdso_lookup[arch_index].dm->pages = &vdso_pagelist[0]; - vdso_lookup[arch_index].cm->pages = &vdso_pagelist[1]; + vdso_info[abi].dm->pages = &vdso_pagelist[0]; + vdso_info[abi].cm->pages = &vdso_pagelist[1]; return 0; } -static int __setup_additional_pages(enum arch_vdso_type arch_index, +static int __setup_additional_pages(enum vdso_abi abi, struct mm_struct *mm, struct linux_binprm *bprm, int uses_interp) { unsigned long vdso_base, vdso_text_len, vdso_mapping_len; + unsigned long gp_flags = 0; void *ret; - vdso_text_len = vdso_lookup[arch_index].vdso_pages << PAGE_SHIFT; + vdso_text_len = vdso_info[abi].vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ vdso_mapping_len = vdso_text_len + PAGE_SIZE; @@ -156,16 +151,19 @@ static int __setup_additional_pages(enum arch_vdso_type arch_index, ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, VM_READ|VM_MAYREAD, - vdso_lookup[arch_index].dm); + vdso_info[abi].dm); if (IS_ERR(ret)) goto up_fail; + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) && system_supports_bti()) + gp_flags = VM_ARM64_BTI; + vdso_base += PAGE_SIZE; mm->context.vdso = (void *)vdso_base; ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| + VM_READ|VM_EXEC|gp_flags| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_lookup[arch_index].cm); + vdso_info[abi].cm); if (IS_ERR(ret)) goto up_fail; @@ -184,46 +182,42 @@ up_fail: static int aarch32_vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - return __vdso_remap(ARM64_VDSO32, sm, new_vma); + return __vdso_remap(VDSO_ABI_AA32, sm, new_vma); } #endif /* CONFIG_COMPAT_VDSO */ -/* - * aarch32_vdso_pages: - * 0 - kuser helpers - * 1 - sigreturn code - * or (CONFIG_COMPAT_VDSO): - * 0 - kuser helpers - * 1 - vdso data - * 2 - vdso code - */ -#define C_VECTORS 0 +enum aarch32_map { + AA32_MAP_VECTORS, /* kuser helpers */ #ifdef CONFIG_COMPAT_VDSO -#define C_VVAR 1 -#define C_VDSO 2 -#define C_PAGES (C_VDSO + 1) + AA32_MAP_VVAR, + AA32_MAP_VDSO, #else -#define C_SIGPAGE 1 -#define C_PAGES (C_SIGPAGE + 1) -#endif /* CONFIG_COMPAT_VDSO */ -static struct page *aarch32_vdso_pages[C_PAGES] __ro_after_init; -static struct vm_special_mapping aarch32_vdso_spec[C_PAGES] = { - { + AA32_MAP_SIGPAGE +#endif +}; + +static struct page *aarch32_vectors_page __ro_after_init; +#ifndef CONFIG_COMPAT_VDSO +static struct page *aarch32_sig_page __ro_after_init; +#endif + +static struct vm_special_mapping aarch32_vdso_maps[] = { + [AA32_MAP_VECTORS] = { .name = "[vectors]", /* ABI */ - .pages = &aarch32_vdso_pages[C_VECTORS], + .pages = &aarch32_vectors_page, }, #ifdef CONFIG_COMPAT_VDSO - { + [AA32_MAP_VVAR] = { .name = "[vvar]", }, - { + [AA32_MAP_VDSO] = { .name = "[vdso]", .mremap = aarch32_vdso_mremap, }, #else - { + [AA32_MAP_SIGPAGE] = { .name = "[sigpage]", /* ABI */ - .pages = &aarch32_vdso_pages[C_SIGPAGE], + .pages = &aarch32_sig_page, }, #endif /* CONFIG_COMPAT_VDSO */ }; @@ -243,8 +237,8 @@ static int aarch32_alloc_kuser_vdso_page(void) memcpy((void *)(vdso_page + 0x1000 - kuser_sz), __kuser_helper_start, kuser_sz); - aarch32_vdso_pages[C_VECTORS] = virt_to_page(vdso_page); - flush_dcache_page(aarch32_vdso_pages[C_VECTORS]); + aarch32_vectors_page = virt_to_page(vdso_page); + flush_dcache_page(aarch32_vectors_page); return 0; } @@ -253,10 +247,10 @@ static int __aarch32_alloc_vdso_pages(void) { int ret; - vdso_lookup[ARM64_VDSO32].dm = &aarch32_vdso_spec[C_VVAR]; - vdso_lookup[ARM64_VDSO32].cm = &aarch32_vdso_spec[C_VDSO]; + vdso_info[VDSO_ABI_AA32].dm = &aarch32_vdso_maps[AA32_MAP_VVAR]; + vdso_info[VDSO_ABI_AA32].cm = &aarch32_vdso_maps[AA32_MAP_VDSO]; - ret = __vdso_init(ARM64_VDSO32); + ret = __vdso_init(VDSO_ABI_AA32); if (ret) return ret; @@ -275,8 +269,8 @@ static int __aarch32_alloc_vdso_pages(void) return -ENOMEM; memcpy((void *)sigpage, __aarch32_sigret_code_start, sigret_sz); - aarch32_vdso_pages[C_SIGPAGE] = virt_to_page(sigpage); - flush_dcache_page(aarch32_vdso_pages[C_SIGPAGE]); + aarch32_sig_page = virt_to_page(sigpage); + flush_dcache_page(aarch32_sig_page); ret = aarch32_alloc_kuser_vdso_page(); if (ret) @@ -306,7 +300,7 @@ static int aarch32_kuser_helpers_setup(struct mm_struct *mm) ret = _install_special_mapping(mm, AARCH32_VECTORS_BASE, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC, - &aarch32_vdso_spec[C_VECTORS]); + &aarch32_vdso_maps[AA32_MAP_VECTORS]); return PTR_ERR_OR_ZERO(ret); } @@ -330,7 +324,7 @@ static int aarch32_sigreturn_setup(struct mm_struct *mm) ret = _install_special_mapping(mm, addr, PAGE_SIZE, VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, - &aarch32_vdso_spec[C_SIGPAGE]); + &aarch32_vdso_maps[AA32_MAP_SIGPAGE]); if (IS_ERR(ret)) goto out; @@ -354,7 +348,7 @@ int aarch32_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) goto out; #ifdef CONFIG_COMPAT_VDSO - ret = __setup_additional_pages(ARM64_VDSO32, + ret = __setup_additional_pages(VDSO_ABI_AA32, mm, bprm, uses_interp); @@ -371,22 +365,19 @@ out: static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) { - return __vdso_remap(ARM64_VDSO, sm, new_vma); + return __vdso_remap(VDSO_ABI_AA64, sm, new_vma); } -/* - * aarch64_vdso_pages: - * 0 - vvar - * 1 - vdso - */ -#define A_VVAR 0 -#define A_VDSO 1 -#define A_PAGES (A_VDSO + 1) -static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { - { +enum aarch64_map { + AA64_MAP_VVAR, + AA64_MAP_VDSO, +}; + +static struct vm_special_mapping aarch64_vdso_maps[] __ro_after_init = { + [AA64_MAP_VVAR] = { .name = "[vvar]", }, - { + [AA64_MAP_VDSO] = { .name = "[vdso]", .mremap = vdso_mremap, }, @@ -394,10 +385,10 @@ static struct vm_special_mapping vdso_spec[A_PAGES] __ro_after_init = { static int __init vdso_init(void) { - vdso_lookup[ARM64_VDSO].dm = &vdso_spec[A_VVAR]; - vdso_lookup[ARM64_VDSO].cm = &vdso_spec[A_VDSO]; + vdso_info[VDSO_ABI_AA64].dm = &aarch64_vdso_maps[AA64_MAP_VVAR]; + vdso_info[VDSO_ABI_AA64].cm = &aarch64_vdso_maps[AA64_MAP_VDSO]; - return __vdso_init(ARM64_VDSO); + return __vdso_init(VDSO_ABI_AA64); } arch_initcall(vdso_init); @@ -410,7 +401,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, if (down_write_killable(&mm->mmap_sem)) return -EINTR; - ret = __setup_additional_pages(ARM64_VDSO, + ret = __setup_additional_pages(VDSO_ABI_AA64, mm, bprm, uses_interp); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 3862cad2410c..556d424c6f52 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -17,15 +17,19 @@ obj-vdso := vgettimeofday.o note.o sigreturn.o targets := $(obj-vdso) vdso.so vdso.so.dbg obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) +btildflags-$(CONFIG_ARM64_BTI_KERNEL) += -z force-bti + +# -Bsymbolic has been added for consistency with arm, the compat vDSO and +# potential future proofing if we end up with internal calls to the exported +# routines, as x86 does (see 6f121e548f83 ("x86, vdso: Reimplement vdso.so +# preparation in build-time C")). ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv \ - --build-id -n -T + -Bsymbolic --eh-frame-hdr --build-id -n $(btildflags-y) -T ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18 ccflags-y += -DDISABLE_BRANCH_PROFILING -VDSO_LDFLAGS := -Bsymbolic - -CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os +CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) KBUILD_CFLAGS += $(DISABLE_LTO) KASAN_SANITIZE := n UBSAN_SANITIZE := n diff --git a/arch/arm64/kernel/vdso/note.S b/arch/arm64/kernel/vdso/note.S index 0ce6ec75a525..3d4e82290c80 100644 --- a/arch/arm64/kernel/vdso/note.S +++ b/arch/arm64/kernel/vdso/note.S @@ -12,9 +12,12 @@ #include <linux/version.h> #include <linux/elfnote.h> #include <linux/build-salt.h> +#include <asm/assembler.h> ELFNOTE_START(Linux, 0, "a") .long LINUX_VERSION_CODE ELFNOTE_END BUILD_SALT + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/sigreturn.S b/arch/arm64/kernel/vdso/sigreturn.S index 12324863d5c2..620a3ef837b7 100644 --- a/arch/arm64/kernel/vdso/sigreturn.S +++ b/arch/arm64/kernel/vdso/sigreturn.S @@ -1,7 +1,11 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Sigreturn trampoline for returning from a signal when the SA_RESTORER - * flag is not set. + * flag is not set. It serves primarily as a hall of shame for crappy + * unwinders and features an exciting but mysterious NOP instruction. + * + * It's also fragile as hell, so please think twice before changing anything + * in here. * * Copyright (C) 2012 ARM Limited * @@ -9,18 +13,54 @@ */ #include <linux/linkage.h> +#include <asm/assembler.h> #include <asm/unistd.h> .text - nop -SYM_FUNC_START(__kernel_rt_sigreturn) +/* Ensure that the mysterious NOP can be associated with a function. */ .cfi_startproc + +/* + * .cfi_signal_frame causes the corresponding Frame Description Entry in the + * .eh_frame section to be annotated as a signal frame. This allows DWARF + * unwinders (e.g. libstdc++) to implement _Unwind_GetIPInfo(), which permits + * unwinding out of the signal trampoline without the need for the mysterious + * NOP. + */ .cfi_signal_frame - .cfi_def_cfa x29, 0 - .cfi_offset x29, 0 * 8 - .cfi_offset x30, 1 * 8 + +/* + * Tell the unwinder where to locate the frame record linking back to the + * interrupted context. We don't provide unwind info for registers other + * than the frame pointer and the link register here; in practice, this + * is sufficient for unwinding in C/C++ based runtimes and the values in + * the sigcontext may have been modified by this point anyway. Debuggers + * already have baked-in strategies for attempting to unwind out of signals. + */ + .cfi_def_cfa x29, 0 + .cfi_offset x29, 0 * 8 + .cfi_offset x30, 1 * 8 + +/* + * This mysterious NOP is required for some unwinders (e.g. libc++) that + * unconditionally subtract one from the result of _Unwind_GetIP() in order to + * identify the calling function. + * Hack borrowed from arch/powerpc/kernel/vdso64/sigtramp.S. + */ + nop // Mysterious NOP + +/* + * GDB relies on being able to identify the sigreturn instruction sequence to + * unwind from signal handlers. We cannot, therefore, use SYM_FUNC_START() + * here, as it will emit a BTI C instruction and break the unwinder. Thankfully, + * this function is only ever called from a RET and so omitting the landing pad + * is perfectly fine. + */ +SYM_CODE_START(__kernel_rt_sigreturn) mov x8, #__NR_rt_sigreturn svc #0 .cfi_endproc -SYM_FUNC_END(__kernel_rt_sigreturn) +SYM_CODE_END(__kernel_rt_sigreturn) + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso/vdso.S b/arch/arm64/kernel/vdso/vdso.S index d1414fee5274..c4b1990bf2be 100644 --- a/arch/arm64/kernel/vdso/vdso.S +++ b/arch/arm64/kernel/vdso/vdso.S @@ -8,6 +8,7 @@ #include <linux/init.h> #include <linux/linkage.h> #include <linux/const.h> +#include <asm/assembler.h> #include <asm/page.h> .globl vdso_start, vdso_end @@ -19,3 +20,5 @@ vdso_start: vdso_end: .previous + +emit_aarch64_feature_1_and diff --git a/arch/arm64/kernel/vdso32/sigreturn.S b/arch/arm64/kernel/vdso32/sigreturn.S index 620524969696..b0091064c3d6 100644 --- a/arch/arm64/kernel/vdso32/sigreturn.S +++ b/arch/arm64/kernel/vdso32/sigreturn.S @@ -3,6 +3,9 @@ * This file provides both A32 and T32 versions, in accordance with the * arm sigreturn code. * + * Please read the comments in arch/arm64/kernel/vdso/sigreturn.S to + * understand some of the craziness in here. + * * Copyright (C) 2018 ARM Limited */ @@ -17,39 +20,39 @@ .save {r0-r15} .pad #COMPAT_SIGFRAME_REGS_OFFSET nop -SYM_FUNC_START(__kernel_sigreturn_arm) +SYM_CODE_START(__kernel_sigreturn_arm) mov r7, #__NR_compat_sigreturn svc #0 .fnend -SYM_FUNC_END(__kernel_sigreturn_arm) +SYM_CODE_END(__kernel_sigreturn_arm) .fnstart .save {r0-r15} .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET nop -SYM_FUNC_START(__kernel_rt_sigreturn_arm) +SYM_CODE_START(__kernel_rt_sigreturn_arm) mov r7, #__NR_compat_rt_sigreturn svc #0 .fnend -SYM_FUNC_END(__kernel_rt_sigreturn_arm) +SYM_CODE_END(__kernel_rt_sigreturn_arm) .thumb .fnstart .save {r0-r15} .pad #COMPAT_SIGFRAME_REGS_OFFSET nop -SYM_FUNC_START(__kernel_sigreturn_thumb) +SYM_CODE_START(__kernel_sigreturn_thumb) mov r7, #__NR_compat_sigreturn svc #0 .fnend -SYM_FUNC_END(__kernel_sigreturn_thumb) +SYM_CODE_END(__kernel_sigreturn_thumb) .fnstart .save {r0-r15} .pad #COMPAT_RT_SIGFRAME_REGS_OFFSET nop -SYM_FUNC_START(__kernel_rt_sigreturn_thumb) +SYM_CODE_START(__kernel_rt_sigreturn_thumb) mov r7, #__NR_compat_rt_sigreturn svc #0 .fnend -SYM_FUNC_END(__kernel_rt_sigreturn_thumb) +SYM_CODE_END(__kernel_rt_sigreturn_thumb) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 497f9675071d..3be632177631 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -17,10 +17,6 @@ #include "image.h" -/* .exit.text needed in case of alternative patching */ -#define ARM_EXIT_KEEP(x) x -#define ARM_EXIT_DISCARD(x) - OUTPUT_ARCH(aarch64) ENTRY(_text) @@ -72,8 +68,8 @@ jiffies = jiffies_64; /* * The size of the PE/COFF section that covers the kernel image, which - * runs from stext to _edata, must be a round multiple of the PE/COFF - * FileAlignment, which we set to its minimum value of 0x200. 'stext' + * runs from _stext to _edata, must be a round multiple of the PE/COFF + * FileAlignment, which we set to its minimum value of 0x200. '_stext' * itself is 4 KB aligned, so padding out _edata to a 0x200 aligned * boundary should be sufficient. */ @@ -95,8 +91,6 @@ SECTIONS * order of matching. */ /DISCARD/ : { - ARM_EXIT_DISCARD(EXIT_TEXT) - ARM_EXIT_DISCARD(EXIT_DATA) EXIT_CALL *(.discard) *(.discard.*) @@ -139,6 +133,7 @@ SECTIONS idmap_pg_dir = .; . += IDMAP_DIR_SIZE; + idmap_pg_end = .; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 tramp_pg_dir = .; @@ -161,7 +156,7 @@ SECTIONS __exittext_begin = .; .exit.text : { - ARM_EXIT_KEEP(EXIT_TEXT) + EXIT_TEXT } __exittext_end = .; @@ -175,7 +170,7 @@ SECTIONS *(.altinstr_replacement) } - . = ALIGN(PAGE_SIZE); + . = ALIGN(SEGMENT_ALIGN); __inittext_end = .; __initdata_begin = .; @@ -188,7 +183,7 @@ SECTIONS *(.init.rodata.* .init.bss) /* from the EFI stub */ } .exit.data : { - ARM_EXIT_KEEP(EXIT_DATA) + EXIT_DATA } PERCPU_SECTION(L1_CACHE_BYTES) @@ -246,6 +241,7 @@ SECTIONS . += INIT_DIR_SIZE; init_pg_end = .; + . = ALIGN(SEGMENT_ALIGN); __pecoff_data_size = ABSOLUTE(. - __initdata_begin); _end = .; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 449386d76441..f1c1f981482c 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -3,7 +3,6 @@ # KVM configuration # -source "virt/kvm/Kconfig" source "virt/lib/Kconfig" menuconfig VIRTUALIZATION @@ -18,7 +17,7 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION -config KVM +menuconfig KVM bool "Kernel-based Virtual Machine (KVM) support" depends on OF # for TASKSTATS/TASK_DELAY_ACCT: @@ -28,13 +27,11 @@ config KVM select HAVE_KVM_CPU_RELAX_INTERCEPT select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_MMIO - select KVM_ARM_HOST select KVM_GENERIC_DIRTYLOG_READ_PROTECT select SRCU select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD - select KVM_ARM_PMU if HW_PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP select HAVE_KVM_IRQ_ROUTING @@ -45,23 +42,24 @@ config KVM select TASK_DELAY_ACCT ---help--- Support hosting virtualized guest machines. - We don't support KVM with 16K page tables yet, due to the multiple - levels of fake page tables. If unsure, say N. -config KVM_ARM_HOST - bool - ---help--- - Provides host support for ARM processors. +if KVM + +source "virt/kvm/Kconfig" config KVM_ARM_PMU - bool + bool "Virtual Performance Monitoring Unit (PMU) support" + depends on HW_PERF_EVENTS + default y ---help--- Adds support for a virtual Performance Monitoring Unit (PMU) in virtual machines. config KVM_INDIRECT_VECTORS - def_bool KVM && (HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS) + def_bool HARDEN_BRANCH_PREDICTOR || HARDEN_EL2_VECTORS + +endif # KVM endif # VIRTUALIZATION diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 5ffbdc39e780..8d3d9513cbfe 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -3,37 +3,25 @@ # Makefile for Kernel-based Virtual Machine module # -ccflags-y += -I $(srctree)/$(src) -I $(srctree)/virt/kvm/arm/vgic +ccflags-y += -I $(srctree)/$(src) KVM=../../../virt/kvm -obj-$(CONFIG_KVM_ARM_HOST) += kvm.o -obj-$(CONFIG_KVM_ARM_HOST) += hyp/ +obj-$(CONFIG_KVM) += kvm.o +obj-$(CONFIG_KVM) += hyp/ -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o $(KVM)/vfio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arm.o $(KVM)/arm/mmu.o $(KVM)/arm/mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/psci.o $(KVM)/arm/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hypercalls.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/pvtime.o +kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \ + $(KVM)/vfio.o $(KVM)/irqchip.o \ + arm.o mmu.o mmio.o psci.o perf.o hypercalls.o pvtime.o \ + inject_fault.o regmap.o va_layout.o hyp.o hyp-init.o handle_exit.o \ + guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o \ + vgic-sys-reg-v3.o fpsimd.o pmu.o \ + aarch32.o arch_timer.o \ + vgic/vgic.o vgic/vgic-init.o \ + vgic/vgic-irqfd.o vgic/vgic-v2.o \ + vgic/vgic-v3.o vgic/vgic-v4.o \ + vgic/vgic-mmio.o vgic/vgic-mmio-v2.o \ + vgic/vgic-mmio-v3.o vgic/vgic-kvm-device.o \ + vgic/vgic-its.o vgic/vgic-debug.o -kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o va_layout.o -kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o -kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o -kvm-$(CONFIG_KVM_ARM_HOST) += vgic-sys-reg-v3.o fpsimd.o pmu.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o - -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-debug.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o -kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o -kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o +kvm-$(CONFIG_KVM_ARM_PMU) += pmu-emul.o diff --git a/arch/arm64/kvm/aarch32.c b/arch/arm64/kvm/aarch32.c new file mode 100644 index 000000000000..0a356aa91aa1 --- /dev/null +++ b/arch/arm64/kvm/aarch32.c @@ -0,0 +1,204 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/bits.h> +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +#define DFSR_FSC_EXTABT_LPAE 0x10 +#define DFSR_FSC_EXTABT_nLPAE 0x08 +#define DFSR_LPAE BIT(9) + +/* + * Table taken from ARMv8 ARM DDI0487B-B, table G1-10. + */ +static const u8 return_offsets[8][2] = { + [0] = { 0, 0 }, /* Reset, unused */ + [1] = { 4, 2 }, /* Undefined */ + [2] = { 0, 0 }, /* SVC, unused */ + [3] = { 4, 4 }, /* Prefetch abort */ + [4] = { 8, 8 }, /* Data abort */ + [5] = { 0, 0 }, /* HVC, unused */ + [6] = { 4, 4 }, /* IRQ, unused */ + [7] = { 4, 4 }, /* FIQ, unused */ +}; + +/* + * When an exception is taken, most CPSR fields are left unchanged in the + * handler. However, some are explicitly overridden (e.g. M[4:0]). + * + * The SPSR/SPSR_ELx layouts differ, and the below is intended to work with + * either format. Note: SPSR.J bit doesn't exist in SPSR_ELx, but this bit was + * obsoleted by the ARMv7 virtualization extensions and is RES0. + * + * For the SPSR layout seen from AArch32, see: + * - ARM DDI 0406C.d, page B1-1148 + * - ARM DDI 0487E.a, page G8-6264 + * + * For the SPSR_ELx layout for AArch32 seen from AArch64, see: + * - ARM DDI 0487E.a, page C5-426 + * + * Here we manipulate the fields in order of the AArch32 SPSR_ELx layout, from + * MSB to LSB. + */ +static unsigned long get_except32_cpsr(struct kvm_vcpu *vcpu, u32 mode) +{ + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + unsigned long old, new; + + old = *vcpu_cpsr(vcpu); + new = 0; + + new |= (old & PSR_AA32_N_BIT); + new |= (old & PSR_AA32_Z_BIT); + new |= (old & PSR_AA32_C_BIT); + new |= (old & PSR_AA32_V_BIT); + new |= (old & PSR_AA32_Q_BIT); + + // CPSR.IT[7:0] are set to zero upon any exception + // See ARM DDI 0487E.a, section G1.12.3 + // See ARM DDI 0406C.d, section B1.8.3 + + new |= (old & PSR_AA32_DIT_BIT); + + // CPSR.SSBS is set to SCTLR.DSSBS upon any exception + // See ARM DDI 0487E.a, page G8-6244 + if (sctlr & BIT(31)) + new |= PSR_AA32_SSBS_BIT; + + // CPSR.PAN is unchanged unless SCTLR.SPAN == 0b0 + // SCTLR.SPAN is RES1 when ARMv8.1-PAN is not implemented + // See ARM DDI 0487E.a, page G8-6246 + new |= (old & PSR_AA32_PAN_BIT); + if (!(sctlr & BIT(23))) + new |= PSR_AA32_PAN_BIT; + + // SS does not exist in AArch32, so ignore + + // CPSR.IL is set to zero upon any exception + // See ARM DDI 0487E.a, page G1-5527 + + new |= (old & PSR_AA32_GE_MASK); + + // CPSR.IT[7:0] are set to zero upon any exception + // See prior comment above + + // CPSR.E is set to SCTLR.EE upon any exception + // See ARM DDI 0487E.a, page G8-6245 + // See ARM DDI 0406C.d, page B4-1701 + if (sctlr & BIT(25)) + new |= PSR_AA32_E_BIT; + + // CPSR.A is unchanged upon an exception to Undefined, Supervisor + // CPSR.A is set upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_A_BIT); + if (mode != PSR_AA32_MODE_UND && mode != PSR_AA32_MODE_SVC) + new |= PSR_AA32_A_BIT; + + // CPSR.I is set upon any exception + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= PSR_AA32_I_BIT; + + // CPSR.F is set upon an exception to FIQ + // CPSR.F is unchanged upon an exception to other modes + // See ARM DDI 0487E.a, pages G1-5515 to G1-5516 + // See ARM DDI 0406C.d, page B1-1182 + new |= (old & PSR_AA32_F_BIT); + if (mode == PSR_AA32_MODE_FIQ) + new |= PSR_AA32_F_BIT; + + // CPSR.T is set to SCTLR.TE upon any exception + // See ARM DDI 0487E.a, page G8-5514 + // See ARM DDI 0406C.d, page B1-1181 + if (sctlr & BIT(30)) + new |= PSR_AA32_T_BIT; + + new |= mode; + + return new; +} + +static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset) +{ + unsigned long spsr = *vcpu_cpsr(vcpu); + bool is_thumb = (spsr & PSR_AA32_T_BIT); + u32 return_offset = return_offsets[vect_offset >> 2][is_thumb]; + u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR); + + *vcpu_cpsr(vcpu) = get_except32_cpsr(vcpu, mode); + + /* Note: These now point to the banked copies */ + vcpu_write_spsr(vcpu, host_spsr_to_spsr32(spsr)); + *vcpu_reg32(vcpu, 14) = *vcpu_pc(vcpu) + return_offset; + + /* Branch to exception vector */ + if (sctlr & (1 << 13)) + vect_offset += 0xffff0000; + else /* always have security exceptions */ + vect_offset += vcpu_cp15(vcpu, c12_VBAR); + + *vcpu_pc(vcpu) = vect_offset; +} + +void kvm_inject_undef32(struct kvm_vcpu *vcpu) +{ + prepare_fault32(vcpu, PSR_AA32_MODE_UND, 4); +} + +/* + * Modelled after TakeDataAbortException() and TakePrefetchAbortException + * pseudocode. + */ +static void inject_abt32(struct kvm_vcpu *vcpu, bool is_pabt, + unsigned long addr) +{ + u32 vect_offset; + u32 *far, *fsr; + bool is_lpae; + + if (is_pabt) { + vect_offset = 12; + far = &vcpu_cp15(vcpu, c6_IFAR); + fsr = &vcpu_cp15(vcpu, c5_IFSR); + } else { /* !iabt */ + vect_offset = 16; + far = &vcpu_cp15(vcpu, c6_DFAR); + fsr = &vcpu_cp15(vcpu, c5_DFSR); + } + + prepare_fault32(vcpu, PSR_AA32_MODE_ABT, vect_offset); + + *far = addr; + + /* Give the guest an IMPLEMENTATION DEFINED exception */ + is_lpae = (vcpu_cp15(vcpu, c2_TTBCR) >> 31); + if (is_lpae) { + *fsr = DFSR_LPAE | DFSR_FSC_EXTABT_LPAE; + } else { + /* no need to shuffle FS[4] into DFSR[10] as its 0 */ + *fsr = DFSR_FSC_EXTABT_nLPAE; + } +} + +void kvm_inject_dabt32(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt32(vcpu, false, addr); +} + +void kvm_inject_pabt32(struct kvm_vcpu *vcpu, unsigned long addr) +{ + inject_abt32(vcpu, true, addr); +} diff --git a/arch/arm64/kvm/arch_timer.c b/arch/arm64/kvm/arch_timer.c new file mode 100644 index 000000000000..a1fe0ea3254e --- /dev/null +++ b/arch/arm64/kvm/arch_timer.c @@ -0,0 +1,1171 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/uaccess.h> + +#include <clocksource/arm_arch_timer.h> +#include <asm/arch_timer.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +#include <kvm/arm_vgic.h> +#include <kvm/arm_arch_timer.h> + +#include "trace.h" + +static struct timecounter *timecounter; +static unsigned int host_vtimer_irq; +static unsigned int host_ptimer_irq; +static u32 host_vtimer_irq_flags; +static u32 host_ptimer_irq_flags; + +static DEFINE_STATIC_KEY_FALSE(has_gic_active_state); + +static const struct kvm_irq_level default_ptimer_irq = { + .irq = 30, + .level = 1, +}; + +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + +static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx); +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + struct arch_timer_context *timer_ctx); +static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); +static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg, + u64 val); +static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg); + +u64 kvm_phys_timer_read(void) +{ + return timecounter->cc->read(timecounter->cc); +} + +static void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map) +{ + if (has_vhe()) { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = vcpu_ptimer(vcpu); + map->emul_ptimer = NULL; + } else { + map->direct_vtimer = vcpu_vtimer(vcpu); + map->direct_ptimer = NULL; + map->emul_ptimer = vcpu_ptimer(vcpu); + } + + trace_kvm_get_timer_map(vcpu->vcpu_id, map); +} + +static inline bool userspace_irqchip(struct kvm *kvm) +{ + return static_branch_unlikely(&userspace_irqchip_in_use) && + unlikely(!irqchip_in_kernel(kvm)); +} + +static void soft_timer_start(struct hrtimer *hrt, u64 ns) +{ + hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns), + HRTIMER_MODE_ABS_HARD); +} + +static void soft_timer_cancel(struct hrtimer *hrt) +{ + hrtimer_cancel(hrt); +} + +static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) +{ + struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; + struct arch_timer_context *ctx; + struct timer_map map; + + /* + * We may see a timer interrupt after vcpu_put() has been called which + * sets the CPU's vcpu pointer to NULL, because even though the timer + * has been disabled in timer_save_state(), the hardware interrupt + * signal may not have been retired from the interrupt controller yet. + */ + if (!vcpu) + return IRQ_HANDLED; + + get_timer_map(vcpu, &map); + + if (irq == host_vtimer_irq) + ctx = map.direct_vtimer; + else + ctx = map.direct_ptimer; + + if (kvm_timer_should_fire(ctx)) + kvm_timer_update_irq(vcpu, true, ctx); + + if (userspace_irqchip(vcpu->kvm) && + !static_branch_unlikely(&has_gic_active_state)) + disable_percpu_irq(host_vtimer_irq); + + return IRQ_HANDLED; +} + +static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) +{ + u64 cval, now; + + cval = timer_ctx->cnt_cval; + now = kvm_phys_timer_read() - timer_ctx->cntvoff; + + if (now < cval) { + u64 ns; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + return ns; + } + + return 0; +} + +static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx) +{ + WARN_ON(timer_ctx && timer_ctx->loaded); + return timer_ctx && + !(timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_IT_MASK) && + (timer_ctx->cnt_ctl & ARCH_TIMER_CTRL_ENABLE); +} + +/* + * Returns the earliest expiration time in ns among guest timers. + * Note that it will return 0 if none of timers can fire. + */ +static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu) +{ + u64 min_delta = ULLONG_MAX; + int i; + + for (i = 0; i < NR_KVM_TIMERS; i++) { + struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i]; + + WARN(ctx->loaded, "timer %d loaded\n", i); + if (kvm_timer_irq_can_fire(ctx)) + min_delta = min(min_delta, kvm_timer_compute_delta(ctx)); + } + + /* If none of timers can fire, then return 0 */ + if (min_delta == ULLONG_MAX) + return 0; + + return min_delta; +} + +static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt) +{ + struct arch_timer_cpu *timer; + struct kvm_vcpu *vcpu; + u64 ns; + + timer = container_of(hrt, struct arch_timer_cpu, bg_timer); + vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If we should have slept longer, restart it. + */ + ns = kvm_timer_earliest_exp(vcpu); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + + kvm_vcpu_wake_up(vcpu); + return HRTIMER_NORESTART; +} + +static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt) +{ + struct arch_timer_context *ctx; + struct kvm_vcpu *vcpu; + u64 ns; + + ctx = container_of(hrt, struct arch_timer_context, hrtimer); + vcpu = ctx->vcpu; + + trace_kvm_timer_hrtimer_expire(ctx); + + /* + * Check that the timer has really expired from the guest's + * PoV (NTP on the host may have forced it to expire + * early). If not ready, schedule for a later time. + */ + ns = kvm_timer_compute_delta(ctx); + if (unlikely(ns)) { + hrtimer_forward_now(hrt, ns_to_ktime(ns)); + return HRTIMER_RESTART; + } + + kvm_timer_update_irq(vcpu, true, ctx); + return HRTIMER_NORESTART; +} + +static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx) +{ + enum kvm_arch_timers index; + u64 cval, now; + + if (!timer_ctx) + return false; + + index = arch_timer_ctx_index(timer_ctx); + + if (timer_ctx->loaded) { + u32 cnt_ctl = 0; + + switch (index) { + case TIMER_VTIMER: + cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); + break; + case TIMER_PTIMER: + cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); + break; + case NR_KVM_TIMERS: + /* GCC is braindead */ + cnt_ctl = 0; + break; + } + + return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) && + (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) && + !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK); + } + + if (!kvm_timer_irq_can_fire(timer_ctx)) + return false; + + cval = timer_ctx->cnt_cval; + now = kvm_phys_timer_read() - timer_ctx->cntvoff; + + return cval <= now; +} + +bool kvm_timer_is_pending(struct kvm_vcpu *vcpu) +{ + struct timer_map map; + + get_timer_map(vcpu, &map); + + return kvm_timer_should_fire(map.direct_vtimer) || + kvm_timer_should_fire(map.direct_ptimer) || + kvm_timer_should_fire(map.emul_ptimer); +} + +/* + * Reflect the timer output level into the kvm_run structure + */ +void kvm_timer_update_run(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Populate the device bitmap with the timer states */ + regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER | + KVM_ARM_DEV_EL1_PTIMER); + if (kvm_timer_should_fire(vtimer)) + regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER; + if (kvm_timer_should_fire(ptimer)) + regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER; +} + +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, + struct arch_timer_context *timer_ctx) +{ + int ret; + + timer_ctx->irq.level = new_level; + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_ctx->irq.irq, + timer_ctx->irq.level); + + if (!userspace_irqchip(vcpu->kvm)) { + ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + timer_ctx->irq.irq, + timer_ctx->irq.level, + timer_ctx); + WARN_ON(ret); + } +} + +/* Only called for a fully emulated timer */ +static void timer_emulate(struct arch_timer_context *ctx) +{ + bool should_fire = kvm_timer_should_fire(ctx); + + trace_kvm_timer_emulate(ctx, should_fire); + + if (should_fire != ctx->irq.level) { + kvm_timer_update_irq(ctx->vcpu, should_fire, ctx); + return; + } + + /* + * If the timer can fire now, we don't need to have a soft timer + * scheduled for the future. If the timer cannot fire at all, + * then we also don't need a soft timer. + */ + if (!kvm_timer_irq_can_fire(ctx)) { + soft_timer_cancel(&ctx->hrtimer); + return; + } + + soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx)); +} + +static void timer_save_state(struct arch_timer_context *ctx) +{ + struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + enum kvm_arch_timers index = arch_timer_ctx_index(ctx); + unsigned long flags; + + if (!timer->enabled) + return; + + local_irq_save(flags); + + if (!ctx->loaded) + goto out; + + switch (index) { + case TIMER_VTIMER: + ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL); + ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL); + + /* Disable the timer */ + write_sysreg_el0(0, SYS_CNTV_CTL); + isb(); + + break; + case TIMER_PTIMER: + ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL); + ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL); + + /* Disable the timer */ + write_sysreg_el0(0, SYS_CNTP_CTL); + isb(); + + break; + case NR_KVM_TIMERS: + BUG(); + } + + trace_kvm_timer_save_state(ctx); + + ctx->loaded = false; +out: + local_irq_restore(flags); +} + +/* + * Schedule the background timer before calling kvm_vcpu_block, so that this + * thread is removed from its waitqueue and made runnable when there's a timer + * interrupt to handle. + */ +static void kvm_timer_blocking(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * If no timers are capable of raising interrupts (disabled or + * masked), then there's no more work for us to do. + */ + if (!kvm_timer_irq_can_fire(map.direct_vtimer) && + !kvm_timer_irq_can_fire(map.direct_ptimer) && + !kvm_timer_irq_can_fire(map.emul_ptimer)) + return; + + /* + * At least one guest time will expire. Schedule a background timer. + * Set the earliest expiration time among the guest timers. + */ + soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu)); +} + +static void kvm_timer_unblocking(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + + soft_timer_cancel(&timer->bg_timer); +} + +static void timer_restore_state(struct arch_timer_context *ctx) +{ + struct arch_timer_cpu *timer = vcpu_timer(ctx->vcpu); + enum kvm_arch_timers index = arch_timer_ctx_index(ctx); + unsigned long flags; + + if (!timer->enabled) + return; + + local_irq_save(flags); + + if (ctx->loaded) + goto out; + + switch (index) { + case TIMER_VTIMER: + write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL); + isb(); + write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL); + break; + case TIMER_PTIMER: + write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL); + isb(); + write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL); + break; + case NR_KVM_TIMERS: + BUG(); + } + + trace_kvm_timer_restore_state(ctx); + + ctx->loaded = true; +out: + local_irq_restore(flags); +} + +static void set_cntvoff(u64 cntvoff) +{ + kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff); +} + +static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active) +{ + int r; + r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active); + WARN_ON(r); +} + +static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx) +{ + struct kvm_vcpu *vcpu = ctx->vcpu; + bool phys_active = false; + + /* + * Update the timer output so that it is likely to match the + * state we're about to restore. If the timer expires between + * this point and the register restoration, we'll take the + * interrupt anyway. + */ + kvm_timer_update_irq(ctx->vcpu, kvm_timer_should_fire(ctx), ctx); + + if (irqchip_in_kernel(vcpu->kvm)) + phys_active = kvm_vgic_map_is_active(vcpu, ctx->irq.irq); + + phys_active |= ctx->irq.level; + + set_timer_irq_phys_active(ctx, phys_active); +} + +static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + /* + * Update the timer output so that it is likely to match the + * state we're about to restore. If the timer expires between + * this point and the register restoration, we'll take the + * interrupt anyway. + */ + kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer); + + /* + * When using a userspace irqchip with the architected timers and a + * host interrupt controller that doesn't support an active state, we + * must still prevent continuously exiting from the guest, and + * therefore mask the physical interrupt by disabling it on the host + * interrupt controller when the virtual level is high, such that the + * guest can make forward progress. Once we detect the output level + * being de-asserted, we unmask the interrupt again so that we exit + * from the guest when the timer fires. + */ + if (vtimer->irq.level) + disable_percpu_irq(host_vtimer_irq); + else + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); +} + +void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + + if (unlikely(!timer->enabled)) + return; + + get_timer_map(vcpu, &map); + + if (static_branch_likely(&has_gic_active_state)) { + kvm_timer_vcpu_load_gic(map.direct_vtimer); + if (map.direct_ptimer) + kvm_timer_vcpu_load_gic(map.direct_ptimer); + } else { + kvm_timer_vcpu_load_nogic(vcpu); + } + + set_cntvoff(map.direct_vtimer->cntvoff); + + kvm_timer_unblocking(vcpu); + + timer_restore_state(map.direct_vtimer); + if (map.direct_ptimer) + timer_restore_state(map.direct_ptimer); + + if (map.emul_ptimer) + timer_emulate(map.emul_ptimer); +} + +bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool vlevel, plevel; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER; + plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER; + + return kvm_timer_should_fire(vtimer) != vlevel || + kvm_timer_should_fire(ptimer) != plevel; +} + +void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); + + if (unlikely(!timer->enabled)) + return; + + get_timer_map(vcpu, &map); + + timer_save_state(map.direct_vtimer); + if (map.direct_ptimer) + timer_save_state(map.direct_ptimer); + + /* + * Cancel soft timer emulation, because the only case where we + * need it after a vcpu_put is in the context of a sleeping VCPU, and + * in that case we already factor in the deadline for the physical + * timer when scheduling the bg_timer. + * + * In any case, we re-schedule the hrtimer for the physical timer when + * coming back to the VCPU thread in kvm_timer_vcpu_load(). + */ + if (map.emul_ptimer) + soft_timer_cancel(&map.emul_ptimer->hrtimer); + + if (rcuwait_active(wait)) + kvm_timer_blocking(vcpu); + + /* + * The kernel may decide to run userspace after calling vcpu_put, so + * we reset cntvoff to 0 to ensure a consistent read between user + * accesses to the virtual counter and kernel access to the physical + * counter of non-VHE case. For VHE, the virtual counter uses a fixed + * virtual offset of zero, so no need to zero CNTVOFF_EL2 register. + */ + set_cntvoff(0); +} + +/* + * With a userspace irqchip we have to check if the guest de-asserted the + * timer and if so, unmask the timer irq signal on the host interrupt + * controller to ensure that we see future timer signals. + */ +static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu) +{ + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + + if (!kvm_timer_should_fire(vtimer)) { + kvm_timer_update_irq(vcpu, false, vtimer); + if (static_branch_likely(&has_gic_active_state)) + set_timer_irq_phys_active(vtimer, false); + else + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); + } +} + +void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + + if (unlikely(!timer->enabled)) + return; + + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + unmask_vtimer_irq_user(vcpu); +} + +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + + get_timer_map(vcpu, &map); + + /* + * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 + * and to 0 for ARMv7. We provide an implementation that always + * resets the timer to be disabled and unmasked and is compliant with + * the ARMv7 architecture. + */ + vcpu_vtimer(vcpu)->cnt_ctl = 0; + vcpu_ptimer(vcpu)->cnt_ctl = 0; + + if (timer->enabled) { + kvm_timer_update_irq(vcpu, false, vcpu_vtimer(vcpu)); + kvm_timer_update_irq(vcpu, false, vcpu_ptimer(vcpu)); + + if (irqchip_in_kernel(vcpu->kvm)) { + kvm_vgic_reset_mapped_irq(vcpu, map.direct_vtimer->irq.irq); + if (map.direct_ptimer) + kvm_vgic_reset_mapped_irq(vcpu, map.direct_ptimer->irq.irq); + } + } + + if (map.emul_ptimer) + soft_timer_cancel(&map.emul_ptimer->hrtimer); + + return 0; +} + +/* Make the updates of cntvoff for all vtimer contexts atomic */ +static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) +{ + int i; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + mutex_lock(&kvm->lock); + kvm_for_each_vcpu(i, tmp, kvm) + vcpu_vtimer(tmp)->cntvoff = cntvoff; + + /* + * When called from the vcpu create path, the CPU being created is not + * included in the loop above, so we just set it here as well. + */ + vcpu_vtimer(vcpu)->cntvoff = cntvoff; + mutex_unlock(&kvm->lock); +} + +void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + + /* Synchronize cntvoff across all vtimers of a VM. */ + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); + ptimer->cntvoff = 0; + + hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + timer->bg_timer.function = kvm_bg_timer_expire; + + hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD); + vtimer->hrtimer.function = kvm_hrtimer_expire; + ptimer->hrtimer.function = kvm_hrtimer_expire; + + vtimer->irq.irq = default_vtimer_irq.irq; + ptimer->irq.irq = default_ptimer_irq.irq; + + vtimer->host_timer_irq = host_vtimer_irq; + ptimer->host_timer_irq = host_ptimer_irq; + + vtimer->host_timer_irq_flags = host_vtimer_irq_flags; + ptimer->host_timer_irq_flags = host_ptimer_irq_flags; + + vtimer->vcpu = vcpu; + ptimer->vcpu = vcpu; +} + +static void kvm_timer_init_interrupt(void *info) +{ + enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags); + enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags); +} + +int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) +{ + struct arch_timer_context *timer; + + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + timer = vcpu_vtimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); + break; + case KVM_REG_ARM_TIMER_CNT: + timer = vcpu_vtimer(vcpu); + update_vtimer_cntvoff(vcpu, kvm_phys_timer_read() - value); + break; + case KVM_REG_ARM_TIMER_CVAL: + timer = vcpu_vtimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); + break; + case KVM_REG_ARM_PTIMER_CTL: + timer = vcpu_ptimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CTL, value); + break; + case KVM_REG_ARM_PTIMER_CVAL: + timer = vcpu_ptimer(vcpu); + kvm_arm_timer_write(vcpu, timer, TIMER_REG_CVAL, value); + break; + + default: + return -1; + } + + return 0; +} + +static u64 read_timer_ctl(struct arch_timer_context *timer) +{ + /* + * Set ISTATUS bit if it's expired. + * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is + * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit + * regardless of ENABLE bit for our implementation convenience. + */ + if (!kvm_timer_compute_delta(timer)) + return timer->cnt_ctl | ARCH_TIMER_CTRL_IT_STAT; + else + return timer->cnt_ctl; +} + +u64 kvm_arm_timer_get_reg(struct kvm_vcpu *vcpu, u64 regid) +{ + switch (regid) { + case KVM_REG_ARM_TIMER_CTL: + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CTL); + case KVM_REG_ARM_TIMER_CNT: + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CNT); + case KVM_REG_ARM_TIMER_CVAL: + return kvm_arm_timer_read(vcpu, + vcpu_vtimer(vcpu), TIMER_REG_CVAL); + case KVM_REG_ARM_PTIMER_CTL: + return kvm_arm_timer_read(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CTL); + case KVM_REG_ARM_PTIMER_CNT: + return kvm_arm_timer_read(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CNT); + case KVM_REG_ARM_PTIMER_CVAL: + return kvm_arm_timer_read(vcpu, + vcpu_ptimer(vcpu), TIMER_REG_CVAL); + } + return (u64)-1; +} + +static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg) +{ + u64 val; + + switch (treg) { + case TIMER_REG_TVAL: + val = timer->cnt_cval - kvm_phys_timer_read() + timer->cntvoff; + val &= lower_32_bits(val); + break; + + case TIMER_REG_CTL: + val = read_timer_ctl(timer); + break; + + case TIMER_REG_CVAL: + val = timer->cnt_cval; + break; + + case TIMER_REG_CNT: + val = kvm_phys_timer_read() - timer->cntvoff; + break; + + default: + BUG(); + } + + return val; +} + +u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu, + enum kvm_arch_timers tmr, + enum kvm_arch_timer_regs treg) +{ + u64 val; + + preempt_disable(); + kvm_timer_vcpu_put(vcpu); + + val = kvm_arm_timer_read(vcpu, vcpu_get_timer(vcpu, tmr), treg); + + kvm_timer_vcpu_load(vcpu); + preempt_enable(); + + return val; +} + +static void kvm_arm_timer_write(struct kvm_vcpu *vcpu, + struct arch_timer_context *timer, + enum kvm_arch_timer_regs treg, + u64 val) +{ + switch (treg) { + case TIMER_REG_TVAL: + timer->cnt_cval = kvm_phys_timer_read() - timer->cntvoff + (s32)val; + break; + + case TIMER_REG_CTL: + timer->cnt_ctl = val & ~ARCH_TIMER_CTRL_IT_STAT; + break; + + case TIMER_REG_CVAL: + timer->cnt_cval = val; + break; + + default: + BUG(); + } +} + +void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu, + enum kvm_arch_timers tmr, + enum kvm_arch_timer_regs treg, + u64 val) +{ + preempt_disable(); + kvm_timer_vcpu_put(vcpu); + + kvm_arm_timer_write(vcpu, vcpu_get_timer(vcpu, tmr), treg, val); + + kvm_timer_vcpu_load(vcpu); + preempt_enable(); +} + +static int kvm_timer_starting_cpu(unsigned int cpu) +{ + kvm_timer_init_interrupt(NULL); + return 0; +} + +static int kvm_timer_dying_cpu(unsigned int cpu) +{ + disable_percpu_irq(host_vtimer_irq); + return 0; +} + +int kvm_timer_hyp_init(bool has_gic) +{ + struct arch_timer_kvm_info *info; + int err; + + info = arch_timer_get_kvm_info(); + timecounter = &info->timecounter; + + if (!timecounter->cc) { + kvm_err("kvm_arch_timer: uninitialized timecounter\n"); + return -ENODEV; + } + + /* First, do the virtual EL1 timer irq */ + + if (info->virtual_irq <= 0) { + kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n", + info->virtual_irq); + return -ENODEV; + } + host_vtimer_irq = info->virtual_irq; + + host_vtimer_irq_flags = irq_get_trigger_type(host_vtimer_irq); + if (host_vtimer_irq_flags != IRQF_TRIGGER_HIGH && + host_vtimer_irq_flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for vtimer IRQ%d, assuming level low\n", + host_vtimer_irq); + host_vtimer_irq_flags = IRQF_TRIGGER_LOW; + } + + err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler, + "kvm guest vtimer", kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n", + host_vtimer_irq, err); + return err; + } + + if (has_gic) { + err = irq_set_vcpu_affinity(host_vtimer_irq, + kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } + + static_branch_enable(&has_gic_active_state); + } + + kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq); + + /* Now let's do the physical EL1 timer irq */ + + if (info->physical_irq > 0) { + host_ptimer_irq = info->physical_irq; + host_ptimer_irq_flags = irq_get_trigger_type(host_ptimer_irq); + if (host_ptimer_irq_flags != IRQF_TRIGGER_HIGH && + host_ptimer_irq_flags != IRQF_TRIGGER_LOW) { + kvm_err("Invalid trigger for ptimer IRQ%d, assuming level low\n", + host_ptimer_irq); + host_ptimer_irq_flags = IRQF_TRIGGER_LOW; + } + + err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler, + "kvm guest ptimer", kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n", + host_ptimer_irq, err); + return err; + } + + if (has_gic) { + err = irq_set_vcpu_affinity(host_ptimer_irq, + kvm_get_running_vcpus()); + if (err) { + kvm_err("kvm_arch_timer: error setting vcpu affinity\n"); + goto out_free_irq; + } + } + + kvm_debug("physical timer IRQ%d\n", host_ptimer_irq); + } else if (has_vhe()) { + kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n", + info->physical_irq); + err = -ENODEV; + goto out_free_irq; + } + + cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, + "kvm/arm/timer:starting", kvm_timer_starting_cpu, + kvm_timer_dying_cpu); + return 0; +out_free_irq: + free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); + return err; +} + +void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + + soft_timer_cancel(&timer->bg_timer); +} + +static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) +{ + int vtimer_irq, ptimer_irq; + int i, ret; + + vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); + if (ret) + return false; + + ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); + if (ret) + return false; + + kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { + if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || + vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + return false; + } + + return true; +} + +bool kvm_arch_timer_get_input_level(int vintid) +{ + struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); + struct arch_timer_context *timer; + + if (vintid == vcpu_vtimer(vcpu)->irq.irq) + timer = vcpu_vtimer(vcpu); + else if (vintid == vcpu_ptimer(vcpu)->irq.irq) + timer = vcpu_ptimer(vcpu); + else + BUG(); + + return kvm_timer_should_fire(timer); +} + +int kvm_timer_enable(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu_timer(vcpu); + struct timer_map map; + int ret; + + if (timer->enabled) + return 0; + + /* Without a VGIC we do not map virtual IRQs to physical IRQs */ + if (!irqchip_in_kernel(vcpu->kvm)) + goto no_vgic; + + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + + if (!timer_irqs_are_valid(vcpu)) { + kvm_debug("incorrectly configured timer irqs\n"); + return -EINVAL; + } + + get_timer_map(vcpu, &map); + + ret = kvm_vgic_map_phys_irq(vcpu, + map.direct_vtimer->host_timer_irq, + map.direct_vtimer->irq.irq, + kvm_arch_timer_get_input_level); + if (ret) + return ret; + + if (map.direct_ptimer) { + ret = kvm_vgic_map_phys_irq(vcpu, + map.direct_ptimer->host_timer_irq, + map.direct_ptimer->irq.irq, + kvm_arch_timer_get_input_level); + } + + if (ret) + return ret; + +no_vgic: + timer->enabled = 1; + return 0; +} + +/* + * On VHE system, we only need to configure the EL2 timer trap register once, + * not for every world switch. + * The host kernel runs at EL2 with HCR_EL2.TGE == 1, + * and this makes those bits have no effect for the host kernel execution. + */ +void kvm_timer_init_vhe(void) +{ + /* When HCR_EL2.E2H ==1, EL1PCEN and EL1PCTEN are shifted by 10 */ + u32 cnthctl_shift = 10; + u64 val; + + /* + * VHE systems allow the guest direct access to the EL1 physical + * timer/counter. + */ + val = read_sysreg(cnthctl_el2); + val |= (CNTHCTL_EL1PCEN << cnthctl_shift); + val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); + write_sysreg(val, cnthctl_el2); +} + +static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; + vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + } +} + +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (get_user(irq, uaddr)) + return -EFAULT; + + if (!(irq_is_ppi(irq))) + return -EINVAL; + + if (vcpu->arch.timer_cpu.enabled) + return -EBUSY; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + break; + default: + return -ENXIO; + } + + return 0; +} + +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *timer; + int irq; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + timer = vcpu_vtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + timer = vcpu_ptimer(vcpu); + break; + default: + return -ENXIO; + } + + irq = timer->irq.irq; + return put_user(irq, uaddr); +} + +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + return 0; + } + + return -ENXIO; +} diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c new file mode 100644 index 000000000000..7a57381c05e8 --- /dev/null +++ b/arch/arm64/kvm/arm.c @@ -0,0 +1,1710 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/bug.h> +#include <linux/cpu_pm.h> +#include <linux/errno.h> +#include <linux/err.h> +#include <linux/kvm_host.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/fs.h> +#include <linux/mman.h> +#include <linux/sched.h> +#include <linux/kvm.h> +#include <linux/kvm_irqfd.h> +#include <linux/irqbypass.h> +#include <linux/sched/stat.h> +#include <trace/events/kvm.h> + +#define CREATE_TRACE_POINTS +#include "trace_arm.h" + +#include <linux/uaccess.h> +#include <asm/ptrace.h> +#include <asm/mman.h> +#include <asm/tlbflush.h> +#include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/virt.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_coproc.h> +#include <asm/sections.h> + +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_pmu.h> +#include <kvm/arm_psci.h> + +#ifdef REQUIRES_VIRT +__asm__(".arch_extension virt"); +#endif + +DEFINE_PER_CPU(kvm_host_data_t, kvm_host_data); +static DEFINE_PER_CPU(unsigned long, kvm_arm_hyp_stack_page); + +/* The VMID used in the VTTBR */ +static atomic64_t kvm_vmid_gen = ATOMIC64_INIT(1); +static u32 kvm_next_vmid; +static DEFINE_SPINLOCK(kvm_vmid_lock); + +static bool vgic_present; + +static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); +DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); + +int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) +{ + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; +} + +int kvm_arch_hardware_setup(void *opaque) +{ + return 0; +} + +int kvm_arch_check_processor_compat(void *opaque) +{ + return 0; +} + +int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) +{ + int r; + + if (cap->flags) + return -EINVAL; + + switch (cap->cap) { + case KVM_CAP_ARM_NISV_TO_USER: + r = 0; + kvm->arch.return_nisv_io_abort_to_user = true; + break; + default: + r = -EINVAL; + break; + } + + return r; +} + +static int kvm_arm_default_max_vcpus(void) +{ + return vgic_present ? kvm_vgic_get_max_vcpus() : KVM_MAX_VCPUS; +} + +/** + * kvm_arch_init_vm - initializes a VM data structure + * @kvm: pointer to the KVM struct + */ +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) +{ + int ret, cpu; + + ret = kvm_arm_setup_stage2(kvm, type); + if (ret) + return ret; + + kvm->arch.last_vcpu_ran = alloc_percpu(typeof(*kvm->arch.last_vcpu_ran)); + if (!kvm->arch.last_vcpu_ran) + return -ENOMEM; + + for_each_possible_cpu(cpu) + *per_cpu_ptr(kvm->arch.last_vcpu_ran, cpu) = -1; + + ret = kvm_alloc_stage2_pgd(kvm); + if (ret) + goto out_fail_alloc; + + ret = create_hyp_mappings(kvm, kvm + 1, PAGE_HYP); + if (ret) + goto out_free_stage2_pgd; + + kvm_vgic_early_init(kvm); + + /* Mark the initial VMID generation invalid */ + kvm->arch.vmid.vmid_gen = 0; + + /* The maximum number of VCPUs is limited by the host's GIC model */ + kvm->arch.max_vcpus = kvm_arm_default_max_vcpus(); + + return ret; +out_free_stage2_pgd: + kvm_free_stage2_pgd(kvm); +out_fail_alloc: + free_percpu(kvm->arch.last_vcpu_ran); + kvm->arch.last_vcpu_ran = NULL; + return ret; +} + +int kvm_arch_create_vcpu_debugfs(struct kvm_vcpu *vcpu) +{ + return 0; +} + +vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + + +/** + * kvm_arch_destroy_vm - destroy the VM data structure + * @kvm: pointer to the KVM struct + */ +void kvm_arch_destroy_vm(struct kvm *kvm) +{ + int i; + + kvm_vgic_destroy(kvm); + + free_percpu(kvm->arch.last_vcpu_ran); + kvm->arch.last_vcpu_ran = NULL; + + for (i = 0; i < KVM_MAX_VCPUS; ++i) { + if (kvm->vcpus[i]) { + kvm_vcpu_destroy(kvm->vcpus[i]); + kvm->vcpus[i] = NULL; + } + } + atomic_set(&kvm->online_vcpus, 0); +} + +int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) +{ + int r; + switch (ext) { + case KVM_CAP_IRQCHIP: + r = vgic_present; + break; + case KVM_CAP_IOEVENTFD: + case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_USER_MEMORY: + case KVM_CAP_SYNC_MMU: + case KVM_CAP_DESTROY_MEMORY_REGION_WORKS: + case KVM_CAP_ONE_REG: + case KVM_CAP_ARM_PSCI: + case KVM_CAP_ARM_PSCI_0_2: + case KVM_CAP_READONLY_MEM: + case KVM_CAP_MP_STATE: + case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_VCPU_EVENTS: + case KVM_CAP_ARM_IRQ_LINE_LAYOUT_2: + case KVM_CAP_ARM_NISV_TO_USER: + case KVM_CAP_ARM_INJECT_EXT_DABT: + r = 1; + break; + case KVM_CAP_ARM_SET_DEVICE_ADDR: + r = 1; + break; + case KVM_CAP_NR_VCPUS: + r = num_online_cpus(); + break; + case KVM_CAP_MAX_VCPUS: + case KVM_CAP_MAX_VCPU_ID: + if (kvm) + r = kvm->arch.max_vcpus; + else + r = kvm_arm_default_max_vcpus(); + break; + case KVM_CAP_MSI_DEVID: + if (!kvm) + r = -EINVAL; + else + r = kvm->arch.vgic.msis_require_devid; + break; + case KVM_CAP_ARM_USER_IRQ: + /* + * 1: EL1_VTIMER, EL1_PTIMER, and PMU. + * (bump this number if adding more devices) + */ + r = 1; + break; + default: + r = kvm_arch_vm_ioctl_check_extension(kvm, ext); + break; + } + return r; +} + +long kvm_arch_dev_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + return -EINVAL; +} + +struct kvm *kvm_arch_alloc_vm(void) +{ + if (!has_vhe()) + return kzalloc(sizeof(struct kvm), GFP_KERNEL); + + return vzalloc(sizeof(struct kvm)); +} + +void kvm_arch_free_vm(struct kvm *kvm) +{ + if (!has_vhe()) + kfree(kvm); + else + vfree(kvm); +} + +int kvm_arch_vcpu_precreate(struct kvm *kvm, unsigned int id) +{ + if (irqchip_in_kernel(kvm) && vgic_initialized(kvm)) + return -EBUSY; + + if (id >= kvm->arch.max_vcpus) + return -EINVAL; + + return 0; +} + +int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) +{ + int err; + + /* Force users to call KVM_ARM_VCPU_INIT */ + vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + + /* Set up the timer */ + kvm_timer_vcpu_init(vcpu); + + kvm_pmu_vcpu_init(vcpu); + + kvm_arm_reset_debug_ptr(vcpu); + + kvm_arm_pvtime_vcpu_init(&vcpu->arch); + + err = kvm_vgic_vcpu_init(vcpu); + if (err) + return err; + + return create_hyp_mappings(vcpu, vcpu + 1, PAGE_HYP); +} + +void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu) +{ +} + +void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + if (vcpu->arch.has_run_once && unlikely(!irqchip_in_kernel(vcpu->kvm))) + static_branch_dec(&userspace_irqchip_in_use); + + kvm_mmu_free_memory_caches(vcpu); + kvm_timer_vcpu_terminate(vcpu); + kvm_pmu_vcpu_destroy(vcpu); + + kvm_arm_vcpu_destroy(vcpu); +} + +int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) +{ + return kvm_timer_is_pending(vcpu); +} + +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + /* + * If we're about to block (most likely because we've just hit a + * WFI), we need to sync back the state of the GIC CPU interface + * so that we have the latest PMR and group enables. This ensures + * that kvm_arch_vcpu_runnable has up-to-date data to decide + * whether we have pending interrupts. + * + * For the same reason, we want to tell GICv4 that we need + * doorbells to be signalled, should an interrupt become pending. + */ + preempt_disable(); + kvm_vgic_vmcr_sync(vcpu); + vgic_v4_put(vcpu, true); + preempt_enable(); +} + +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ + preempt_disable(); + vgic_v4_load(vcpu); + preempt_enable(); +} + +void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +{ + int *last_ran; + kvm_host_data_t *cpu_data; + + last_ran = this_cpu_ptr(vcpu->kvm->arch.last_vcpu_ran); + cpu_data = this_cpu_ptr(&kvm_host_data); + + /* + * We might get preempted before the vCPU actually runs, but + * over-invalidation doesn't affect correctness. + */ + if (*last_ran != vcpu->vcpu_id) { + kvm_call_hyp(__kvm_tlb_flush_local_vmid, vcpu); + *last_ran = vcpu->vcpu_id; + } + + vcpu->cpu = cpu; + vcpu->arch.host_cpu_context = &cpu_data->host_ctxt; + + kvm_vgic_load(vcpu); + kvm_timer_vcpu_load(vcpu); + kvm_vcpu_load_sysregs(vcpu); + kvm_arch_vcpu_load_fp(vcpu); + kvm_vcpu_pmu_restore_guest(vcpu); + if (kvm_arm_is_pvtime_enabled(&vcpu->arch)) + kvm_make_request(KVM_REQ_RECORD_STEAL, vcpu); + + if (single_task_running()) + vcpu_clear_wfx_traps(vcpu); + else + vcpu_set_wfx_traps(vcpu); + + vcpu_ptrauth_setup_lazy(vcpu); +} + +void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) +{ + kvm_arch_vcpu_put_fp(vcpu); + kvm_vcpu_put_sysregs(vcpu); + kvm_timer_vcpu_put(vcpu); + kvm_vgic_put(vcpu); + kvm_vcpu_pmu_restore_host(vcpu); + + vcpu->cpu = -1; +} + +static void vcpu_power_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + +int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + if (vcpu->arch.power_off) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + + return 0; +} + +int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, + struct kvm_mp_state *mp_state) +{ + int ret = 0; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.power_off = false; + break; + case KVM_MP_STATE_STOPPED: + vcpu_power_off(vcpu); + break; + default: + ret = -EINVAL; + } + + return ret; +} + +/** + * kvm_arch_vcpu_runnable - determine if the vcpu can be scheduled + * @v: The VCPU pointer + * + * If the guest CPU is not waiting for interrupts or an interrupt line is + * asserted, the CPU is by definition runnable. + */ +int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) +{ + bool irq_lines = *vcpu_hcr(v) & (HCR_VI | HCR_VF); + return ((irq_lines || kvm_vgic_vcpu_pending_irq(v)) + && !v->arch.power_off && !v->arch.pause); +} + +bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) +{ + return vcpu_mode_priv(vcpu); +} + +/* Just ensure a guest exit from a particular CPU */ +static void exit_vm_noop(void *info) +{ +} + +void force_vm_exit(const cpumask_t *mask) +{ + preempt_disable(); + smp_call_function_many(mask, exit_vm_noop, NULL, true); + preempt_enable(); +} + +/** + * need_new_vmid_gen - check that the VMID is still valid + * @vmid: The VMID to check + * + * return true if there is a new generation of VMIDs being used + * + * The hardware supports a limited set of values with the value zero reserved + * for the host, so we check if an assigned value belongs to a previous + * generation, which requires us to assign a new value. If we're the first to + * use a VMID for the new generation, we must flush necessary caches and TLBs + * on all CPUs. + */ +static bool need_new_vmid_gen(struct kvm_vmid *vmid) +{ + u64 current_vmid_gen = atomic64_read(&kvm_vmid_gen); + smp_rmb(); /* Orders read of kvm_vmid_gen and kvm->arch.vmid */ + return unlikely(READ_ONCE(vmid->vmid_gen) != current_vmid_gen); +} + +/** + * update_vmid - Update the vmid with a valid VMID for the current generation + * @kvm: The guest that struct vmid belongs to + * @vmid: The stage-2 VMID information struct + */ +static void update_vmid(struct kvm_vmid *vmid) +{ + if (!need_new_vmid_gen(vmid)) + return; + + spin_lock(&kvm_vmid_lock); + + /* + * We need to re-check the vmid_gen here to ensure that if another vcpu + * already allocated a valid vmid for this vm, then this vcpu should + * use the same vmid. + */ + if (!need_new_vmid_gen(vmid)) { + spin_unlock(&kvm_vmid_lock); + return; + } + + /* First user of a new VMID generation? */ + if (unlikely(kvm_next_vmid == 0)) { + atomic64_inc(&kvm_vmid_gen); + kvm_next_vmid = 1; + + /* + * On SMP we know no other CPUs can use this CPU's or each + * other's VMID after force_vm_exit returns since the + * kvm_vmid_lock blocks them from reentry to the guest. + */ + force_vm_exit(cpu_all_mask); + /* + * Now broadcast TLB + ICACHE invalidation over the inner + * shareable domain to make sure all data structures are + * clean. + */ + kvm_call_hyp(__kvm_flush_vm_context); + } + + vmid->vmid = kvm_next_vmid; + kvm_next_vmid++; + kvm_next_vmid &= (1 << kvm_get_vmid_bits()) - 1; + + smp_wmb(); + WRITE_ONCE(vmid->vmid_gen, atomic64_read(&kvm_vmid_gen)); + + spin_unlock(&kvm_vmid_lock); +} + +static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + int ret = 0; + + if (likely(vcpu->arch.has_run_once)) + return 0; + + if (!kvm_arm_vcpu_is_finalized(vcpu)) + return -EPERM; + + vcpu->arch.has_run_once = true; + + if (likely(irqchip_in_kernel(kvm))) { + /* + * Map the VGIC hardware resources before running a vcpu the + * first time on this VM. + */ + if (unlikely(!vgic_ready(kvm))) { + ret = kvm_vgic_map_resources(kvm); + if (ret) + return ret; + } + } else { + /* + * Tell the rest of the code that there are userspace irqchip + * VMs in the wild. + */ + static_branch_inc(&userspace_irqchip_in_use); + } + + ret = kvm_timer_enable(vcpu); + if (ret) + return ret; + + ret = kvm_arm_pmu_v3_enable(vcpu); + + return ret; +} + +bool kvm_arch_intc_initialized(struct kvm *kvm) +{ + return vgic_initialized(kvm); +} + +void kvm_arm_halt_guest(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) + vcpu->arch.pause = true; + kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP); +} + +void kvm_arm_resume_guest(struct kvm *kvm) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.pause = false; + rcuwait_wake_up(kvm_arch_vcpu_get_wait(vcpu)); + } +} + +static void vcpu_req_sleep(struct kvm_vcpu *vcpu) +{ + struct rcuwait *wait = kvm_arch_vcpu_get_wait(vcpu); + + rcuwait_wait_event(wait, + (!vcpu->arch.power_off) &&(!vcpu->arch.pause), + TASK_INTERRUPTIBLE); + + if (vcpu->arch.power_off || vcpu->arch.pause) { + /* Awaken to handle a signal, request we sleep again later. */ + kvm_make_request(KVM_REQ_SLEEP, vcpu); + } + + /* + * Make sure we will observe a potential reset request if we've + * observed a change to the power state. Pairs with the smp_wmb() in + * kvm_psci_vcpu_on(). + */ + smp_rmb(); +} + +static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) +{ + return vcpu->arch.target >= 0; +} + +static void check_vcpu_requests(struct kvm_vcpu *vcpu) +{ + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) + vcpu_req_sleep(vcpu); + + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_reset_vcpu(vcpu); + + /* + * Clear IRQ_PENDING requests that were made to guarantee + * that a VCPU sees new virtual interrupts. + */ + kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); + + if (kvm_check_request(KVM_REQ_RECORD_STEAL, vcpu)) + kvm_update_stolen_time(vcpu); + + if (kvm_check_request(KVM_REQ_RELOAD_GICv4, vcpu)) { + /* The distributor enable bits were changed */ + preempt_disable(); + vgic_v4_put(vcpu, false); + vgic_v4_load(vcpu); + preempt_enable(); + } + } +} + +/** + * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code + * @vcpu: The VCPU pointer + * + * This function is called through the VCPU_RUN ioctl called from user space. It + * will execute VM code in a loop until the time slice for the process is used + * or some emulation is needed from user space in which case the function will + * return with return value 0 and with the kvm_run structure filled in with the + * required data for the requested emulation. + */ +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) +{ + struct kvm_run *run = vcpu->run; + int ret; + + if (unlikely(!kvm_vcpu_initialized(vcpu))) + return -ENOEXEC; + + ret = kvm_vcpu_first_run_init(vcpu); + if (ret) + return ret; + + if (run->exit_reason == KVM_EXIT_MMIO) { + ret = kvm_handle_mmio_return(vcpu, run); + if (ret) + return ret; + } + + if (run->immediate_exit) + return -EINTR; + + vcpu_load(vcpu); + + kvm_sigset_activate(vcpu); + + ret = 1; + run->exit_reason = KVM_EXIT_UNKNOWN; + while (ret > 0) { + /* + * Check conditions before entering the guest + */ + cond_resched(); + + update_vmid(&vcpu->kvm->arch.vmid); + + check_vcpu_requests(vcpu); + + /* + * Preparing the interrupts to be injected also + * involves poking the GIC, which must be done in a + * non-preemptible context. + */ + preempt_disable(); + + kvm_pmu_flush_hwstate(vcpu); + + local_irq_disable(); + + kvm_vgic_flush_hwstate(vcpu); + + /* + * Exit if we have a signal pending so that we can deliver the + * signal to user space. + */ + if (signal_pending(current)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + + /* + * If we're using a userspace irqchip, then check if we need + * to tell a userspace irqchip about timer or PMU level + * changes and if so, exit to userspace (the actual level + * state gets updated in kvm_timer_update_run and + * kvm_pmu_update_run below). + */ + if (static_branch_unlikely(&userspace_irqchip_in_use)) { + if (kvm_timer_should_notify_user(vcpu) || + kvm_pmu_should_notify_user(vcpu)) { + ret = -EINTR; + run->exit_reason = KVM_EXIT_INTR; + } + } + + /* + * Ensure we set mode to IN_GUEST_MODE after we disable + * interrupts and before the final VCPU requests check. + * See the comment in kvm_vcpu_exiting_guest_mode() and + * Documentation/virt/kvm/vcpu-requests.rst + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + + if (ret <= 0 || need_new_vmid_gen(&vcpu->kvm->arch.vmid) || + kvm_request_pending(vcpu)) { + vcpu->mode = OUTSIDE_GUEST_MODE; + isb(); /* Ensure work in x_flush_hwstate is committed */ + kvm_pmu_sync_hwstate(vcpu); + if (static_branch_unlikely(&userspace_irqchip_in_use)) + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); + local_irq_enable(); + preempt_enable(); + continue; + } + + kvm_arm_setup_debug(vcpu); + + /************************************************************** + * Enter the guest + */ + trace_kvm_entry(*vcpu_pc(vcpu)); + guest_enter_irqoff(); + + if (has_vhe()) { + ret = kvm_vcpu_run_vhe(vcpu); + } else { + ret = kvm_call_hyp_ret(__kvm_vcpu_run_nvhe, vcpu); + } + + vcpu->mode = OUTSIDE_GUEST_MODE; + vcpu->stat.exits++; + /* + * Back from guest + *************************************************************/ + + kvm_arm_clear_debug(vcpu); + + /* + * We must sync the PMU state before the vgic state so + * that the vgic can properly sample the updated state of the + * interrupt line. + */ + kvm_pmu_sync_hwstate(vcpu); + + /* + * Sync the vgic state before syncing the timer state because + * the timer code needs to know if the virtual timer + * interrupts are active. + */ + kvm_vgic_sync_hwstate(vcpu); + + /* + * Sync the timer hardware state before enabling interrupts as + * we don't want vtimer interrupts to race with syncing the + * timer virtual interrupt state. + */ + if (static_branch_unlikely(&userspace_irqchip_in_use)) + kvm_timer_sync_hwstate(vcpu); + + kvm_arch_vcpu_ctxsync_fp(vcpu); + + /* + * We may have taken a host interrupt in HYP mode (ie + * while executing the guest). This interrupt is still + * pending, as we haven't serviced it yet! + * + * We're now back in SVC mode, with interrupts + * disabled. Enabling the interrupts now will have + * the effect of taking the interrupt again, in SVC + * mode this time. + */ + local_irq_enable(); + + /* + * We do local_irq_enable() before calling guest_exit() so + * that if a timer interrupt hits while running the guest we + * account that tick as being spent in the guest. We enable + * preemption after calling guest_exit() so that if we get + * preempted we make sure ticks after that is not counted as + * guest time. + */ + guest_exit(); + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + + /* Exit types that need handling before we can be preempted */ + handle_exit_early(vcpu, run, ret); + + preempt_enable(); + + ret = handle_exit(vcpu, run, ret); + } + + /* Tell userspace about in-kernel device output levels */ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) { + kvm_timer_update_run(vcpu); + kvm_pmu_update_run(vcpu); + } + + kvm_sigset_deactivate(vcpu); + + vcpu_put(vcpu); + return ret; +} + +static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) +{ + int bit_index; + bool set; + unsigned long *hcr; + + if (number == KVM_ARM_IRQ_CPU_IRQ) + bit_index = __ffs(HCR_VI); + else /* KVM_ARM_IRQ_CPU_FIQ */ + bit_index = __ffs(HCR_VF); + + hcr = vcpu_hcr(vcpu); + if (level) + set = test_and_set_bit(bit_index, hcr); + else + set = test_and_clear_bit(bit_index, hcr); + + /* + * If we didn't change anything, no need to wake up or kick other CPUs + */ + if (set == level) + return 0; + + /* + * The vcpu irq_lines field was updated, wake up sleeping VCPUs and + * trigger a world-switch round on the running physical CPU to set the + * virtual IRQ/FIQ fields in the HCR appropriately. + */ + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return 0; +} + +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, + bool line_status) +{ + u32 irq = irq_level->irq; + unsigned int irq_type, vcpu_idx, irq_num; + int nrcpus = atomic_read(&kvm->online_vcpus); + struct kvm_vcpu *vcpu = NULL; + bool level = irq_level->level; + + irq_type = (irq >> KVM_ARM_IRQ_TYPE_SHIFT) & KVM_ARM_IRQ_TYPE_MASK; + vcpu_idx = (irq >> KVM_ARM_IRQ_VCPU_SHIFT) & KVM_ARM_IRQ_VCPU_MASK; + vcpu_idx += ((irq >> KVM_ARM_IRQ_VCPU2_SHIFT) & KVM_ARM_IRQ_VCPU2_MASK) * (KVM_ARM_IRQ_VCPU_MASK + 1); + irq_num = (irq >> KVM_ARM_IRQ_NUM_SHIFT) & KVM_ARM_IRQ_NUM_MASK; + + trace_kvm_irq_line(irq_type, vcpu_idx, irq_num, irq_level->level); + + switch (irq_type) { + case KVM_ARM_IRQ_TYPE_CPU: + if (irqchip_in_kernel(kvm)) + return -ENXIO; + + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num > KVM_ARM_IRQ_CPU_FIQ) + return -EINVAL; + + return vcpu_interrupt_line(vcpu, irq_num, level); + case KVM_ARM_IRQ_TYPE_PPI: + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + if (vcpu_idx >= nrcpus) + return -EINVAL; + + vcpu = kvm_get_vcpu(kvm, vcpu_idx); + if (!vcpu) + return -EINVAL; + + if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); + case KVM_ARM_IRQ_TYPE_SPI: + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + if (irq_num < VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); + } + + return -EINVAL; +} + +static int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, + const struct kvm_vcpu_init *init) +{ + unsigned int i, ret; + int phys_target = kvm_target_cpu(); + + if (init->target != phys_target) + return -EINVAL; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same target. + */ + if (vcpu->arch.target != -1 && vcpu->arch.target != init->target) + return -EINVAL; + + /* -ENOENT for unknown features, -EINVAL for invalid combinations. */ + for (i = 0; i < sizeof(init->features) * 8; i++) { + bool set = (init->features[i / 32] & (1 << (i % 32))); + + if (set && i >= KVM_VCPU_MAX_FEATURES) + return -ENOENT; + + /* + * Secondary and subsequent calls to KVM_ARM_VCPU_INIT must + * use the same feature set. + */ + if (vcpu->arch.target != -1 && i < KVM_VCPU_MAX_FEATURES && + test_bit(i, vcpu->arch.features) != set) + return -EINVAL; + + if (set) + set_bit(i, vcpu->arch.features); + } + + vcpu->arch.target = phys_target; + + /* Now we know what it is, we can reset it. */ + ret = kvm_reset_vcpu(vcpu); + if (ret) { + vcpu->arch.target = -1; + bitmap_zero(vcpu->arch.features, KVM_VCPU_MAX_FEATURES); + } + + return ret; +} + +static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, + struct kvm_vcpu_init *init) +{ + int ret; + + ret = kvm_vcpu_set_target(vcpu, init); + if (ret) + return ret; + + /* + * Ensure a rebooted VM will fault in RAM pages and detect if the + * guest MMU is turned off and flush the caches as needed. + * + * S2FWB enforces all memory accesses to RAM being cacheable, we + * ensure that the cache is always coherent. + */ + if (vcpu->arch.has_run_once && !cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) + stage2_unmap_vm(vcpu->kvm); + + vcpu_reset_hcr(vcpu); + + /* + * Handle the "start in power-off" case. + */ + if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) + vcpu_power_off(vcpu); + else + vcpu->arch.power_off = false; + + return 0; +} + +static int kvm_arm_vcpu_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_set_attr(vcpu, attr); + break; + } + + return ret; +} + +static int kvm_arm_vcpu_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_get_attr(vcpu, attr); + break; + } + + return ret; +} + +static int kvm_arm_vcpu_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + default: + ret = kvm_arm_vcpu_arch_has_attr(vcpu, attr); + break; + } + + return ret; +} + +static int kvm_arm_vcpu_get_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + memset(events, 0, sizeof(*events)); + + return __kvm_arm_vcpu_get_events(vcpu, events); +} + +static int kvm_arm_vcpu_set_events(struct kvm_vcpu *vcpu, + struct kvm_vcpu_events *events) +{ + int i; + + /* check whether the reserved field is zero */ + for (i = 0; i < ARRAY_SIZE(events->reserved); i++) + if (events->reserved[i]) + return -EINVAL; + + /* check whether the pad field is zero */ + for (i = 0; i < ARRAY_SIZE(events->exception.pad); i++) + if (events->exception.pad[i]) + return -EINVAL; + + return __kvm_arm_vcpu_set_events(vcpu, events); +} + +long kvm_arch_vcpu_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + struct kvm_device_attr attr; + long r; + + switch (ioctl) { + case KVM_ARM_VCPU_INIT: { + struct kvm_vcpu_init init; + + r = -EFAULT; + if (copy_from_user(&init, argp, sizeof(init))) + break; + + r = kvm_arch_vcpu_ioctl_vcpu_init(vcpu, &init); + break; + } + case KVM_SET_ONE_REG: + case KVM_GET_ONE_REG: { + struct kvm_one_reg reg; + + r = -ENOEXEC; + if (unlikely(!kvm_vcpu_initialized(vcpu))) + break; + + r = -EFAULT; + if (copy_from_user(®, argp, sizeof(reg))) + break; + + if (ioctl == KVM_SET_ONE_REG) + r = kvm_arm_set_reg(vcpu, ®); + else + r = kvm_arm_get_reg(vcpu, ®); + break; + } + case KVM_GET_REG_LIST: { + struct kvm_reg_list __user *user_list = argp; + struct kvm_reg_list reg_list; + unsigned n; + + r = -ENOEXEC; + if (unlikely(!kvm_vcpu_initialized(vcpu))) + break; + + r = -EPERM; + if (!kvm_arm_vcpu_is_finalized(vcpu)) + break; + + r = -EFAULT; + if (copy_from_user(®_list, user_list, sizeof(reg_list))) + break; + n = reg_list.n; + reg_list.n = kvm_arm_num_regs(vcpu); + if (copy_to_user(user_list, ®_list, sizeof(reg_list))) + break; + r = -E2BIG; + if (n < reg_list.n) + break; + r = kvm_arm_copy_reg_indices(vcpu, user_list->reg); + break; + } + case KVM_SET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_arm_vcpu_set_attr(vcpu, &attr); + break; + } + case KVM_GET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_arm_vcpu_get_attr(vcpu, &attr); + break; + } + case KVM_HAS_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_arm_vcpu_has_attr(vcpu, &attr); + break; + } + case KVM_GET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + if (kvm_arm_vcpu_get_events(vcpu, &events)) + return -EINVAL; + + if (copy_to_user(argp, &events, sizeof(events))) + return -EFAULT; + + return 0; + } + case KVM_SET_VCPU_EVENTS: { + struct kvm_vcpu_events events; + + if (copy_from_user(&events, argp, sizeof(events))) + return -EFAULT; + + return kvm_arm_vcpu_set_events(vcpu, &events); + } + case KVM_ARM_VCPU_FINALIZE: { + int what; + + if (!kvm_vcpu_initialized(vcpu)) + return -ENOEXEC; + + if (get_user(what, (const int __user *)argp)) + return -EFAULT; + + return kvm_arm_vcpu_finalize(vcpu, what); + } + default: + r = -EINVAL; + } + + return r; +} + +void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) +{ + +} + +void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + kvm_flush_remote_tlbs(kvm); +} + +static int kvm_vm_ioctl_set_device_addr(struct kvm *kvm, + struct kvm_arm_device_addr *dev_addr) +{ + unsigned long dev_id, type; + + dev_id = (dev_addr->id & KVM_ARM_DEVICE_ID_MASK) >> + KVM_ARM_DEVICE_ID_SHIFT; + type = (dev_addr->id & KVM_ARM_DEVICE_TYPE_MASK) >> + KVM_ARM_DEVICE_TYPE_SHIFT; + + switch (dev_id) { + case KVM_ARM_DEVICE_VGIC_V2: + if (!vgic_present) + return -ENXIO; + return kvm_vgic_addr(kvm, type, &dev_addr->addr, true); + default: + return -ENODEV; + } +} + +long kvm_arch_vm_ioctl(struct file *filp, + unsigned int ioctl, unsigned long arg) +{ + struct kvm *kvm = filp->private_data; + void __user *argp = (void __user *)arg; + + switch (ioctl) { + case KVM_CREATE_IRQCHIP: { + int ret; + if (!vgic_present) + return -ENXIO; + mutex_lock(&kvm->lock); + ret = kvm_vgic_create(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + mutex_unlock(&kvm->lock); + return ret; + } + case KVM_ARM_SET_DEVICE_ADDR: { + struct kvm_arm_device_addr dev_addr; + + if (copy_from_user(&dev_addr, argp, sizeof(dev_addr))) + return -EFAULT; + return kvm_vm_ioctl_set_device_addr(kvm, &dev_addr); + } + case KVM_ARM_PREFERRED_TARGET: { + int err; + struct kvm_vcpu_init init; + + err = kvm_vcpu_preferred_target(&init); + if (err) + return err; + + if (copy_to_user(argp, &init, sizeof(init))) + return -EFAULT; + + return 0; + } + default: + return -EINVAL; + } +} + +static void cpu_init_hyp_mode(void) +{ + phys_addr_t pgd_ptr; + unsigned long hyp_stack_ptr; + unsigned long vector_ptr; + unsigned long tpidr_el2; + + /* Switch from the HYP stub to our own HYP init vector */ + __hyp_set_vectors(kvm_get_idmap_vector()); + + /* + * Calculate the raw per-cpu offset without a translation from the + * kernel's mapping to the linear mapping, and store it in tpidr_el2 + * so that we can use adr_l to access per-cpu variables in EL2. + */ + tpidr_el2 = ((unsigned long)this_cpu_ptr(&kvm_host_data) - + (unsigned long)kvm_ksym_ref(kvm_host_data)); + + pgd_ptr = kvm_mmu_get_httbr(); + hyp_stack_ptr = __this_cpu_read(kvm_arm_hyp_stack_page) + PAGE_SIZE; + vector_ptr = (unsigned long)kvm_get_hyp_vector(); + + /* + * Call initialization code, and switch to the full blown HYP code. + * If the cpucaps haven't been finalized yet, something has gone very + * wrong, and hyp will crash and burn when it uses any + * cpus_have_const_cap() wrapper. + */ + BUG_ON(!system_capabilities_finalized()); + __kvm_call_hyp((void *)pgd_ptr, hyp_stack_ptr, vector_ptr, tpidr_el2); + + /* + * Disabling SSBD on a non-VHE system requires us to enable SSBS + * at EL2. + */ + if (this_cpu_has_cap(ARM64_SSBS) && + arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) { + kvm_call_hyp(__kvm_enable_ssbs); + } +} + +static void cpu_hyp_reset(void) +{ + if (!is_kernel_in_hyp_mode()) + __hyp_reset_vectors(); +} + +static void cpu_hyp_reinit(void) +{ + kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt); + + cpu_hyp_reset(); + + if (is_kernel_in_hyp_mode()) + kvm_timer_init_vhe(); + else + cpu_init_hyp_mode(); + + kvm_arm_init_debug(); + + if (vgic_present) + kvm_vgic_init_cpu_hardware(); +} + +static void _kvm_arch_hardware_enable(void *discard) +{ + if (!__this_cpu_read(kvm_arm_hardware_enabled)) { + cpu_hyp_reinit(); + __this_cpu_write(kvm_arm_hardware_enabled, 1); + } +} + +int kvm_arch_hardware_enable(void) +{ + _kvm_arch_hardware_enable(NULL); + return 0; +} + +static void _kvm_arch_hardware_disable(void *discard) +{ + if (__this_cpu_read(kvm_arm_hardware_enabled)) { + cpu_hyp_reset(); + __this_cpu_write(kvm_arm_hardware_enabled, 0); + } +} + +void kvm_arch_hardware_disable(void) +{ + _kvm_arch_hardware_disable(NULL); +} + +#ifdef CONFIG_CPU_PM +static int hyp_init_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, + void *v) +{ + /* + * kvm_arm_hardware_enabled is left with its old value over + * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should + * re-enable hyp. + */ + switch (cmd) { + case CPU_PM_ENTER: + if (__this_cpu_read(kvm_arm_hardware_enabled)) + /* + * don't update kvm_arm_hardware_enabled here + * so that the hardware will be re-enabled + * when we resume. See below. + */ + cpu_hyp_reset(); + + return NOTIFY_OK; + case CPU_PM_ENTER_FAILED: + case CPU_PM_EXIT: + if (__this_cpu_read(kvm_arm_hardware_enabled)) + /* The hardware was enabled before suspend. */ + cpu_hyp_reinit(); + + return NOTIFY_OK; + + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block hyp_init_cpu_pm_nb = { + .notifier_call = hyp_init_cpu_pm_notifier, +}; + +static void __init hyp_cpu_pm_init(void) +{ + cpu_pm_register_notifier(&hyp_init_cpu_pm_nb); +} +static void __init hyp_cpu_pm_exit(void) +{ + cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb); +} +#else +static inline void hyp_cpu_pm_init(void) +{ +} +static inline void hyp_cpu_pm_exit(void) +{ +} +#endif + +static int init_common_resources(void) +{ + return kvm_set_ipa_limit(); +} + +static int init_subsystems(void) +{ + int err = 0; + + /* + * Enable hardware so that subsystem initialisation can access EL2. + */ + on_each_cpu(_kvm_arch_hardware_enable, NULL, 1); + + /* + * Register CPU lower-power notifier + */ + hyp_cpu_pm_init(); + + /* + * Init HYP view of VGIC + */ + err = kvm_vgic_hyp_init(); + switch (err) { + case 0: + vgic_present = true; + break; + case -ENODEV: + case -ENXIO: + vgic_present = false; + err = 0; + break; + default: + goto out; + } + + /* + * Init HYP architected timer support + */ + err = kvm_timer_hyp_init(vgic_present); + if (err) + goto out; + + kvm_perf_init(); + kvm_coproc_table_init(); + +out: + on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + + return err; +} + +static void teardown_hyp_mode(void) +{ + int cpu; + + free_hyp_pgds(); + for_each_possible_cpu(cpu) + free_page(per_cpu(kvm_arm_hyp_stack_page, cpu)); +} + +/** + * Inits Hyp-mode on all online CPUs + */ +static int init_hyp_mode(void) +{ + int cpu; + int err = 0; + + /* + * Allocate Hyp PGD and setup Hyp identity mapping + */ + err = kvm_mmu_init(); + if (err) + goto out_err; + + /* + * Allocate stack pages for Hypervisor-mode + */ + for_each_possible_cpu(cpu) { + unsigned long stack_page; + + stack_page = __get_free_page(GFP_KERNEL); + if (!stack_page) { + err = -ENOMEM; + goto out_err; + } + + per_cpu(kvm_arm_hyp_stack_page, cpu) = stack_page; + } + + /* + * Map the Hyp-code called directly from the host + */ + err = create_hyp_mappings(kvm_ksym_ref(__hyp_text_start), + kvm_ksym_ref(__hyp_text_end), PAGE_HYP_EXEC); + if (err) { + kvm_err("Cannot map world-switch code\n"); + goto out_err; + } + + err = create_hyp_mappings(kvm_ksym_ref(__start_rodata), + kvm_ksym_ref(__end_rodata), PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map rodata section\n"); + goto out_err; + } + + err = create_hyp_mappings(kvm_ksym_ref(__bss_start), + kvm_ksym_ref(__bss_stop), PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map bss section\n"); + goto out_err; + } + + err = kvm_map_vectors(); + if (err) { + kvm_err("Cannot map vectors\n"); + goto out_err; + } + + /* + * Map the Hyp stack pages + */ + for_each_possible_cpu(cpu) { + char *stack_page = (char *)per_cpu(kvm_arm_hyp_stack_page, cpu); + err = create_hyp_mappings(stack_page, stack_page + PAGE_SIZE, + PAGE_HYP); + + if (err) { + kvm_err("Cannot map hyp stack\n"); + goto out_err; + } + } + + for_each_possible_cpu(cpu) { + kvm_host_data_t *cpu_data; + + cpu_data = per_cpu_ptr(&kvm_host_data, cpu); + err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP); + + if (err) { + kvm_err("Cannot map host CPU state: %d\n", err); + goto out_err; + } + } + + err = hyp_map_aux_data(); + if (err) + kvm_err("Cannot map host auxiliary data: %d\n", err); + + return 0; + +out_err: + teardown_hyp_mode(); + kvm_err("error initializing Hyp mode: %d\n", err); + return err; +} + +static void check_kvm_target_cpu(void *ret) +{ + *(int *)ret = kvm_target_cpu(); +} + +struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr) +{ + struct kvm_vcpu *vcpu; + int i; + + mpidr &= MPIDR_HWID_BITMASK; + kvm_for_each_vcpu(i, vcpu, kvm) { + if (mpidr == kvm_vcpu_get_mpidr_aff(vcpu)) + return vcpu; + } + return NULL; +} + +bool kvm_arch_has_irq_bypass(void) +{ + return true; +} + +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq, + &irqfd->irq_entry); +} +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons, + struct irq_bypass_producer *prod) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq, + &irqfd->irq_entry); +} + +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_arm_halt_guest(irqfd->kvm); +} + +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons) +{ + struct kvm_kernel_irqfd *irqfd = + container_of(cons, struct kvm_kernel_irqfd, consumer); + + kvm_arm_resume_guest(irqfd->kvm); +} + +/** + * Initialize Hyp-mode and memory mappings on all CPUs. + */ +int kvm_arch_init(void *opaque) +{ + int err; + int ret, cpu; + bool in_hyp_mode; + + if (!is_hyp_mode_available()) { + kvm_info("HYP mode not available\n"); + return -ENODEV; + } + + in_hyp_mode = is_kernel_in_hyp_mode(); + + if (!in_hyp_mode && kvm_arch_requires_vhe()) { + kvm_pr_unimpl("CPU unsupported in non-VHE mode, not initializing\n"); + return -ENODEV; + } + + for_each_online_cpu(cpu) { + smp_call_function_single(cpu, check_kvm_target_cpu, &ret, 1); + if (ret < 0) { + kvm_err("Error, CPU %d not supported!\n", cpu); + return -ENODEV; + } + } + + err = init_common_resources(); + if (err) + return err; + + err = kvm_arm_init_sve(); + if (err) + return err; + + if (!in_hyp_mode) { + err = init_hyp_mode(); + if (err) + goto out_err; + } + + err = init_subsystems(); + if (err) + goto out_hyp; + + if (in_hyp_mode) + kvm_info("VHE mode initialized successfully\n"); + else + kvm_info("Hyp mode initialized successfully\n"); + + return 0; + +out_hyp: + hyp_cpu_pm_exit(); + if (!in_hyp_mode) + teardown_hyp_mode(); +out_err: + return err; +} + +/* NOP: Compiling as a module not supported */ +void kvm_arch_exit(void) +{ + kvm_perf_teardown(); +} + +static int arm_init(void) +{ + int rc = kvm_init(NULL, sizeof(struct kvm_vcpu), 0, THIS_MODULE); + return rc; +} + +module_init(arm_init); diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index 50a279d3ddd7..aea43ec60f37 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -29,20 +29,19 @@ #include "trace.h" -#define VM_STAT(x) { #x, offsetof(struct kvm, stat.x), KVM_STAT_VM } -#define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU } - struct kvm_stats_debugfs_item debugfs_entries[] = { - VCPU_STAT(halt_successful_poll), - VCPU_STAT(halt_attempted_poll), - VCPU_STAT(halt_poll_invalid), - VCPU_STAT(halt_wakeup), - VCPU_STAT(hvc_exit_stat), - VCPU_STAT(wfe_exit_stat), - VCPU_STAT(wfi_exit_stat), - VCPU_STAT(mmio_exit_user), - VCPU_STAT(mmio_exit_kernel), - VCPU_STAT(exits), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("hvc_exit_stat", hvc_exit_stat), + VCPU_STAT("wfe_exit_stat", wfe_exit_stat), + VCPU_STAT("wfi_exit_stat", wfi_exit_stat), + VCPU_STAT("mmio_exit_user", mmio_exit_user), + VCPU_STAT("mmio_exit_kernel", mmio_exit_kernel), + VCPU_STAT("exits", exits), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), { NULL } }; @@ -267,7 +266,7 @@ static int set_sve_vls(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) /* * Vector lengths supported by the host can't currently be * hidden from the guest individually: instead we can only set a - * maxmium via ZCR_EL2.LEN. So, make sure the available vector + * maximum via ZCR_EL2.LEN. So, make sure the available vector * lengths match the set requested exactly up to the requested * maximum: */ @@ -337,7 +336,7 @@ static int sve_reg_to_region(struct sve_state_reg_region *region, unsigned int reg_num; unsigned int reqoffset, reqlen; /* User-requested offset and length */ - unsigned int maxlen; /* Maxmimum permitted length */ + unsigned int maxlen; /* Maximum permitted length */ size_t sve_state_size; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index aacfc55de44c..eb194696ef62 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -23,7 +23,7 @@ #include <kvm/arm_hypercalls.h> #define CREATE_TRACE_POINTS -#include "trace.h" +#include "trace_handle_exit.h" typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index ea710f674cb6..8c9880783839 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -6,20 +6,10 @@ ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING \ $(DISABLE_STACKLEAK_PLUGIN) -KVM=../../../../virt/kvm +obj-$(CONFIG_KVM) += hyp.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/aarch32.o - -obj-$(CONFIG_KVM_ARM_HOST) += vgic-v2-cpuif-proxy.o -obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += entry.o -obj-$(CONFIG_KVM_ARM_HOST) += switch.o -obj-$(CONFIG_KVM_ARM_HOST) += fpsimd.o -obj-$(CONFIG_KVM_ARM_HOST) += tlb.o -obj-$(CONFIG_KVM_ARM_HOST) += hyp-entry.o +hyp-y := vgic-v3-sr.o timer-sr.o aarch32.o vgic-v2-cpuif-proxy.o sysreg-sr.o \ + debug-sr.o entry.o switch.o fpsimd.o tlb.o hyp-entry.o # KVM code is run at a different exception code with a different map, so # compiler instrumentation that inserts callbacks or checks into the code may diff --git a/arch/arm64/kvm/hyp/aarch32.c b/arch/arm64/kvm/hyp/aarch32.c new file mode 100644 index 000000000000..25c0e47d57cb --- /dev/null +++ b/arch/arm64/kvm/hyp/aarch32.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hyp portion of the (not much of an) Emulation layer for 32bit guests. + * + * Copyright (C) 2012,2013 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + * + * based on arch/arm/kvm/emulate.c + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> + +/* + * stolen from arch/arm/kernel/opcodes.c + * + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Check if a trapped instruction should have been executed or not. + */ +bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) +{ + unsigned long cpsr; + u32 cpsr_cond; + int cond; + + /* Top two bits non-zero? Unconditional. */ + if (kvm_vcpu_get_hsr(vcpu) >> 30) + return true; + + /* Is condition field valid? */ + cond = kvm_vcpu_get_condition(vcpu); + if (cond == 0xE) + return true; + + cpsr = *vcpu_cpsr(vcpu); + + if (cond < 0) { + /* This can happen in Thumb mode: examine IT state. */ + unsigned long it; + + it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); + + /* it == 0 => unconditional. */ + if (it == 0) + return true; + + /* The cond for this insn works out as the top 4 bits. */ + cond = (it >> 4); + } + + cpsr_cond = cpsr >> 28; + + if (!((cc_map[cond] >> cpsr_cond) & 1)) + return false; + + return true; +} + +/** + * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block + * @vcpu: The VCPU pointer + * + * When exceptions occur while instructions are executed in Thumb IF-THEN + * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have + * to do this little bit of work manually. The fields map like this: + * + * IT[7:0] -> CPSR[26:25],CPSR[15:10] + */ +static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) +{ + unsigned long itbits, cond; + unsigned long cpsr = *vcpu_cpsr(vcpu); + bool is_arm = !(cpsr & PSR_AA32_T_BIT); + + if (is_arm || !(cpsr & PSR_AA32_IT_MASK)) + return; + + cond = (cpsr & 0xe000) >> 13; + itbits = (cpsr & 0x1c00) >> (10 - 2); + itbits |= (cpsr & (0x3 << 25)) >> 25; + + /* Perform ITAdvance (see page A2-52 in ARM DDI 0406C) */ + if ((itbits & 0x7) == 0) + itbits = cond = 0; + else + itbits = (itbits << 1) & 0x1f; + + cpsr &= ~PSR_AA32_IT_MASK; + cpsr |= cond << 13; + cpsr |= (itbits & 0x1c) << (10 - 2); + cpsr |= (itbits & 0x3) << 25; + *vcpu_cpsr(vcpu) = cpsr; +} + +/** + * kvm_skip_instr - skip a trapped instruction and proceed to the next + * @vcpu: The vcpu pointer + */ +void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + u32 pc = *vcpu_pc(vcpu); + bool is_thumb; + + is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_AA32_T_BIT); + if (is_thumb && !is_wide_instr) + pc += 2; + else + pc += 4; + + *vcpu_pc(vcpu) = pc; + + kvm_adjust_itstate(vcpu); +} diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 8a1e81a400e0..676b6585e5ae 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -138,7 +138,7 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) write_sysreg(val, cptr_el2); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; isb(); @@ -181,7 +181,7 @@ static void deactivate_traps_vhe(void) * above before we can switch to the EL2/EL0 translation regime used by * the host. */ - asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT_VHE)); + asm(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_SPECULATIVE_AT)); write_sysreg(CPACR_EL1_DEFAULT, cpacr_el1); write_sysreg(vectors, vbar_el1); @@ -192,7 +192,7 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* @@ -270,8 +270,8 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_save_state(vcpu); - __vgic_v3_deactivate_traps(vcpu); + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_deactivate_traps(&vcpu->arch.vgic_cpu.vgic_v3); } } @@ -279,8 +279,8 @@ static void __hyp_text __hyp_vgic_save_state(struct kvm_vcpu *vcpu) static void __hyp_text __hyp_vgic_restore_state(struct kvm_vcpu *vcpu) { if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) { - __vgic_v3_activate_traps(vcpu); - __vgic_v3_restore_state(vcpu); + __vgic_v3_activate_traps(&vcpu->arch.vgic_cpu.vgic_v3); + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); } } diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 6d2df9fe0b5d..ea5d22fbdacf 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -107,7 +107,8 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - if (!cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (has_vhe() || + !cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); } else if (!ctxt->__hyp_running_vcpu) { @@ -138,7 +139,8 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE) && + if (!has_vhe() && + cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT) && ctxt->__hyp_running_vcpu) { /* * Must only be done for host registers, hence the context diff --git a/arch/arm64/kvm/hyp/timer-sr.c b/arch/arm64/kvm/hyp/timer-sr.c new file mode 100644 index 000000000000..fb5c0be33223 --- /dev/null +++ b/arch/arm64/kvm/hyp/timer-sr.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <clocksource/arm_arch_timer.h> +#include <linux/compiler.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_hyp.h> + +void __hyp_text __kvm_timer_set_cntvoff(u64 cntvoff) +{ + write_sysreg(cntvoff, cntvoff_el2); +} + +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ +void __hyp_text __timer_disable_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* Allow physical timer/counter access for the host */ + val = read_sysreg(cnthctl_el2); + val |= CNTHCTL_EL1PCTEN | CNTHCTL_EL1PCEN; + write_sysreg(val, cnthctl_el2); +} + +/* + * Should only be called on non-VHE systems. + * VHE systems use EL2 timers and configure EL1 timers in kvm_timer_init_vhe(). + */ +void __hyp_text __timer_enable_traps(struct kvm_vcpu *vcpu) +{ + u64 val; + + /* + * Disallow physical timer access for the guest + * Physical counter access is allowed + */ + val = read_sysreg(cnthctl_el2); + val &= ~CNTHCTL_EL1PCEN; + val |= CNTHCTL_EL1PCTEN; + write_sysreg(val, cnthctl_el2); +} diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index ceaddbe4279f..d063a576d511 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -23,7 +23,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, local_irq_save(cxt->flags); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* * For CPUs that are affected by ARM errata 1165522 or 1530923, * we cannot trust stage-1 to be in a correct state at that @@ -63,7 +63,7 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { u64 val; /* @@ -79,8 +79,9 @@ static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, isb(); } + /* __load_guest_stage2() includes an ISB for the workaround. */ __load_guest_stage2(kvm); - isb(); + asm(ALTERNATIVE("isb", "nop", ARM64_WORKAROUND_SPECULATIVE_AT)); } static void __hyp_text __tlb_switch_to_guest(struct kvm *kvm, @@ -103,7 +104,7 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm, write_sysreg(HCR_HOST_VHE_FLAGS, hcr_el2); isb(); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_VHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* Restore the registers to what they were */ write_sysreg_el1(cxt->tcr, SYS_TCR); write_sysreg_el1(cxt->sctlr, SYS_SCTLR); @@ -117,7 +118,7 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, { write_sysreg(0, vttbr_el2); - if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT_NVHE)) { + if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { /* Ensure write of the host VMID */ isb(); /* Restore the host's TCR_EL1 */ diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c new file mode 100644 index 000000000000..10ed539835c1 --- /dev/null +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c @@ -0,0 +1,1113 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012-2015 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/compiler.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> + +#define vtr_to_max_lr_idx(v) ((v) & 0xf) +#define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) +#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) + +static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) +{ + switch (lr & 0xf) { + case 0: + return read_gicreg(ICH_LR0_EL2); + case 1: + return read_gicreg(ICH_LR1_EL2); + case 2: + return read_gicreg(ICH_LR2_EL2); + case 3: + return read_gicreg(ICH_LR3_EL2); + case 4: + return read_gicreg(ICH_LR4_EL2); + case 5: + return read_gicreg(ICH_LR5_EL2); + case 6: + return read_gicreg(ICH_LR6_EL2); + case 7: + return read_gicreg(ICH_LR7_EL2); + case 8: + return read_gicreg(ICH_LR8_EL2); + case 9: + return read_gicreg(ICH_LR9_EL2); + case 10: + return read_gicreg(ICH_LR10_EL2); + case 11: + return read_gicreg(ICH_LR11_EL2); + case 12: + return read_gicreg(ICH_LR12_EL2); + case 13: + return read_gicreg(ICH_LR13_EL2); + case 14: + return read_gicreg(ICH_LR14_EL2); + case 15: + return read_gicreg(ICH_LR15_EL2); + } + + unreachable(); +} + +static void __hyp_text __gic_v3_set_lr(u64 val, int lr) +{ + switch (lr & 0xf) { + case 0: + write_gicreg(val, ICH_LR0_EL2); + break; + case 1: + write_gicreg(val, ICH_LR1_EL2); + break; + case 2: + write_gicreg(val, ICH_LR2_EL2); + break; + case 3: + write_gicreg(val, ICH_LR3_EL2); + break; + case 4: + write_gicreg(val, ICH_LR4_EL2); + break; + case 5: + write_gicreg(val, ICH_LR5_EL2); + break; + case 6: + write_gicreg(val, ICH_LR6_EL2); + break; + case 7: + write_gicreg(val, ICH_LR7_EL2); + break; + case 8: + write_gicreg(val, ICH_LR8_EL2); + break; + case 9: + write_gicreg(val, ICH_LR9_EL2); + break; + case 10: + write_gicreg(val, ICH_LR10_EL2); + break; + case 11: + write_gicreg(val, ICH_LR11_EL2); + break; + case 12: + write_gicreg(val, ICH_LR12_EL2); + break; + case 13: + write_gicreg(val, ICH_LR13_EL2); + break; + case 14: + write_gicreg(val, ICH_LR14_EL2); + break; + case 15: + write_gicreg(val, ICH_LR15_EL2); + break; + } +} + +static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) +{ + switch (n) { + case 0: + write_gicreg(val, ICH_AP0R0_EL2); + break; + case 1: + write_gicreg(val, ICH_AP0R1_EL2); + break; + case 2: + write_gicreg(val, ICH_AP0R2_EL2); + break; + case 3: + write_gicreg(val, ICH_AP0R3_EL2); + break; + } +} + +static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) +{ + switch (n) { + case 0: + write_gicreg(val, ICH_AP1R0_EL2); + break; + case 1: + write_gicreg(val, ICH_AP1R1_EL2); + break; + case 2: + write_gicreg(val, ICH_AP1R2_EL2); + break; + case 3: + write_gicreg(val, ICH_AP1R3_EL2); + break; + } +} + +static u32 __hyp_text __vgic_v3_read_ap0rn(int n) +{ + u32 val; + + switch (n) { + case 0: + val = read_gicreg(ICH_AP0R0_EL2); + break; + case 1: + val = read_gicreg(ICH_AP0R1_EL2); + break; + case 2: + val = read_gicreg(ICH_AP0R2_EL2); + break; + case 3: + val = read_gicreg(ICH_AP0R3_EL2); + break; + default: + unreachable(); + } + + return val; +} + +static u32 __hyp_text __vgic_v3_read_ap1rn(int n) +{ + u32 val; + + switch (n) { + case 0: + val = read_gicreg(ICH_AP1R0_EL2); + break; + case 1: + val = read_gicreg(ICH_AP1R1_EL2); + break; + case 2: + val = read_gicreg(ICH_AP1R2_EL2); + break; + case 3: + val = read_gicreg(ICH_AP1R3_EL2); + break; + default: + unreachable(); + } + + return val; +} + +void __hyp_text __vgic_v3_save_state(struct vgic_v3_cpu_if *cpu_if) +{ + u64 used_lrs = cpu_if->used_lrs; + + /* + * Make sure stores to the GIC via the memory mapped interface + * are now visible to the system register interface when reading the + * LRs, and when reading back the VMCR on non-VHE systems. + */ + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) { + dsb(sy); + isb(); + } + } + + if (used_lrs || cpu_if->its_vpe.its_vm) { + int i; + u32 elrsr; + + elrsr = read_gicreg(ICH_ELRSR_EL2); + + write_gicreg(cpu_if->vgic_hcr & ~ICH_HCR_EN, ICH_HCR_EL2); + + for (i = 0; i < used_lrs; i++) { + if (elrsr & (1 << i)) + cpu_if->vgic_lr[i] &= ~ICH_LR_STATE; + else + cpu_if->vgic_lr[i] = __gic_v3_get_lr(i); + + __gic_v3_set_lr(0, i); + } + } +} + +void __hyp_text __vgic_v3_restore_state(struct vgic_v3_cpu_if *cpu_if) +{ + u64 used_lrs = cpu_if->used_lrs; + int i; + + if (used_lrs || cpu_if->its_vpe.its_vm) { + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); + + for (i = 0; i < used_lrs; i++) + __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } + + /* + * Ensure that writes to the LRs, and on non-VHE systems ensure that + * the write to the VMCR in __vgic_v3_activate_traps(), will have + * reached the (re)distributors. This ensure the guest will read the + * correct values from the memory-mapped interface. + */ + if (used_lrs || !has_vhe()) { + if (!cpu_if->vgic_sre) { + isb(); + dsb(sy); + } + } +} + +void __hyp_text __vgic_v3_activate_traps(struct vgic_v3_cpu_if *cpu_if) +{ + /* + * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a + * Group0 interrupt (as generated in GICv2 mode) to be + * delivered as a FIQ to the guest, with potentially fatal + * consequences. So we must make sure that ICC_SRE_EL1 has + * been actually programmed with the value we want before + * starting to mess with the rest of the GIC, and VMCR_EL2 in + * particular. This logic must be called before + * __vgic_v3_restore_state(). + */ + if (!cpu_if->vgic_sre) { + write_gicreg(0, ICC_SRE_EL1); + isb(); + write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2); + + + if (has_vhe()) { + /* + * Ensure that the write to the VMCR will have reached + * the (re)distributors. This ensure the guest will + * read the correct values from the memory-mapped + * interface. + */ + isb(); + dsb(sy); + } + } + + /* + * Prevent the guest from touching the GIC system registers if + * SRE isn't enabled for GICv3 emulation. + */ + write_gicreg(read_gicreg(ICC_SRE_EL2) & ~ICC_SRE_EL2_ENABLE, + ICC_SRE_EL2); + + /* + * If we need to trap system registers, we must write + * ICH_HCR_EL2 anyway, even if no interrupts are being + * injected, + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); +} + +void __hyp_text __vgic_v3_deactivate_traps(struct vgic_v3_cpu_if *cpu_if) +{ + u64 val; + + if (!cpu_if->vgic_sre) { + cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2); + } + + val = read_gicreg(ICC_SRE_EL2); + write_gicreg(val | ICC_SRE_EL2_ENABLE, ICC_SRE_EL2); + + if (!cpu_if->vgic_sre) { + /* Make sure ENABLE is set at EL2 before setting SRE at EL1 */ + isb(); + write_gicreg(1, ICC_SRE_EL1); + } + + /* + * If we were trapping system registers, we enabled the VGIC even if + * no interrupts were being injected, and we disable it again here. + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap) || + cpu_if->its_vpe.its_vm) + write_gicreg(0, ICH_HCR_EL2); +} + +void __hyp_text __vgic_v3_save_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + u64 val; + u32 nr_pre_bits; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); + cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); + /* Fall through */ + case 6: + cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); + /* Fall through */ + default: + cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); + } + + switch (nr_pre_bits) { + case 7: + cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); + cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); + /* Fall through */ + case 6: + cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); + /* Fall through */ + default: + cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); + } +} + +void __hyp_text __vgic_v3_restore_aprs(struct vgic_v3_cpu_if *cpu_if) +{ + u64 val; + u32 nr_pre_bits; + + val = read_gicreg(ICH_VTR_EL2); + nr_pre_bits = vtr_to_nr_pre_bits(val); + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); + /* Fall through */ + case 6: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); + /* Fall through */ + default: + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); + } + + switch (nr_pre_bits) { + case 7: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); + /* Fall through */ + case 6: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); + /* Fall through */ + default: + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); + } +} + +void __hyp_text __vgic_v3_init_lrs(void) +{ + int max_lr_idx = vtr_to_max_lr_idx(read_gicreg(ICH_VTR_EL2)); + int i; + + for (i = 0; i <= max_lr_idx; i++) + __gic_v3_set_lr(0, i); +} + +u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) +{ + return read_gicreg(ICH_VTR_EL2); +} + +u64 __hyp_text __vgic_v3_read_vmcr(void) +{ + return read_gicreg(ICH_VMCR_EL2); +} + +void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) +{ + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +static int __hyp_text __vgic_v3_bpr_min(void) +{ + /* See Pseudocode for VPriorityGroup */ + return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); +} + +static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + + return crm != 8; +} + +#define GICv3_IDLE_PRIORITY 0xff + +static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, + u32 vmcr, + u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + u8 priority = GICv3_IDLE_PRIORITY; + int i, lr = -1; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* Not pending in the state? */ + if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT) + continue; + + /* Group-0 interrupt, but Group-0 disabled? */ + if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) + continue; + + /* Group-1 interrupt, but Group-1 disabled? */ + if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) + continue; + + /* Not the highest priority? */ + if (lr_prio >= priority) + continue; + + /* This is a candidate */ + priority = lr_prio; + *lr_val = val; + lr = i; + } + + if (lr == -1) + *lr_val = ICC_IAR1_EL1_SPURIOUS; + + return lr; +} + +static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, + int intid, u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + int i; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + + if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid && + (val & ICH_LR_ACTIVE_BIT)) { + *lr_val = val; + return i; + } + } + + *lr_val = ICC_IAR1_EL1_SPURIOUS; + return -1; +} + +static int __hyp_text __vgic_v3_get_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 val; + + /* + * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers + * contain the active priority levels for this VCPU + * for the maximum number of supported priority + * levels, and we return the full priority level only + * if the BPR is programmed to its minimum, otherwise + * we return a combination of the priority level and + * subpriority, as determined by the setting of the + * BPR, but without the full subpriority. + */ + val = __vgic_v3_read_ap0rn(i); + val |= __vgic_v3_read_ap1rn(i); + if (!val) { + hap += 32; + continue; + } + + return (hap + __ffs(val)) << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + +static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) +{ + return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; +} + +static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) +{ + unsigned int bpr; + + if (vmcr & ICH_VMCR_CBPR_MASK) { + bpr = __vgic_v3_get_bpr0(vmcr); + if (bpr < 7) + bpr++; + } else { + bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + } + + return bpr; +} + +/* + * Convert a priority to a preemption level, taking the relevant BPR + * into account by zeroing the sub-priority bits. + */ +static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +{ + unsigned int bpr; + + if (!grp) + bpr = __vgic_v3_get_bpr0(vmcr) + 1; + else + bpr = __vgic_v3_get_bpr1(vmcr); + + return pri & (GENMASK(7, 0) << bpr); +} + +/* + * The priority value is independent of any of the BPR values, so we + * normalize it using the minimal BPR value. This guarantees that no + * matter what the guest does with its BPR, we can always set/get the + * same value of a priority. + */ +static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +{ + u8 pre, ap; + u32 val; + int apr; + + pre = __vgic_v3_pri_to_pre(pri, vmcr, grp); + ap = pre >> __vgic_v3_bpr_min(); + apr = ap / 32; + + if (!grp) { + val = __vgic_v3_read_ap0rn(apr); + __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr); + } else { + val = __vgic_v3_read_ap1rn(apr); + __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr); + } +} + +static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 ap0, ap1; + int c0, c1; + + ap0 = __vgic_v3_read_ap0rn(i); + ap1 = __vgic_v3_read_ap1rn(i); + if (!ap0 && !ap1) { + hap += 32; + continue; + } + + c0 = ap0 ? __ffs(ap0) : 32; + c1 = ap1 ? __ffs(ap1) : 32; + + /* Always clear the LSB, which is the highest priority */ + if (c0 < c1) { + ap0 &= ~BIT(c0); + __vgic_v3_write_ap0rn(ap0, i); + hap += c0; + } else { + ap1 &= ~BIT(c1); + __vgic_v3_write_ap1rn(ap1, i); + hap += c1; + } + + /* Rescale to 8 bits of priority */ + return hap << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + +static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 lr_val; + u8 lr_prio, pmr; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr < 0) + goto spurious; + + if (grp != !!(lr_val & ICH_LR_GROUP)) + goto spurious; + + pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + if (pmr <= lr_prio) + goto spurious; + + if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp)) + goto spurious; + + lr_val &= ~ICH_LR_STATE; + /* No active state for LPIs */ + if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) + lr_val |= ICH_LR_ACTIVE_BIT; + __gic_v3_set_lr(lr_val, lr); + __vgic_v3_set_active_priority(lr_prio, vmcr, grp); + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); + return; + +spurious: + vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); +} + +static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +{ + lr_val &= ~ICH_LR_ACTIVE_BIT; + if (lr_val & ICH_LR_HW) { + u32 pid; + + pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT; + gic_write_dir(pid); + } + + __gic_v3_set_lr(lr_val, lr); +} + +static void __hyp_text __vgic_v3_bump_eoicount(void) +{ + u32 hcr; + + hcr = read_gicreg(ICH_HCR_EL2); + hcr += 1 << ICH_HCR_EOIcount_SHIFT; + write_gicreg(hcr, ICH_HCR_EL2); +} + +static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + int lr; + + /* EOImode == 0, nothing to be done here */ + if (!(vmcr & ICH_VMCR_EOIM_MASK)) + return; + + /* No deactivate to be performed on an LPI */ + if (vid >= VGIC_MIN_LPI) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + __vgic_v3_clear_active_lr(lr, lr_val); +} + +static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + u8 lr_prio, act_prio; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + /* Drop priority in any case */ + act_prio = __vgic_v3_clear_highest_active_priority(); + + /* If EOIing an LPI, no deactivate to be performed */ + if (vid >= VGIC_MIN_LPI) + return; + + /* EOImode == 1, nothing to be done here */ + if (vmcr & ICH_VMCR_EOIM_MASK) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* If priorities or group do not match, the guest has fscked-up. */ + if (grp != !!(lr_val & ICH_LR_GROUP) || + __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio) + return; + + /* Let's now perform the deactivation */ + __vgic_v3_clear_active_lr(lr, lr_val); +} + +static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); +} + +static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); +} + +static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + + if (val & 1) + vmcr |= ICH_VMCR_ENG0_MASK; + else + vmcr &= ~ICH_VMCR_ENG0_MASK; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + + if (val & 1) + vmcr |= ICH_VMCR_ENG1_MASK; + else + vmcr &= ~ICH_VMCR_ENG1_MASK; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); +} + +static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); +} + +static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + u8 bpr_min = __vgic_v3_bpr_min() - 1; + + /* Enforce BPR limiting */ + if (val < bpr_min) + val = bpr_min; + + val <<= ICH_VMCR_BPR0_SHIFT; + val &= ICH_VMCR_BPR0_MASK; + vmcr &= ~ICH_VMCR_BPR0_MASK; + vmcr |= val; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + u8 bpr_min = __vgic_v3_bpr_min(); + + if (vmcr & ICH_VMCR_CBPR_MASK) + return; + + /* Enforce BPR limiting */ + if (val < bpr_min) + val = bpr_min; + + val <<= ICH_VMCR_BPR1_SHIFT; + val &= ICH_VMCR_BPR1_MASK; + vmcr &= ~ICH_VMCR_BPR1_MASK; + vmcr |= val; + + __vgic_v3_write_vmcr(vmcr); +} + +static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +{ + u32 val; + + if (!__vgic_v3_get_group(vcpu)) + val = __vgic_v3_read_ap0rn(n); + else + val = __vgic_v3_read_ap1rn(n); + + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + if (!__vgic_v3_get_group(vcpu)) + __vgic_v3_write_ap0rn(val, n); + else + __vgic_v3_write_ap1rn(val, n); +} + +static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 0); +} + +static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 1); +} + +static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 2); +} + +static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 3); +} + +static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 0); +} + +static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 1); +} + +static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 2); +} + +static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 3); +} + +static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u64 lr_val; + int lr, lr_grp, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr == -1) + goto spurious; + + lr_grp = !!(lr_val & ICH_LR_GROUP); + if (lr_grp != grp) + lr_val = ICC_IAR1_EL1_SPURIOUS; + +spurious: + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); +} + +static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + vmcr &= ICH_VMCR_PMR_MASK; + vmcr >>= ICH_VMCR_PMR_SHIFT; + vcpu_set_reg(vcpu, rt, vmcr); +} + +static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + val <<= ICH_VMCR_PMR_SHIFT; + val &= ICH_VMCR_PMR_MASK; + vmcr &= ~ICH_VMCR_PMR_MASK; + vmcr |= val; + + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = __vgic_v3_get_highest_active_priority(); + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 vtr, val; + + vtr = read_gicreg(ICH_VTR_EL2); + /* PRIbits */ + val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; + /* IDbits */ + val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; + /* SEIS */ + val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT; + /* A3V */ + val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; + /* EOImode */ + val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT; + /* CBPR */ + val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + if (val & ICC_CTLR_EL1_CBPR_MASK) + vmcr |= ICH_VMCR_CBPR_MASK; + else + vmcr &= ~ICH_VMCR_CBPR_MASK; + + if (val & ICC_CTLR_EL1_EOImode_MASK) + vmcr |= ICH_VMCR_EOIM_MASK; + else + vmcr &= ~ICH_VMCR_EOIM_MASK; + + write_gicreg(vmcr, ICH_VMCR_EL2); +} + +int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +{ + int rt; + u32 esr; + u32 vmcr; + void (*fn)(struct kvm_vcpu *, u32, int); + bool is_read; + u32 sysreg; + + esr = kvm_vcpu_get_hsr(vcpu); + if (vcpu_mode_is_32bit(vcpu)) { + if (!kvm_condition_valid(vcpu)) { + __kvm_skip_instr(vcpu); + return 1; + } + + sysreg = esr_cp15_to_sysreg(esr); + } else { + sysreg = esr_sys64_to_sysreg(esr); + } + + is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; + + switch (sysreg) { + case SYS_ICC_IAR0_EL1: + case SYS_ICC_IAR1_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_iar; + break; + case SYS_ICC_EOIR0_EL1: + case SYS_ICC_EOIR1_EL1: + if (unlikely(is_read)) + return 0; + fn = __vgic_v3_write_eoir; + break; + case SYS_ICC_IGRPEN1_EL1: + if (is_read) + fn = __vgic_v3_read_igrpen1; + else + fn = __vgic_v3_write_igrpen1; + break; + case SYS_ICC_BPR1_EL1: + if (is_read) + fn = __vgic_v3_read_bpr1; + else + fn = __vgic_v3_write_bpr1; + break; + case SYS_ICC_AP0Rn_EL1(0): + case SYS_ICC_AP1Rn_EL1(0): + if (is_read) + fn = __vgic_v3_read_apxr0; + else + fn = __vgic_v3_write_apxr0; + break; + case SYS_ICC_AP0Rn_EL1(1): + case SYS_ICC_AP1Rn_EL1(1): + if (is_read) + fn = __vgic_v3_read_apxr1; + else + fn = __vgic_v3_write_apxr1; + break; + case SYS_ICC_AP0Rn_EL1(2): + case SYS_ICC_AP1Rn_EL1(2): + if (is_read) + fn = __vgic_v3_read_apxr2; + else + fn = __vgic_v3_write_apxr2; + break; + case SYS_ICC_AP0Rn_EL1(3): + case SYS_ICC_AP1Rn_EL1(3): + if (is_read) + fn = __vgic_v3_read_apxr3; + else + fn = __vgic_v3_write_apxr3; + break; + case SYS_ICC_HPPIR0_EL1: + case SYS_ICC_HPPIR1_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_hppir; + break; + case SYS_ICC_IGRPEN0_EL1: + if (is_read) + fn = __vgic_v3_read_igrpen0; + else + fn = __vgic_v3_write_igrpen0; + break; + case SYS_ICC_BPR0_EL1: + if (is_read) + fn = __vgic_v3_read_bpr0; + else + fn = __vgic_v3_write_bpr0; + break; + case SYS_ICC_DIR_EL1: + if (unlikely(is_read)) + return 0; + fn = __vgic_v3_write_dir; + break; + case SYS_ICC_RPR_EL1: + if (unlikely(!is_read)) + return 0; + fn = __vgic_v3_read_rpr; + break; + case SYS_ICC_CTLR_EL1: + if (is_read) + fn = __vgic_v3_read_ctlr; + else + fn = __vgic_v3_write_ctlr; + break; + case SYS_ICC_PMR_EL1: + if (is_read) + fn = __vgic_v3_read_pmr; + else + fn = __vgic_v3_write_pmr; + break; + default: + return 0; + } + + vmcr = __vgic_v3_read_vmcr(); + rt = kvm_vcpu_sys_get_rt(vcpu); + fn(vcpu, vmcr, rt); + + __kvm_skip_instr(vcpu); + + return 1; +} diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c new file mode 100644 index 000000000000..550dfa3e53cd --- /dev/null +++ b/arch/arm64/kvm/hypercalls.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +#include <kvm/arm_hypercalls.h> +#include <kvm/arm_psci.h> + +int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) +{ + u32 func_id = smccc_get_function(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + u32 feature; + gpa_t gpa; + + switch (func_id) { + case ARM_SMCCC_VERSION_FUNC_ID: + val = ARM_SMCCC_VERSION_1_1; + break; + case ARM_SMCCC_ARCH_FEATURES_FUNC_ID: + feature = smccc_get_arg1(vcpu); + switch (feature) { + case ARM_SMCCC_ARCH_WORKAROUND_1: + switch (kvm_arm_harden_branch_predictor()) { + case KVM_BP_HARDEN_UNKNOWN: + break; + case KVM_BP_HARDEN_WA_NEEDED: + val = SMCCC_RET_SUCCESS; + break; + case KVM_BP_HARDEN_NOT_REQUIRED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + case ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + case KVM_SSBD_UNKNOWN: + break; + case KVM_SSBD_KERNEL: + val = SMCCC_RET_SUCCESS; + break; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } + break; + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = SMCCC_RET_SUCCESS; + break; + } + break; + case ARM_SMCCC_HV_PV_TIME_FEATURES: + val = kvm_hypercall_pv_features(vcpu); + break; + case ARM_SMCCC_HV_PV_TIME_ST: + gpa = kvm_init_stolen_time(vcpu); + if (gpa != GPA_INVALID) + val = gpa; + break; + default: + return kvm_psci_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 6aafc2825c1c..e21fdd93027a 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -26,28 +26,12 @@ enum exception_type { except_type_serror = 0x180, }; -static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) -{ - u64 exc_offset; - - switch (*vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT)) { - case PSR_MODE_EL1t: - exc_offset = CURRENT_EL_SP_EL0_VECTOR; - break; - case PSR_MODE_EL1h: - exc_offset = CURRENT_EL_SP_ELx_VECTOR; - break; - case PSR_MODE_EL0t: - exc_offset = LOWER_EL_AArch64_VECTOR; - break; - default: - exc_offset = LOWER_EL_AArch32_VECTOR; - } - - return vcpu_read_sys_reg(vcpu, VBAR_EL1) + exc_offset + type; -} - /* + * This performs the exception entry at a given EL (@target_mode), stashing PC + * and PSTATE into ELR and SPSR respectively, and compute the new PC/PSTATE. + * The EL passed to this function *must* be a non-secure, privileged mode with + * bit 0 being set (PSTATE.SP == 1). + * * When an exception is taken, most PSTATE fields are left unchanged in the * handler. However, some are explicitly overridden (e.g. M[4:0]). Luckily all * of the inherited bits have the same position in the AArch64/AArch32 SPSR_ELx @@ -59,10 +43,35 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) * Here we manipulate the fields in order of the AArch64 SPSR_ELx layout, from * MSB to LSB. */ -static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) +static void enter_exception64(struct kvm_vcpu *vcpu, unsigned long target_mode, + enum exception_type type) { - unsigned long sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); - unsigned long old, new; + unsigned long sctlr, vbar, old, new, mode; + u64 exc_offset; + + mode = *vcpu_cpsr(vcpu) & (PSR_MODE_MASK | PSR_MODE32_BIT); + + if (mode == target_mode) + exc_offset = CURRENT_EL_SP_ELx_VECTOR; + else if ((mode | PSR_MODE_THREAD_BIT) == target_mode) + exc_offset = CURRENT_EL_SP_EL0_VECTOR; + else if (!(mode & PSR_MODE32_BIT)) + exc_offset = LOWER_EL_AArch64_VECTOR; + else + exc_offset = LOWER_EL_AArch32_VECTOR; + + switch (target_mode) { + case PSR_MODE_EL1h: + vbar = vcpu_read_sys_reg(vcpu, VBAR_EL1); + sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); + vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); + break; + default: + /* Don't do that */ + BUG(); + } + + *vcpu_pc(vcpu) = vbar + exc_offset + type; old = *vcpu_cpsr(vcpu); new = 0; @@ -105,9 +114,10 @@ static unsigned long get_except64_pstate(struct kvm_vcpu *vcpu) new |= PSR_I_BIT; new |= PSR_F_BIT; - new |= PSR_MODE_EL1h; + new |= target_mode; - return new; + *vcpu_cpsr(vcpu) = new; + vcpu_write_spsr(vcpu, old); } static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) @@ -116,11 +126,7 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - - *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); - vcpu_write_spsr(vcpu, cpsr); + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); vcpu_write_sys_reg(vcpu, addr, FAR_EL1); @@ -148,14 +154,9 @@ static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr static void inject_undef64(struct kvm_vcpu *vcpu) { - unsigned long cpsr = *vcpu_cpsr(vcpu); u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - vcpu_write_elr_el1(vcpu, *vcpu_pc(vcpu)); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); - - *vcpu_cpsr(vcpu) = get_except64_pstate(vcpu); - vcpu_write_spsr(vcpu, cpsr); + enter_exception64(vcpu, PSR_MODE_EL1h, except_type_sync); /* * Build an unknown exception, depending on the instruction diff --git a/arch/arm64/kvm/mmio.c b/arch/arm64/kvm/mmio.c new file mode 100644 index 000000000000..4e0366759726 --- /dev/null +++ b/arch/arm64/kvm/mmio.c @@ -0,0 +1,200 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/kvm_host.h> +#include <asm/kvm_emulate.h> +#include <trace/events/kvm.h> + +#include "trace.h" + +void kvm_mmio_write_buf(void *buf, unsigned int len, unsigned long data) +{ + void *datap = NULL; + union { + u8 byte; + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + tmp.byte = data; + datap = &tmp.byte; + break; + case 2: + tmp.hword = data; + datap = &tmp.hword; + break; + case 4: + tmp.word = data; + datap = &tmp.word; + break; + case 8: + tmp.dword = data; + datap = &tmp.dword; + break; + } + + memcpy(buf, datap, len); +} + +unsigned long kvm_mmio_read_buf(const void *buf, unsigned int len) +{ + unsigned long data = 0; + union { + u16 hword; + u32 word; + u64 dword; + } tmp; + + switch (len) { + case 1: + data = *(u8 *)buf; + break; + case 2: + memcpy(&tmp.hword, buf, len); + data = tmp.hword; + break; + case 4: + memcpy(&tmp.word, buf, len); + data = tmp.word; + break; + case 8: + memcpy(&tmp.dword, buf, len); + data = tmp.dword; + break; + } + + return data; +} + +/** + * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation + * or in-kernel IO emulation + * + * @vcpu: The VCPU pointer + * @run: The VCPU run struct containing the mmio data + */ +int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long data; + unsigned int len; + int mask; + + /* Detect an already handled MMIO return */ + if (unlikely(!vcpu->mmio_needed)) + return 0; + + vcpu->mmio_needed = 0; + + if (!kvm_vcpu_dabt_iswrite(vcpu)) { + len = kvm_vcpu_dabt_get_as(vcpu); + data = kvm_mmio_read_buf(run->mmio.data, len); + + if (kvm_vcpu_dabt_issext(vcpu) && + len < sizeof(unsigned long)) { + mask = 1U << ((len * 8) - 1); + data = (data ^ mask) - mask; + } + + if (!kvm_vcpu_dabt_issf(vcpu)) + data = data & 0xffffffff; + + trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr, + &data); + data = vcpu_data_host_to_guest(vcpu, data, len); + vcpu_set_reg(vcpu, kvm_vcpu_dabt_get_rd(vcpu), data); + } + + /* + * The MMIO instruction is emulated and should not be re-executed + * in the guest. + */ + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + + return 0; +} + +int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run, + phys_addr_t fault_ipa) +{ + unsigned long data; + unsigned long rt; + int ret; + bool is_write; + int len; + u8 data_buf[8]; + + /* + * No valid syndrome? Ask userspace for help if it has + * volunteered to do so, and bail out otherwise. + */ + if (!kvm_vcpu_dabt_isvalid(vcpu)) { + if (vcpu->kvm->arch.return_nisv_io_abort_to_user) { + run->exit_reason = KVM_EXIT_ARM_NISV; + run->arm_nisv.esr_iss = kvm_vcpu_dabt_iss_nisv_sanitized(vcpu); + run->arm_nisv.fault_ipa = fault_ipa; + return 0; + } + + kvm_pr_unimpl("Data abort outside memslots with no valid syndrome info\n"); + return -ENOSYS; + } + + /* Page table accesses IO mem: tell guest to fix its TTBR */ + if (kvm_vcpu_dabt_iss1tw(vcpu)) { + kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + return 1; + } + + /* + * Prepare MMIO operation. First decode the syndrome data we get + * from the CPU. Then try if some in-kernel emulation feels + * responsible, otherwise let user space do its magic. + */ + is_write = kvm_vcpu_dabt_iswrite(vcpu); + len = kvm_vcpu_dabt_get_as(vcpu); + rt = kvm_vcpu_dabt_get_rd(vcpu); + + if (is_write) { + data = vcpu_data_guest_to_host(vcpu, vcpu_get_reg(vcpu, rt), + len); + + trace_kvm_mmio(KVM_TRACE_MMIO_WRITE, len, fault_ipa, &data); + kvm_mmio_write_buf(data_buf, len, data); + + ret = kvm_io_bus_write(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); + } else { + trace_kvm_mmio(KVM_TRACE_MMIO_READ_UNSATISFIED, len, + fault_ipa, NULL); + + ret = kvm_io_bus_read(vcpu, KVM_MMIO_BUS, fault_ipa, len, + data_buf); + } + + /* Now prepare kvm_run for the potential return to userland. */ + run->mmio.is_write = is_write; + run->mmio.phys_addr = fault_ipa; + run->mmio.len = len; + vcpu->mmio_needed = 1; + + if (!ret) { + /* We handled the access successfully in the kernel. */ + if (!is_write) + memcpy(run->mmio.data, data_buf, len); + vcpu->stat.mmio_exit_kernel++; + kvm_handle_mmio_return(vcpu, run); + return 1; + } + + if (is_write) + memcpy(run->mmio.data, data_buf, len); + vcpu->stat.mmio_exit_user++; + run->exit_reason = KVM_EXIT_MMIO; + return 0; +} diff --git a/arch/arm64/kvm/mmu.c b/arch/arm64/kvm/mmu.c new file mode 100644 index 000000000000..a1f6bc70c4e4 --- /dev/null +++ b/arch/arm64/kvm/mmu.c @@ -0,0 +1,2467 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 - Virtual Open Systems and Columbia University + * Author: Christoffer Dall <c.dall@virtualopensystems.com> + */ + +#include <linux/mman.h> +#include <linux/kvm_host.h> +#include <linux/io.h> +#include <linux/hugetlb.h> +#include <linux/sched/signal.h> +#include <trace/events/kvm.h> +#include <asm/pgalloc.h> +#include <asm/cacheflush.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_ras.h> +#include <asm/kvm_asm.h> +#include <asm/kvm_emulate.h> +#include <asm/virt.h> + +#include "trace.h" + +static pgd_t *boot_hyp_pgd; +static pgd_t *hyp_pgd; +static pgd_t *merged_hyp_pgd; +static DEFINE_MUTEX(kvm_hyp_pgd_mutex); + +static unsigned long hyp_idmap_start; +static unsigned long hyp_idmap_end; +static phys_addr_t hyp_idmap_vector; + +static unsigned long io_map_base; + +#define hyp_pgd_order get_order(PTRS_PER_PGD * sizeof(pgd_t)) + +#define KVM_S2PTE_FLAG_IS_IOMAP (1UL << 0) +#define KVM_S2_FLAG_LOGGING_ACTIVE (1UL << 1) + +static bool is_iomap(unsigned long flags) +{ + return flags & KVM_S2PTE_FLAG_IS_IOMAP; +} + +static bool memslot_is_logging(struct kvm_memory_slot *memslot) +{ + return memslot->dirty_bitmap && !(memslot->flags & KVM_MEM_READONLY); +} + +/** + * kvm_flush_remote_tlbs() - flush all VM TLB entries for v7/8 + * @kvm: pointer to kvm structure. + * + * Interface to HYP function to flush all VM TLB entries + */ +void kvm_flush_remote_tlbs(struct kvm *kvm) +{ + kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); +} + +static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +{ + kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); +} + +/* + * D-Cache management functions. They take the page table entries by + * value, as they are flushing the cache using the kernel mapping (or + * kmap on 32bit). + */ +static void kvm_flush_dcache_pte(pte_t pte) +{ + __kvm_flush_dcache_pte(pte); +} + +static void kvm_flush_dcache_pmd(pmd_t pmd) +{ + __kvm_flush_dcache_pmd(pmd); +} + +static void kvm_flush_dcache_pud(pud_t pud) +{ + __kvm_flush_dcache_pud(pud); +} + +static bool kvm_is_device_pfn(unsigned long pfn) +{ + return !pfn_valid(pfn); +} + +/** + * stage2_dissolve_pmd() - clear and flush huge PMD entry + * @kvm: pointer to kvm structure. + * @addr: IPA + * @pmd: pmd pointer for IPA + * + * Function clears a PMD entry, flushes addr 1st and 2nd stage TLBs. + */ +static void stage2_dissolve_pmd(struct kvm *kvm, phys_addr_t addr, pmd_t *pmd) +{ + if (!pmd_thp_or_huge(*pmd)) + return; + + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pmd)); +} + +/** + * stage2_dissolve_pud() - clear and flush huge PUD entry + * @kvm: pointer to kvm structure. + * @addr: IPA + * @pud: pud pointer for IPA + * + * Function clears a PUD entry, flushes addr 1st and 2nd stage TLBs. + */ +static void stage2_dissolve_pud(struct kvm *kvm, phys_addr_t addr, pud_t *pudp) +{ + if (!stage2_pud_huge(kvm, *pudp)) + return; + + stage2_pud_clear(kvm, pudp); + kvm_tlb_flush_vmid_ipa(kvm, addr); + put_page(virt_to_page(pudp)); +} + +static int mmu_topup_memory_cache(struct kvm_mmu_memory_cache *cache, + int min, int max) +{ + void *page; + + BUG_ON(max > KVM_NR_MEM_OBJS); + if (cache->nobjs >= min) + return 0; + while (cache->nobjs < max) { + page = (void *)__get_free_page(GFP_PGTABLE_USER); + if (!page) + return -ENOMEM; + cache->objects[cache->nobjs++] = page; + } + return 0; +} + +static void mmu_free_memory_cache(struct kvm_mmu_memory_cache *mc) +{ + while (mc->nobjs) + free_page((unsigned long)mc->objects[--mc->nobjs]); +} + +static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc) +{ + void *p; + + BUG_ON(!mc || !mc->nobjs); + p = mc->objects[--mc->nobjs]; + return p; +} + +static void clear_stage2_pgd_entry(struct kvm *kvm, pgd_t *pgd, phys_addr_t addr) +{ + pud_t *pud_table __maybe_unused = stage2_pud_offset(kvm, pgd, 0UL); + stage2_pgd_clear(kvm, pgd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + stage2_pud_free(kvm, pud_table); + put_page(virt_to_page(pgd)); +} + +static void clear_stage2_pud_entry(struct kvm *kvm, pud_t *pud, phys_addr_t addr) +{ + pmd_t *pmd_table __maybe_unused = stage2_pmd_offset(kvm, pud, 0); + VM_BUG_ON(stage2_pud_huge(kvm, *pud)); + stage2_pud_clear(kvm, pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + stage2_pmd_free(kvm, pmd_table); + put_page(virt_to_page(pud)); +} + +static void clear_stage2_pmd_entry(struct kvm *kvm, pmd_t *pmd, phys_addr_t addr) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + free_page((unsigned long)pte_table); + put_page(virt_to_page(pmd)); +} + +static inline void kvm_set_pte(pte_t *ptep, pte_t new_pte) +{ + WRITE_ONCE(*ptep, new_pte); + dsb(ishst); +} + +static inline void kvm_set_pmd(pmd_t *pmdp, pmd_t new_pmd) +{ + WRITE_ONCE(*pmdp, new_pmd); + dsb(ishst); +} + +static inline void kvm_pmd_populate(pmd_t *pmdp, pte_t *ptep) +{ + kvm_set_pmd(pmdp, kvm_mk_pmd(ptep)); +} + +static inline void kvm_pud_populate(pud_t *pudp, pmd_t *pmdp) +{ + WRITE_ONCE(*pudp, kvm_mk_pud(pmdp)); + dsb(ishst); +} + +static inline void kvm_pgd_populate(pgd_t *pgdp, pud_t *pudp) +{ + WRITE_ONCE(*pgdp, kvm_mk_pgd(pudp)); + dsb(ishst); +} + +/* + * Unmapping vs dcache management: + * + * If a guest maps certain memory pages as uncached, all writes will + * bypass the data cache and go directly to RAM. However, the CPUs + * can still speculate reads (not writes) and fill cache lines with + * data. + * + * Those cache lines will be *clean* cache lines though, so a + * clean+invalidate operation is equivalent to an invalidate + * operation, because no cache lines are marked dirty. + * + * Those clean cache lines could be filled prior to an uncached write + * by the guest, and the cache coherent IO subsystem would therefore + * end up writing old data to disk. + * + * This is why right after unmapping a page/section and invalidating + * the corresponding TLBs, we call kvm_flush_dcache_p*() to make sure + * the IO subsystem will never hit in the cache. + * + * This is all avoided on systems that have ARM64_HAS_STAGE2_FWB, as + * we then fully enforce cacheability of RAM, no matter what the guest + * does. + */ +static void unmap_stage2_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t start_addr = addr; + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + pte_t old_pte = *pte; + + kvm_set_pte(pte, __pte(0)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + + /* No need to invalidate the cache for device mappings */ + if (!kvm_is_device_pfn(pte_pfn(old_pte))) + kvm_flush_dcache_pte(old_pte); + + put_page(virt_to_page(pte)); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (stage2_pte_table_empty(kvm, start_pte)) + clear_stage2_pmd_entry(kvm, pmd, start_addr); +} + +static void unmap_stage2_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = stage2_pmd_offset(kvm, pud, addr); + do { + next = stage2_pmd_addr_end(kvm, addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { + pmd_t old_pmd = *pmd; + + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + + kvm_flush_dcache_pmd(old_pmd); + + put_page(virt_to_page(pmd)); + } else { + unmap_stage2_ptes(kvm, pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); + + if (stage2_pmd_table_empty(kvm, start_pmd)) + clear_stage2_pud_entry(kvm, pud, start_addr); +} + +static void unmap_stage2_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next, start_addr = addr; + pud_t *pud, *start_pud; + + start_pud = pud = stage2_pud_offset(kvm, pgd, addr); + do { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) { + pud_t old_pud = *pud; + + stage2_pud_clear(kvm, pud); + kvm_tlb_flush_vmid_ipa(kvm, addr); + kvm_flush_dcache_pud(old_pud); + put_page(virt_to_page(pud)); + } else { + unmap_stage2_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); + + if (stage2_pud_table_empty(kvm, start_pud)) + clear_stage2_pgd_entry(kvm, pgd, start_addr); +} + +/** + * unmap_stage2_range -- Clear stage2 page table entries to unmap a range + * @kvm: The VM pointer + * @start: The intermediate physical base address of the range to unmap + * @size: The size of the area to unmap + * + * Clear a range of stage-2 mappings, lowering the various ref-counts. Must + * be called while holding mmu_lock (unless for freeing the stage2 pgd before + * destroying the VM), otherwise another faulting VCPU may come in and mess + * with things behind our backs. + */ +static void unmap_stage2_range(struct kvm *kvm, phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + assert_spin_locked(&kvm->mmu_lock); + WARN_ON(size & ~PAGE_MASK); + + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + do { + /* + * Make sure the page table is still active, as another thread + * could have possibly freed the page table, while we released + * the lock. + */ + if (!READ_ONCE(kvm->arch.pgd)) + break; + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) + unmap_stage2_puds(kvm, pgd, addr, next); + /* + * If the range is too large, release the kvm->mmu_lock + * to prevent starvation and lockup detector warnings. + */ + if (next != end) + cond_resched_lock(&kvm->mmu_lock); + } while (pgd++, addr = next, addr != end); +} + +static void stage2_flush_ptes(struct kvm *kvm, pmd_t *pmd, + phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte) && !kvm_is_device_pfn(pte_pfn(*pte))) + kvm_flush_dcache_pte(*pte); + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +static void stage2_flush_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = stage2_pmd_offset(kvm, pud, addr); + do { + next = stage2_pmd_addr_end(kvm, addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) + kvm_flush_dcache_pmd(*pmd); + else + stage2_flush_ptes(kvm, pmd, addr, next); + } + } while (pmd++, addr = next, addr != end); +} + +static void stage2_flush_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = stage2_pud_offset(kvm, pgd, addr); + do { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) + kvm_flush_dcache_pud(*pud); + else + stage2_flush_pmds(kvm, pud, addr, next); + } + } while (pud++, addr = next, addr != end); +} + +static void stage2_flush_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t end = addr + PAGE_SIZE * memslot->npages; + phys_addr_t next; + pgd_t *pgd; + + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + do { + next = stage2_pgd_addr_end(kvm, addr, end); + if (!stage2_pgd_none(kvm, *pgd)) + stage2_flush_puds(kvm, pgd, addr, next); + + if (next != end) + cond_resched_lock(&kvm->mmu_lock); + } while (pgd++, addr = next, addr != end); +} + +/** + * stage2_flush_vm - Invalidate cache for pages mapped in stage 2 + * @kvm: The struct kvm pointer + * + * Go through the stage 2 page tables and invalidate any cache lines + * backing memory already mapped to the VM. + */ +static void stage2_flush_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_flush_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + srcu_read_unlock(&kvm->srcu, idx); +} + +static void clear_hyp_pgd_entry(pgd_t *pgd) +{ + pud_t *pud_table __maybe_unused = pud_offset(pgd, 0UL); + pgd_clear(pgd); + pud_free(NULL, pud_table); + put_page(virt_to_page(pgd)); +} + +static void clear_hyp_pud_entry(pud_t *pud) +{ + pmd_t *pmd_table __maybe_unused = pmd_offset(pud, 0); + VM_BUG_ON(pud_huge(*pud)); + pud_clear(pud); + pmd_free(NULL, pmd_table); + put_page(virt_to_page(pud)); +} + +static void clear_hyp_pmd_entry(pmd_t *pmd) +{ + pte_t *pte_table = pte_offset_kernel(pmd, 0); + VM_BUG_ON(pmd_thp_or_huge(*pmd)); + pmd_clear(pmd); + pte_free_kernel(NULL, pte_table); + put_page(virt_to_page(pmd)); +} + +static void unmap_hyp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte, *start_pte; + + start_pte = pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + kvm_set_pte(pte, __pte(0)); + put_page(virt_to_page(pte)); + } + } while (pte++, addr += PAGE_SIZE, addr != end); + + if (hyp_pte_table_empty(start_pte)) + clear_hyp_pmd_entry(pmd); +} + +static void unmap_hyp_pmds(pud_t *pud, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pmd_t *pmd, *start_pmd; + + start_pmd = pmd = pmd_offset(pud, addr); + do { + next = pmd_addr_end(addr, end); + /* Hyp doesn't use huge pmds */ + if (!pmd_none(*pmd)) + unmap_hyp_ptes(pmd, addr, next); + } while (pmd++, addr = next, addr != end); + + if (hyp_pmd_table_empty(start_pmd)) + clear_hyp_pud_entry(pud); +} + +static void unmap_hyp_puds(pgd_t *pgd, phys_addr_t addr, phys_addr_t end) +{ + phys_addr_t next; + pud_t *pud, *start_pud; + + start_pud = pud = pud_offset(pgd, addr); + do { + next = pud_addr_end(addr, end); + /* Hyp doesn't use huge puds */ + if (!pud_none(*pud)) + unmap_hyp_pmds(pud, addr, next); + } while (pud++, addr = next, addr != end); + + if (hyp_pud_table_empty(start_pud)) + clear_hyp_pgd_entry(pgd); +} + +static unsigned int kvm_pgd_index(unsigned long addr, unsigned int ptrs_per_pgd) +{ + return (addr >> PGDIR_SHIFT) & (ptrs_per_pgd - 1); +} + +static void __unmap_hyp_range(pgd_t *pgdp, unsigned long ptrs_per_pgd, + phys_addr_t start, u64 size) +{ + pgd_t *pgd; + phys_addr_t addr = start, end = start + size; + phys_addr_t next; + + /* + * We don't unmap anything from HYP, except at the hyp tear down. + * Hence, we don't have to invalidate the TLBs here. + */ + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); + do { + next = pgd_addr_end(addr, end); + if (!pgd_none(*pgd)) + unmap_hyp_puds(pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +static void unmap_hyp_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, PTRS_PER_PGD, start, size); +} + +static void unmap_hyp_idmap_range(pgd_t *pgdp, phys_addr_t start, u64 size) +{ + __unmap_hyp_range(pgdp, __kvm_idmap_ptrs_per_pgd(), start, size); +} + +/** + * free_hyp_pgds - free Hyp-mode page tables + * + * Assumes hyp_pgd is a page table used strictly in Hyp-mode and + * therefore contains either mappings in the kernel memory area (above + * PAGE_OFFSET), or device mappings in the idmap range. + * + * boot_hyp_pgd should only map the idmap range, and is only used in + * the extended idmap case. + */ +void free_hyp_pgds(void) +{ + pgd_t *id_pgd; + + mutex_lock(&kvm_hyp_pgd_mutex); + + id_pgd = boot_hyp_pgd ? boot_hyp_pgd : hyp_pgd; + + if (id_pgd) { + /* In case we never called hyp_mmu_init() */ + if (!io_map_base) + io_map_base = hyp_idmap_start; + unmap_hyp_idmap_range(id_pgd, io_map_base, + hyp_idmap_start + PAGE_SIZE - io_map_base); + } + + if (boot_hyp_pgd) { + free_pages((unsigned long)boot_hyp_pgd, hyp_pgd_order); + boot_hyp_pgd = NULL; + } + + if (hyp_pgd) { + unmap_hyp_range(hyp_pgd, kern_hyp_va(PAGE_OFFSET), + (uintptr_t)high_memory - PAGE_OFFSET); + + free_pages((unsigned long)hyp_pgd, hyp_pgd_order); + hyp_pgd = NULL; + } + if (merged_hyp_pgd) { + clear_page(merged_hyp_pgd); + free_page((unsigned long)merged_hyp_pgd); + merged_hyp_pgd = NULL; + } + + mutex_unlock(&kvm_hyp_pgd_mutex); +} + +static void create_hyp_pte_mappings(pmd_t *pmd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pte_t *pte; + unsigned long addr; + + addr = start; + do { + pte = pte_offset_kernel(pmd, addr); + kvm_set_pte(pte, kvm_pfn_pte(pfn, prot)); + get_page(virt_to_page(pte)); + pfn++; + } while (addr += PAGE_SIZE, addr != end); +} + +static int create_hyp_pmd_mappings(pud_t *pud, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pmd_t *pmd; + pte_t *pte; + unsigned long addr, next; + + addr = start; + do { + pmd = pmd_offset(pud, addr); + + BUG_ON(pmd_sect(*pmd)); + + if (pmd_none(*pmd)) { + pte = pte_alloc_one_kernel(NULL); + if (!pte) { + kvm_err("Cannot allocate Hyp pte\n"); + return -ENOMEM; + } + kvm_pmd_populate(pmd, pte); + get_page(virt_to_page(pmd)); + } + + next = pmd_addr_end(addr, end); + + create_hyp_pte_mappings(pmd, addr, next, pfn, prot); + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); + + return 0; +} + +static int create_hyp_pud_mappings(pgd_t *pgd, unsigned long start, + unsigned long end, unsigned long pfn, + pgprot_t prot) +{ + pud_t *pud; + pmd_t *pmd; + unsigned long addr, next; + int ret; + + addr = start; + do { + pud = pud_offset(pgd, addr); + + if (pud_none_or_clear_bad(pud)) { + pmd = pmd_alloc_one(NULL, addr); + if (!pmd) { + kvm_err("Cannot allocate Hyp pmd\n"); + return -ENOMEM; + } + kvm_pud_populate(pud, pmd); + get_page(virt_to_page(pud)); + } + + next = pud_addr_end(addr, end); + ret = create_hyp_pmd_mappings(pud, addr, next, pfn, prot); + if (ret) + return ret; + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); + + return 0; +} + +static int __create_hyp_mappings(pgd_t *pgdp, unsigned long ptrs_per_pgd, + unsigned long start, unsigned long end, + unsigned long pfn, pgprot_t prot) +{ + pgd_t *pgd; + pud_t *pud; + unsigned long addr, next; + int err = 0; + + mutex_lock(&kvm_hyp_pgd_mutex); + addr = start & PAGE_MASK; + end = PAGE_ALIGN(end); + do { + pgd = pgdp + kvm_pgd_index(addr, ptrs_per_pgd); + + if (pgd_none(*pgd)) { + pud = pud_alloc_one(NULL, addr); + if (!pud) { + kvm_err("Cannot allocate Hyp pud\n"); + err = -ENOMEM; + goto out; + } + kvm_pgd_populate(pgd, pud); + get_page(virt_to_page(pgd)); + } + + next = pgd_addr_end(addr, end); + err = create_hyp_pud_mappings(pgd, addr, next, pfn, prot); + if (err) + goto out; + pfn += (next - addr) >> PAGE_SHIFT; + } while (addr = next, addr != end); +out: + mutex_unlock(&kvm_hyp_pgd_mutex); + return err; +} + +static phys_addr_t kvm_kaddr_to_phys(void *kaddr) +{ + if (!is_vmalloc_addr(kaddr)) { + BUG_ON(!virt_addr_valid(kaddr)); + return __pa(kaddr); + } else { + return page_to_phys(vmalloc_to_page(kaddr)) + + offset_in_page(kaddr); + } +} + +/** + * create_hyp_mappings - duplicate a kernel virtual address range in Hyp mode + * @from: The virtual kernel start address of the range + * @to: The virtual kernel end address of the range (exclusive) + * @prot: The protection to be applied to this range + * + * The same virtual address as the kernel virtual address is also used + * in Hyp-mode mapping (modulo HYP_PAGE_OFFSET) to the same underlying + * physical pages. + */ +int create_hyp_mappings(void *from, void *to, pgprot_t prot) +{ + phys_addr_t phys_addr; + unsigned long virt_addr; + unsigned long start = kern_hyp_va((unsigned long)from); + unsigned long end = kern_hyp_va((unsigned long)to); + + if (is_kernel_in_hyp_mode()) + return 0; + + start = start & PAGE_MASK; + end = PAGE_ALIGN(end); + + for (virt_addr = start; virt_addr < end; virt_addr += PAGE_SIZE) { + int err; + + phys_addr = kvm_kaddr_to_phys(from + virt_addr - start); + err = __create_hyp_mappings(hyp_pgd, PTRS_PER_PGD, + virt_addr, virt_addr + PAGE_SIZE, + __phys_to_pfn(phys_addr), + prot); + if (err) + return err; + } + + return 0; +} + +static int __create_hyp_private_mapping(phys_addr_t phys_addr, size_t size, + unsigned long *haddr, pgprot_t prot) +{ + pgd_t *pgd = hyp_pgd; + unsigned long base; + int ret = 0; + + mutex_lock(&kvm_hyp_pgd_mutex); + + /* + * This assumes that we have enough space below the idmap + * page to allocate our VAs. If not, the check below will + * kick. A potential alternative would be to detect that + * overflow and switch to an allocation above the idmap. + * + * The allocated size is always a multiple of PAGE_SIZE. + */ + size = PAGE_ALIGN(size + offset_in_page(phys_addr)); + base = io_map_base - size; + + /* + * Verify that BIT(VA_BITS - 1) hasn't been flipped by + * allocating the new area, as it would indicate we've + * overflowed the idmap/IO address range. + */ + if ((base ^ io_map_base) & BIT(VA_BITS - 1)) + ret = -ENOMEM; + else + io_map_base = base; + + mutex_unlock(&kvm_hyp_pgd_mutex); + + if (ret) + goto out; + + if (__kvm_cpu_uses_extended_idmap()) + pgd = boot_hyp_pgd; + + ret = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), + base, base + size, + __phys_to_pfn(phys_addr), prot); + if (ret) + goto out; + + *haddr = base + offset_in_page(phys_addr); + +out: + return ret; +} + +/** + * create_hyp_io_mappings - Map IO into both kernel and HYP + * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @kaddr: Kernel VA for this mapping + * @haddr: HYP VA for this mapping + */ +int create_hyp_io_mappings(phys_addr_t phys_addr, size_t size, + void __iomem **kaddr, + void __iomem **haddr) +{ + unsigned long addr; + int ret; + + *kaddr = ioremap(phys_addr, size); + if (!*kaddr) + return -ENOMEM; + + if (is_kernel_in_hyp_mode()) { + *haddr = *kaddr; + return 0; + } + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_DEVICE); + if (ret) { + iounmap(*kaddr); + *kaddr = NULL; + *haddr = NULL; + return ret; + } + + *haddr = (void __iomem *)addr; + return 0; +} + +/** + * create_hyp_exec_mappings - Map an executable range into HYP + * @phys_addr: The physical start address which gets mapped + * @size: Size of the region being mapped + * @haddr: HYP VA for this mapping + */ +int create_hyp_exec_mappings(phys_addr_t phys_addr, size_t size, + void **haddr) +{ + unsigned long addr; + int ret; + + BUG_ON(is_kernel_in_hyp_mode()); + + ret = __create_hyp_private_mapping(phys_addr, size, + &addr, PAGE_HYP_EXEC); + if (ret) { + *haddr = NULL; + return ret; + } + + *haddr = (void *)addr; + return 0; +} + +/** + * kvm_alloc_stage2_pgd - allocate level-1 table for stage-2 translation. + * @kvm: The KVM struct pointer for the VM. + * + * Allocates only the stage-2 HW PGD level table(s) of size defined by + * stage2_pgd_size(kvm). + * + * Note we don't need locking here as this is only called when the VM is + * created, which can only be done once. + */ +int kvm_alloc_stage2_pgd(struct kvm *kvm) +{ + phys_addr_t pgd_phys; + pgd_t *pgd; + + if (kvm->arch.pgd != NULL) { + kvm_err("kvm_arch already initialized?\n"); + return -EINVAL; + } + + /* Allocate the HW PGD, making sure that each page gets its own refcount */ + pgd = alloc_pages_exact(stage2_pgd_size(kvm), GFP_KERNEL | __GFP_ZERO); + if (!pgd) + return -ENOMEM; + + pgd_phys = virt_to_phys(pgd); + if (WARN_ON(pgd_phys & ~kvm_vttbr_baddr_mask(kvm))) + return -EINVAL; + + kvm->arch.pgd = pgd; + kvm->arch.pgd_phys = pgd_phys; + return 0; +} + +static void stage2_unmap_memslot(struct kvm *kvm, + struct kvm_memory_slot *memslot) +{ + hva_t hva = memslot->userspace_addr; + phys_addr_t addr = memslot->base_gfn << PAGE_SHIFT; + phys_addr_t size = PAGE_SIZE * memslot->npages; + hva_t reg_end = hva + size; + + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we should + * unmap any of them. + * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (!(vma->vm_flags & VM_PFNMAP)) { + gpa_t gpa = addr + (vm_start - memslot->userspace_addr); + unmap_stage2_range(kvm, gpa, vm_end - vm_start); + } + hva = vm_end; + } while (hva < reg_end); +} + +/** + * stage2_unmap_vm - Unmap Stage-2 RAM mappings + * @kvm: The struct kvm pointer + * + * Go through the memregions and unmap any regular RAM + * backing memory already mapped to the VM. + */ +void stage2_unmap_vm(struct kvm *kvm) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int idx; + + idx = srcu_read_lock(&kvm->srcu); + down_read(¤t->mm->mmap_sem); + spin_lock(&kvm->mmu_lock); + + slots = kvm_memslots(kvm); + kvm_for_each_memslot(memslot, slots) + stage2_unmap_memslot(kvm, memslot); + + spin_unlock(&kvm->mmu_lock); + up_read(¤t->mm->mmap_sem); + srcu_read_unlock(&kvm->srcu, idx); +} + +/** + * kvm_free_stage2_pgd - free all stage-2 tables + * @kvm: The KVM struct pointer for the VM. + * + * Walks the level-1 page table pointed to by kvm->arch.pgd and frees all + * underlying level-2 and level-3 tables before freeing the actual level-1 table + * and setting the struct pointer to NULL. + */ +void kvm_free_stage2_pgd(struct kvm *kvm) +{ + void *pgd = NULL; + + spin_lock(&kvm->mmu_lock); + if (kvm->arch.pgd) { + unmap_stage2_range(kvm, 0, kvm_phys_size(kvm)); + pgd = READ_ONCE(kvm->arch.pgd); + kvm->arch.pgd = NULL; + kvm->arch.pgd_phys = 0; + } + spin_unlock(&kvm->mmu_lock); + + /* Free the HW pgd, one page at a time */ + if (pgd) + free_pages_exact(pgd, stage2_pgd_size(kvm)); +} + +static pud_t *stage2_get_pud(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) +{ + pgd_t *pgd; + pud_t *pud; + + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + if (stage2_pgd_none(kvm, *pgd)) { + if (!cache) + return NULL; + pud = mmu_memory_cache_alloc(cache); + stage2_pgd_populate(kvm, pgd, pud); + get_page(virt_to_page(pgd)); + } + + return stage2_pud_offset(kvm, pgd, addr); +} + +static pmd_t *stage2_get_pmd(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr) +{ + pud_t *pud; + pmd_t *pmd; + + pud = stage2_get_pud(kvm, cache, addr); + if (!pud || stage2_pud_huge(kvm, *pud)) + return NULL; + + if (stage2_pud_none(kvm, *pud)) { + if (!cache) + return NULL; + pmd = mmu_memory_cache_alloc(cache); + stage2_pud_populate(kvm, pud, pmd); + get_page(virt_to_page(pud)); + } + + return stage2_pmd_offset(kvm, pud, addr); +} + +static int stage2_set_pmd_huge(struct kvm *kvm, struct kvm_mmu_memory_cache + *cache, phys_addr_t addr, const pmd_t *new_pmd) +{ + pmd_t *pmd, old_pmd; + +retry: + pmd = stage2_get_pmd(kvm, cache, addr); + VM_BUG_ON(!pmd); + + old_pmd = *pmd; + /* + * Multiple vcpus faulting on the same PMD entry, can + * lead to them sequentially updating the PMD with the + * same value. Following the break-before-make + * (pmd_clear() followed by tlb_flush()) process can + * hinder forward progress due to refaults generated + * on missing translations. + * + * Skip updating the page table if the entry is + * unchanged. + */ + if (pmd_val(old_pmd) == pmd_val(*new_pmd)) + return 0; + + if (pmd_present(old_pmd)) { + /* + * If we already have PTE level mapping for this block, + * we must unmap it to avoid inconsistent TLB state and + * leaking the table page. We could end up in this situation + * if the memory slot was marked for dirty logging and was + * reverted, leaving PTE level mappings for the pages accessed + * during the period. So, unmap the PTE level mapping for this + * block and retry, as we could have released the upper level + * table in the process. + * + * Normal THP split/merge follows mmu_notifier callbacks and do + * get handled accordingly. + */ + if (!pmd_thp_or_huge(old_pmd)) { + unmap_stage2_range(kvm, addr & S2_PMD_MASK, S2_PMD_SIZE); + goto retry; + } + /* + * Mapping in huge pages should only happen through a + * fault. If a page is merged into a transparent huge + * page, the individual subpages of that huge page + * should be unmapped through MMU notifiers before we + * get here. + * + * Merging of CompoundPages is not supported; they + * should become splitting first, unmapped, merged, + * and mapped back in on-demand. + */ + WARN_ON_ONCE(pmd_pfn(old_pmd) != pmd_pfn(*new_pmd)); + pmd_clear(pmd); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + get_page(virt_to_page(pmd)); + } + + kvm_set_pmd(pmd, *new_pmd); + return 0; +} + +static int stage2_set_pud_huge(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pud_t *new_pudp) +{ + pud_t *pudp, old_pud; + +retry: + pudp = stage2_get_pud(kvm, cache, addr); + VM_BUG_ON(!pudp); + + old_pud = *pudp; + + /* + * A large number of vcpus faulting on the same stage 2 entry, + * can lead to a refault due to the stage2_pud_clear()/tlb_flush(). + * Skip updating the page tables if there is no change. + */ + if (pud_val(old_pud) == pud_val(*new_pudp)) + return 0; + + if (stage2_pud_present(kvm, old_pud)) { + /* + * If we already have table level mapping for this block, unmap + * the range for this block and retry. + */ + if (!stage2_pud_huge(kvm, old_pud)) { + unmap_stage2_range(kvm, addr & S2_PUD_MASK, S2_PUD_SIZE); + goto retry; + } + + WARN_ON_ONCE(kvm_pud_pfn(old_pud) != kvm_pud_pfn(*new_pudp)); + stage2_pud_clear(kvm, pudp); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + get_page(virt_to_page(pudp)); + } + + kvm_set_pud(pudp, *new_pudp); + return 0; +} + +/* + * stage2_get_leaf_entry - walk the stage2 VM page tables and return + * true if a valid and present leaf-entry is found. A pointer to the + * leaf-entry is returned in the appropriate level variable - pudpp, + * pmdpp, ptepp. + */ +static bool stage2_get_leaf_entry(struct kvm *kvm, phys_addr_t addr, + pud_t **pudpp, pmd_t **pmdpp, pte_t **ptepp) +{ + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + *pudpp = NULL; + *pmdpp = NULL; + *ptepp = NULL; + + pudp = stage2_get_pud(kvm, NULL, addr); + if (!pudp || stage2_pud_none(kvm, *pudp) || !stage2_pud_present(kvm, *pudp)) + return false; + + if (stage2_pud_huge(kvm, *pudp)) { + *pudpp = pudp; + return true; + } + + pmdp = stage2_pmd_offset(kvm, pudp, addr); + if (!pmdp || pmd_none(*pmdp) || !pmd_present(*pmdp)) + return false; + + if (pmd_thp_or_huge(*pmdp)) { + *pmdpp = pmdp; + return true; + } + + ptep = pte_offset_kernel(pmdp, addr); + if (!ptep || pte_none(*ptep) || !pte_present(*ptep)) + return false; + + *ptepp = ptep; + return true; +} + +static bool stage2_is_exec(struct kvm *kvm, phys_addr_t addr) +{ + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + bool found; + + found = stage2_get_leaf_entry(kvm, addr, &pudp, &pmdp, &ptep); + if (!found) + return false; + + if (pudp) + return kvm_s2pud_exec(pudp); + else if (pmdp) + return kvm_s2pmd_exec(pmdp); + else + return kvm_s2pte_exec(ptep); +} + +static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, + phys_addr_t addr, const pte_t *new_pte, + unsigned long flags) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte, old_pte; + bool iomap = flags & KVM_S2PTE_FLAG_IS_IOMAP; + bool logging_active = flags & KVM_S2_FLAG_LOGGING_ACTIVE; + + VM_BUG_ON(logging_active && !cache); + + /* Create stage-2 page table mapping - Levels 0 and 1 */ + pud = stage2_get_pud(kvm, cache, addr); + if (!pud) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* + * While dirty page logging - dissolve huge PUD, then continue + * on to allocate page. + */ + if (logging_active) + stage2_dissolve_pud(kvm, addr, pud); + + if (stage2_pud_none(kvm, *pud)) { + if (!cache) + return 0; /* ignore calls from kvm_set_spte_hva */ + pmd = mmu_memory_cache_alloc(cache); + stage2_pud_populate(kvm, pud, pmd); + get_page(virt_to_page(pud)); + } + + pmd = stage2_pmd_offset(kvm, pud, addr); + if (!pmd) { + /* + * Ignore calls from kvm_set_spte_hva for unallocated + * address ranges. + */ + return 0; + } + + /* + * While dirty page logging - dissolve huge PMD, then continue on to + * allocate page. + */ + if (logging_active) + stage2_dissolve_pmd(kvm, addr, pmd); + + /* Create stage-2 page mappings - Level 2 */ + if (pmd_none(*pmd)) { + if (!cache) + return 0; /* ignore calls from kvm_set_spte_hva */ + pte = mmu_memory_cache_alloc(cache); + kvm_pmd_populate(pmd, pte); + get_page(virt_to_page(pmd)); + } + + pte = pte_offset_kernel(pmd, addr); + + if (iomap && pte_present(*pte)) + return -EFAULT; + + /* Create 2nd stage page table mapping - Level 3 */ + old_pte = *pte; + if (pte_present(old_pte)) { + /* Skip page table update if there is no change */ + if (pte_val(old_pte) == pte_val(*new_pte)) + return 0; + + kvm_set_pte(pte, __pte(0)); + kvm_tlb_flush_vmid_ipa(kvm, addr); + } else { + get_page(virt_to_page(pte)); + } + + kvm_set_pte(pte, *new_pte); + return 0; +} + +#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + if (pte_young(*pte)) { + *pte = pte_mkold(*pte); + return 1; + } + return 0; +} +#else +static int stage2_ptep_test_and_clear_young(pte_t *pte) +{ + return __ptep_test_and_clear_young(pte); +} +#endif + +static int stage2_pmdp_test_and_clear_young(pmd_t *pmd) +{ + return stage2_ptep_test_and_clear_young((pte_t *)pmd); +} + +static int stage2_pudp_test_and_clear_young(pud_t *pud) +{ + return stage2_ptep_test_and_clear_young((pte_t *)pud); +} + +/** + * kvm_phys_addr_ioremap - map a device range to guest IPA + * + * @kvm: The KVM pointer + * @guest_ipa: The IPA at which to insert the mapping + * @pa: The physical address of the device + * @size: The size of the mapping + */ +int kvm_phys_addr_ioremap(struct kvm *kvm, phys_addr_t guest_ipa, + phys_addr_t pa, unsigned long size, bool writable) +{ + phys_addr_t addr, end; + int ret = 0; + unsigned long pfn; + struct kvm_mmu_memory_cache cache = { 0, }; + + end = (guest_ipa + size + PAGE_SIZE - 1) & PAGE_MASK; + pfn = __phys_to_pfn(pa); + + for (addr = guest_ipa; addr < end; addr += PAGE_SIZE) { + pte_t pte = kvm_pfn_pte(pfn, PAGE_S2_DEVICE); + + if (writable) + pte = kvm_s2pte_mkwrite(pte); + + ret = mmu_topup_memory_cache(&cache, + kvm_mmu_cache_min_pages(kvm), + KVM_NR_MEM_OBJS); + if (ret) + goto out; + spin_lock(&kvm->mmu_lock); + ret = stage2_set_pte(kvm, &cache, addr, &pte, + KVM_S2PTE_FLAG_IS_IOMAP); + spin_unlock(&kvm->mmu_lock); + if (ret) + goto out; + + pfn++; + } + +out: + mmu_free_memory_cache(&cache); + return ret; +} + +/** + * stage2_wp_ptes - write protect PMD range + * @pmd: pointer to pmd entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_ptes(pmd_t *pmd, phys_addr_t addr, phys_addr_t end) +{ + pte_t *pte; + + pte = pte_offset_kernel(pmd, addr); + do { + if (!pte_none(*pte)) { + if (!kvm_s2pte_readonly(pte)) + kvm_set_s2pte_readonly(pte); + } + } while (pte++, addr += PAGE_SIZE, addr != end); +} + +/** + * stage2_wp_pmds - write protect PUD range + * kvm: kvm instance for the VM + * @pud: pointer to pud entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_pmds(struct kvm *kvm, pud_t *pud, + phys_addr_t addr, phys_addr_t end) +{ + pmd_t *pmd; + phys_addr_t next; + + pmd = stage2_pmd_offset(kvm, pud, addr); + + do { + next = stage2_pmd_addr_end(kvm, addr, end); + if (!pmd_none(*pmd)) { + if (pmd_thp_or_huge(*pmd)) { + if (!kvm_s2pmd_readonly(pmd)) + kvm_set_s2pmd_readonly(pmd); + } else { + stage2_wp_ptes(pmd, addr, next); + } + } + } while (pmd++, addr = next, addr != end); +} + +/** + * stage2_wp_puds - write protect PGD range + * @pgd: pointer to pgd entry + * @addr: range start address + * @end: range end address + */ +static void stage2_wp_puds(struct kvm *kvm, pgd_t *pgd, + phys_addr_t addr, phys_addr_t end) +{ + pud_t *pud; + phys_addr_t next; + + pud = stage2_pud_offset(kvm, pgd, addr); + do { + next = stage2_pud_addr_end(kvm, addr, end); + if (!stage2_pud_none(kvm, *pud)) { + if (stage2_pud_huge(kvm, *pud)) { + if (!kvm_s2pud_readonly(pud)) + kvm_set_s2pud_readonly(pud); + } else { + stage2_wp_pmds(kvm, pud, addr, next); + } + } + } while (pud++, addr = next, addr != end); +} + +/** + * stage2_wp_range() - write protect stage2 memory region range + * @kvm: The KVM pointer + * @addr: Start address of range + * @end: End address of range + */ +static void stage2_wp_range(struct kvm *kvm, phys_addr_t addr, phys_addr_t end) +{ + pgd_t *pgd; + phys_addr_t next; + + pgd = kvm->arch.pgd + stage2_pgd_index(kvm, addr); + do { + /* + * Release kvm_mmu_lock periodically if the memory region is + * large. Otherwise, we may see kernel panics with + * CONFIG_DETECT_HUNG_TASK, CONFIG_LOCKUP_DETECTOR, + * CONFIG_LOCKDEP. Additionally, holding the lock too long + * will also starve other vCPUs. We have to also make sure + * that the page tables are not freed while we released + * the lock. + */ + cond_resched_lock(&kvm->mmu_lock); + if (!READ_ONCE(kvm->arch.pgd)) + break; + next = stage2_pgd_addr_end(kvm, addr, end); + if (stage2_pgd_present(kvm, *pgd)) + stage2_wp_puds(kvm, pgd, addr, next); + } while (pgd++, addr = next, addr != end); +} + +/** + * kvm_mmu_wp_memory_region() - write protect stage 2 entries for memory slot + * @kvm: The KVM pointer + * @slot: The memory slot to write protect + * + * Called to start logging dirty pages after memory region + * KVM_MEM_LOG_DIRTY_PAGES operation is called. After this function returns + * all present PUD, PMD and PTEs are write protected in the memory region. + * Afterwards read of dirty page log can be called. + * + * Acquires kvm_mmu_lock. Called with kvm->slots_lock mutex acquired, + * serializing operations for VM memory regions. + */ +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot) +{ + struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memory_slot *memslot = id_to_memslot(slots, slot); + phys_addr_t start, end; + + if (WARN_ON_ONCE(!memslot)) + return; + + start = memslot->base_gfn << PAGE_SHIFT; + end = (memslot->base_gfn + memslot->npages) << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + stage2_wp_range(kvm, start, end); + spin_unlock(&kvm->mmu_lock); + kvm_flush_remote_tlbs(kvm); +} + +/** + * kvm_mmu_write_protect_pt_masked() - write protect dirty pages + * @kvm: The KVM pointer + * @slot: The memory slot associated with mask + * @gfn_offset: The gfn offset in memory slot + * @mask: The mask of dirty pages at offset 'gfn_offset' in this memory + * slot to be write protected + * + * Walks bits set in mask write protects the associated pte's. Caller must + * acquire kvm_mmu_lock. + */ +static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + phys_addr_t base_gfn = slot->base_gfn + gfn_offset; + phys_addr_t start = (base_gfn + __ffs(mask)) << PAGE_SHIFT; + phys_addr_t end = (base_gfn + __fls(mask) + 1) << PAGE_SHIFT; + + stage2_wp_range(kvm, start, end); +} + +/* + * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected + * dirty pages. + * + * It calls kvm_mmu_write_protect_pt_masked to write protect selected pages to + * enable dirty logging for them. + */ +void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, + struct kvm_memory_slot *slot, + gfn_t gfn_offset, unsigned long mask) +{ + kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask); +} + +static void clean_dcache_guest_page(kvm_pfn_t pfn, unsigned long size) +{ + __clean_dcache_guest_page(pfn, size); +} + +static void invalidate_icache_guest_page(kvm_pfn_t pfn, unsigned long size) +{ + __invalidate_icache_guest_page(pfn, size); +} + +static void kvm_send_hwpoison_signal(unsigned long address, short lsb) +{ + send_sig_mceerr(BUS_MCEERR_AR, (void __user *)address, lsb, current); +} + +static bool fault_supports_stage2_huge_mapping(struct kvm_memory_slot *memslot, + unsigned long hva, + unsigned long map_size) +{ + gpa_t gpa_start; + hva_t uaddr_start, uaddr_end; + size_t size; + + /* The memslot and the VMA are guaranteed to be aligned to PAGE_SIZE */ + if (map_size == PAGE_SIZE) + return true; + + size = memslot->npages * PAGE_SIZE; + + gpa_start = memslot->base_gfn << PAGE_SHIFT; + + uaddr_start = memslot->userspace_addr; + uaddr_end = uaddr_start + size; + + /* + * Pages belonging to memslots that don't have the same alignment + * within a PMD/PUD for userspace and IPA cannot be mapped with stage-2 + * PMD/PUD entries, because we'll end up mapping the wrong pages. + * + * Consider a layout like the following: + * + * memslot->userspace_addr: + * +-----+--------------------+--------------------+---+ + * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| + * +-----+--------------------+--------------------+---+ + * + * memslot->base_gfn << PAGE_SHIFT: + * +---+--------------------+--------------------+-----+ + * |abc|def Stage-2 block | Stage-2 block |tvxyz| + * +---+--------------------+--------------------+-----+ + * + * If we create those stage-2 blocks, we'll end up with this incorrect + * mapping: + * d -> f + * e -> g + * f -> h + */ + if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1))) + return false; + + /* + * Next, let's make sure we're not trying to map anything not covered + * by the memslot. This means we have to prohibit block size mappings + * for the beginning and end of a non-block aligned and non-block sized + * memory slot (illustrated by the head and tail parts of the + * userspace view above containing pages 'abcde' and 'xyz', + * respectively). + * + * Note that it doesn't matter if we do the check using the + * userspace_addr or the base_gfn, as both are equally aligned (per + * the check above) and equally sized. + */ + return (hva & ~(map_size - 1)) >= uaddr_start && + (hva & ~(map_size - 1)) + map_size <= uaddr_end; +} + +/* + * Check if the given hva is backed by a transparent huge page (THP) and + * whether it can be mapped using block mapping in stage2. If so, adjust + * the stage2 PFN and IPA accordingly. Only PMD_SIZE THPs are currently + * supported. This will need to be updated to support other THP sizes. + * + * Returns the size of the mapping. + */ +static unsigned long +transparent_hugepage_adjust(struct kvm_memory_slot *memslot, + unsigned long hva, kvm_pfn_t *pfnp, + phys_addr_t *ipap) +{ + kvm_pfn_t pfn = *pfnp; + + /* + * Make sure the adjustment is done only for THP pages. Also make + * sure that the HVA and IPA are sufficiently aligned and that the + * block map is contained within the memslot. + */ + if (kvm_is_transparent_hugepage(pfn) && + fault_supports_stage2_huge_mapping(memslot, hva, PMD_SIZE)) { + /* + * The address we faulted on is backed by a transparent huge + * page. However, because we map the compound huge page and + * not the individual tail page, we need to transfer the + * refcount to the head page. We have to be careful that the + * THP doesn't start to split while we are adjusting the + * refcounts. + * + * We are sure this doesn't happen, because mmu_notifier_retry + * was successful and we are holding the mmu_lock, so if this + * THP is trying to split, it will be blocked in the mmu + * notifier before touching any of the pages, specifically + * before being able to call __split_huge_page_refcount(). + * + * We can therefore safely transfer the refcount from PG_tail + * to PG_head and switch the pfn from a tail page to the head + * page accordingly. + */ + *ipap &= PMD_MASK; + kvm_release_pfn_clean(pfn); + pfn &= ~(PTRS_PER_PMD - 1); + kvm_get_pfn(pfn); + *pfnp = pfn; + + return PMD_SIZE; + } + + /* Use page mapping if we cannot use block mapping. */ + return PAGE_SIZE; +} + +static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, + struct kvm_memory_slot *memslot, unsigned long hva, + unsigned long fault_status) +{ + int ret; + bool write_fault, writable, force_pte = false; + bool exec_fault, needs_exec; + unsigned long mmu_seq; + gfn_t gfn = fault_ipa >> PAGE_SHIFT; + struct kvm *kvm = vcpu->kvm; + struct kvm_mmu_memory_cache *memcache = &vcpu->arch.mmu_page_cache; + struct vm_area_struct *vma; + short vma_shift; + kvm_pfn_t pfn; + pgprot_t mem_type = PAGE_S2; + bool logging_active = memslot_is_logging(memslot); + unsigned long vma_pagesize, flags = 0; + + write_fault = kvm_is_write_fault(vcpu); + exec_fault = kvm_vcpu_trap_is_iabt(vcpu); + VM_BUG_ON(write_fault && exec_fault); + + if (fault_status == FSC_PERM && !write_fault && !exec_fault) { + kvm_err("Unexpected L2 read permission error\n"); + return -EFAULT; + } + + /* Let's check if we will get back a huge page backed by hugetlbfs */ + down_read(¤t->mm->mmap_sem); + vma = find_vma_intersection(current->mm, hva, hva + 1); + if (unlikely(!vma)) { + kvm_err("Failed to find VMA for hva 0x%lx\n", hva); + up_read(¤t->mm->mmap_sem); + return -EFAULT; + } + + if (is_vm_hugetlb_page(vma)) + vma_shift = huge_page_shift(hstate_vma(vma)); + else + vma_shift = PAGE_SHIFT; + + vma_pagesize = 1ULL << vma_shift; + if (logging_active || + (vma->vm_flags & VM_PFNMAP) || + !fault_supports_stage2_huge_mapping(memslot, hva, vma_pagesize)) { + force_pte = true; + vma_pagesize = PAGE_SIZE; + } + + /* + * The stage2 has a minimum of 2 level table (For arm64 see + * kvm_arm_setup_stage2()). Hence, we are guaranteed that we can + * use PMD_SIZE huge mappings (even when the PMD is folded into PGD). + * As for PUD huge maps, we must make sure that we have at least + * 3 levels, i.e, PMD is not folded. + */ + if (vma_pagesize == PMD_SIZE || + (vma_pagesize == PUD_SIZE && kvm_stage2_has_pmd(kvm))) + gfn = (fault_ipa & huge_page_mask(hstate_vma(vma))) >> PAGE_SHIFT; + up_read(¤t->mm->mmap_sem); + + /* We need minimum second+third level pages */ + ret = mmu_topup_memory_cache(memcache, kvm_mmu_cache_min_pages(kvm), + KVM_NR_MEM_OBJS); + if (ret) + return ret; + + mmu_seq = vcpu->kvm->mmu_notifier_seq; + /* + * Ensure the read of mmu_notifier_seq happens before we call + * gfn_to_pfn_prot (which calls get_user_pages), so that we don't risk + * the page we just got a reference to gets unmapped before we have a + * chance to grab the mmu_lock, which ensure that if the page gets + * unmapped afterwards, the call to kvm_unmap_hva will take it away + * from us again properly. This smp_rmb() interacts with the smp_wmb() + * in kvm_mmu_notifier_invalidate_<page|range_end>. + */ + smp_rmb(); + + pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); + if (pfn == KVM_PFN_ERR_HWPOISON) { + kvm_send_hwpoison_signal(hva, vma_shift); + return 0; + } + if (is_error_noslot_pfn(pfn)) + return -EFAULT; + + if (kvm_is_device_pfn(pfn)) { + mem_type = PAGE_S2_DEVICE; + flags |= KVM_S2PTE_FLAG_IS_IOMAP; + } else if (logging_active) { + /* + * Faults on pages in a memslot with logging enabled + * should not be mapped with huge pages (it introduces churn + * and performance degradation), so force a pte mapping. + */ + flags |= KVM_S2_FLAG_LOGGING_ACTIVE; + + /* + * Only actually map the page as writable if this was a write + * fault. + */ + if (!write_fault) + writable = false; + } + + if (exec_fault && is_iomap(flags)) + return -ENOEXEC; + + spin_lock(&kvm->mmu_lock); + if (mmu_notifier_retry(kvm, mmu_seq)) + goto out_unlock; + + /* + * If we are not forced to use page mapping, check if we are + * backed by a THP and thus use block mapping if possible. + */ + if (vma_pagesize == PAGE_SIZE && !force_pte) + vma_pagesize = transparent_hugepage_adjust(memslot, hva, + &pfn, &fault_ipa); + if (writable) + kvm_set_pfn_dirty(pfn); + + if (fault_status != FSC_PERM && !is_iomap(flags)) + clean_dcache_guest_page(pfn, vma_pagesize); + + if (exec_fault) + invalidate_icache_guest_page(pfn, vma_pagesize); + + /* + * If we took an execution fault we have made the + * icache/dcache coherent above and should now let the s2 + * mapping be executable. + * + * Write faults (!exec_fault && FSC_PERM) are orthogonal to + * execute permissions, and we preserve whatever we have. + */ + needs_exec = exec_fault || + (fault_status == FSC_PERM && stage2_is_exec(kvm, fault_ipa)); + + if (vma_pagesize == PUD_SIZE) { + pud_t new_pud = kvm_pfn_pud(pfn, mem_type); + + new_pud = kvm_pud_mkhuge(new_pud); + if (writable) + new_pud = kvm_s2pud_mkwrite(new_pud); + + if (needs_exec) + new_pud = kvm_s2pud_mkexec(new_pud); + + ret = stage2_set_pud_huge(kvm, memcache, fault_ipa, &new_pud); + } else if (vma_pagesize == PMD_SIZE) { + pmd_t new_pmd = kvm_pfn_pmd(pfn, mem_type); + + new_pmd = kvm_pmd_mkhuge(new_pmd); + + if (writable) + new_pmd = kvm_s2pmd_mkwrite(new_pmd); + + if (needs_exec) + new_pmd = kvm_s2pmd_mkexec(new_pmd); + + ret = stage2_set_pmd_huge(kvm, memcache, fault_ipa, &new_pmd); + } else { + pte_t new_pte = kvm_pfn_pte(pfn, mem_type); + + if (writable) { + new_pte = kvm_s2pte_mkwrite(new_pte); + mark_page_dirty(kvm, gfn); + } + + if (needs_exec) + new_pte = kvm_s2pte_mkexec(new_pte); + + ret = stage2_set_pte(kvm, memcache, fault_ipa, &new_pte, flags); + } + +out_unlock: + spin_unlock(&kvm->mmu_lock); + kvm_set_pfn_accessed(pfn); + kvm_release_pfn_clean(pfn); + return ret; +} + +/* + * Resolve the access fault by making the page young again. + * Note that because the faulting entry is guaranteed not to be + * cached in the TLB, we don't need to invalidate anything. + * Only the HW Access Flag updates are supported for Stage 2 (no DBM), + * so there is no need for atomic (pte|pmd)_mkyoung operations. + */ +static void handle_access_fault(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + kvm_pfn_t pfn; + bool pfn_valid = false; + + trace_kvm_access_fault(fault_ipa); + + spin_lock(&vcpu->kvm->mmu_lock); + + if (!stage2_get_leaf_entry(vcpu->kvm, fault_ipa, &pud, &pmd, &pte)) + goto out; + + if (pud) { /* HugeTLB */ + *pud = kvm_s2pud_mkyoung(*pud); + pfn = kvm_pud_pfn(*pud); + pfn_valid = true; + } else if (pmd) { /* THP, HugeTLB */ + *pmd = pmd_mkyoung(*pmd); + pfn = pmd_pfn(*pmd); + pfn_valid = true; + } else { + *pte = pte_mkyoung(*pte); /* Just a page... */ + pfn = pte_pfn(*pte); + pfn_valid = true; + } + +out: + spin_unlock(&vcpu->kvm->mmu_lock); + if (pfn_valid) + kvm_set_pfn_accessed(pfn); +} + +/** + * kvm_handle_guest_abort - handles all 2nd stage aborts + * @vcpu: the VCPU pointer + * @run: the kvm_run structure + * + * Any abort that gets to the host is almost guaranteed to be caused by a + * missing second stage translation table entry, which can mean that either the + * guest simply needs more memory and we must allocate an appropriate page or it + * can mean that the guest tried to access I/O memory, which is emulated by user + * space. The distinction is based on the IPA causing the fault and whether this + * memory region has been registered as standard RAM by user space. + */ +int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) +{ + unsigned long fault_status; + phys_addr_t fault_ipa; + struct kvm_memory_slot *memslot; + unsigned long hva; + bool is_iabt, write_fault, writable; + gfn_t gfn; + int ret, idx; + + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); + + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + + /* Synchronous External Abort? */ + if (kvm_vcpu_dabt_isextabt(vcpu)) { + /* + * For RAS the host kernel may handle this abort. + * There is no need to pass the error into the guest. + */ + if (!kvm_handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) + return 1; + + if (unlikely(!is_iabt)) { + kvm_inject_vabt(vcpu); + return 1; + } + } + + trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), + kvm_vcpu_get_hfar(vcpu), fault_ipa); + + /* Check the stage-2 fault is trans. fault or write fault */ + if (fault_status != FSC_FAULT && fault_status != FSC_PERM && + fault_status != FSC_ACCESS) { + kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", + kvm_vcpu_trap_get_class(vcpu), + (unsigned long)kvm_vcpu_trap_get_fault(vcpu), + (unsigned long)kvm_vcpu_get_hsr(vcpu)); + return -EFAULT; + } + + idx = srcu_read_lock(&vcpu->kvm->srcu); + + gfn = fault_ipa >> PAGE_SHIFT; + memslot = gfn_to_memslot(vcpu->kvm, gfn); + hva = gfn_to_hva_memslot_prot(memslot, gfn, &writable); + write_fault = kvm_is_write_fault(vcpu); + if (kvm_is_error_hva(hva) || (write_fault && !writable)) { + if (is_iabt) { + /* Prefetch Abort on I/O address */ + ret = -ENOEXEC; + goto out; + } + + /* + * Check for a cache maintenance operation. Since we + * ended-up here, we know it is outside of any memory + * slot. But we can't find out if that is for a device, + * or if the guest is just being stupid. The only thing + * we know for sure is that this range cannot be cached. + * + * So let's assume that the guest is just being + * cautious, and skip the instruction. + */ + if (kvm_vcpu_dabt_is_cm(vcpu)) { + kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + ret = 1; + goto out_unlock; + } + + /* + * The IPA is reported as [MAX:12], so we need to + * complement it with the bottom 12 bits from the + * faulting VA. This is always 12 bits, irrespective + * of the page size. + */ + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & ((1 << 12) - 1); + ret = io_mem_abort(vcpu, run, fault_ipa); + goto out_unlock; + } + + /* Userspace should not be able to register out-of-bounds IPAs */ + VM_BUG_ON(fault_ipa >= kvm_phys_size(vcpu->kvm)); + + if (fault_status == FSC_ACCESS) { + handle_access_fault(vcpu, fault_ipa); + ret = 1; + goto out_unlock; + } + + ret = user_mem_abort(vcpu, fault_ipa, memslot, hva, fault_status); + if (ret == 0) + ret = 1; +out: + if (ret == -ENOEXEC) { + kvm_inject_pabt(vcpu, kvm_vcpu_get_hfar(vcpu)); + ret = 1; + } +out_unlock: + srcu_read_unlock(&vcpu->kvm->srcu, idx); + return ret; +} + +static int handle_hva_to_gpa(struct kvm *kvm, + unsigned long start, + unsigned long end, + int (*handler)(struct kvm *kvm, + gpa_t gpa, u64 size, + void *data), + void *data) +{ + struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; + int ret = 0; + + slots = kvm_memslots(kvm); + + /* we only care about the pages that the guest sees */ + kvm_for_each_memslot(memslot, slots) { + unsigned long hva_start, hva_end; + gfn_t gpa; + + hva_start = max(start, memslot->userspace_addr); + hva_end = min(end, memslot->userspace_addr + + (memslot->npages << PAGE_SHIFT)); + if (hva_start >= hva_end) + continue; + + gpa = hva_to_gfn_memslot(hva_start, memslot) << PAGE_SHIFT; + ret |= handler(kvm, gpa, (u64)(hva_end - hva_start), data); + } + + return ret; +} + +static int kvm_unmap_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + unmap_stage2_range(kvm, gpa, size); + return 0; +} + +int kvm_unmap_hva_range(struct kvm *kvm, + unsigned long start, unsigned long end) +{ + if (!kvm->arch.pgd) + return 0; + + trace_kvm_unmap_hva_range(start, end); + handle_hva_to_gpa(kvm, start, end, &kvm_unmap_hva_handler, NULL); + return 0; +} + +static int kvm_set_spte_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pte_t *pte = (pte_t *)data; + + WARN_ON(size != PAGE_SIZE); + /* + * We can always call stage2_set_pte with KVM_S2PTE_FLAG_LOGGING_ACTIVE + * flag clear because MMU notifiers will have unmapped a huge PMD before + * calling ->change_pte() (which in turn calls kvm_set_spte_hva()) and + * therefore stage2_set_pte() never needs to clear out a huge PMD + * through this calling path. + */ + stage2_set_pte(kvm, NULL, gpa, pte, 0); + return 0; +} + + +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +{ + unsigned long end = hva + PAGE_SIZE; + kvm_pfn_t pfn = pte_pfn(pte); + pte_t stage2_pte; + + if (!kvm->arch.pgd) + return 0; + + trace_kvm_set_spte_hva(hva); + + /* + * We've moved a page around, probably through CoW, so let's treat it + * just like a translation fault and clean the cache to the PoC. + */ + clean_dcache_guest_page(pfn, PAGE_SIZE); + stage2_pte = kvm_pfn_pte(pfn, PAGE_S2); + handle_hva_to_gpa(kvm, hva, end, &kvm_set_spte_handler, &stage2_pte); + + return 0; +} + +static int kvm_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); + if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + return 0; + + if (pud) + return stage2_pudp_test_and_clear_young(pud); + else if (pmd) + return stage2_pmdp_test_and_clear_young(pmd); + else + return stage2_ptep_test_and_clear_young(pte); +} + +static int kvm_test_age_hva_handler(struct kvm *kvm, gpa_t gpa, u64 size, void *data) +{ + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + WARN_ON(size != PAGE_SIZE && size != PMD_SIZE && size != PUD_SIZE); + if (!stage2_get_leaf_entry(kvm, gpa, &pud, &pmd, &pte)) + return 0; + + if (pud) + return kvm_s2pud_young(*pud); + else if (pmd) + return pmd_young(*pmd); + else + return pte_young(*pte); +} + +int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) +{ + if (!kvm->arch.pgd) + return 0; + trace_kvm_age_hva(start, end); + return handle_hva_to_gpa(kvm, start, end, kvm_age_hva_handler, NULL); +} + +int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) +{ + if (!kvm->arch.pgd) + return 0; + trace_kvm_test_age_hva(hva); + return handle_hva_to_gpa(kvm, hva, hva + PAGE_SIZE, + kvm_test_age_hva_handler, NULL); +} + +void kvm_mmu_free_memory_caches(struct kvm_vcpu *vcpu) +{ + mmu_free_memory_cache(&vcpu->arch.mmu_page_cache); +} + +phys_addr_t kvm_mmu_get_httbr(void) +{ + if (__kvm_cpu_uses_extended_idmap()) + return virt_to_phys(merged_hyp_pgd); + else + return virt_to_phys(hyp_pgd); +} + +phys_addr_t kvm_get_idmap_vector(void) +{ + return hyp_idmap_vector; +} + +static int kvm_map_idmap_text(pgd_t *pgd) +{ + int err; + + /* Create the idmap in the boot page tables */ + err = __create_hyp_mappings(pgd, __kvm_idmap_ptrs_per_pgd(), + hyp_idmap_start, hyp_idmap_end, + __phys_to_pfn(hyp_idmap_start), + PAGE_HYP_EXEC); + if (err) + kvm_err("Failed to idmap %lx-%lx\n", + hyp_idmap_start, hyp_idmap_end); + + return err; +} + +int kvm_mmu_init(void) +{ + int err; + + hyp_idmap_start = __pa_symbol(__hyp_idmap_text_start); + hyp_idmap_start = ALIGN_DOWN(hyp_idmap_start, PAGE_SIZE); + hyp_idmap_end = __pa_symbol(__hyp_idmap_text_end); + hyp_idmap_end = ALIGN(hyp_idmap_end, PAGE_SIZE); + hyp_idmap_vector = __pa_symbol(__kvm_hyp_init); + + /* + * We rely on the linker script to ensure at build time that the HYP + * init code does not cross a page boundary. + */ + BUG_ON((hyp_idmap_start ^ (hyp_idmap_end - 1)) & PAGE_MASK); + + kvm_debug("IDMAP page: %lx\n", hyp_idmap_start); + kvm_debug("HYP VA range: %lx:%lx\n", + kern_hyp_va(PAGE_OFFSET), + kern_hyp_va((unsigned long)high_memory - 1)); + + if (hyp_idmap_start >= kern_hyp_va(PAGE_OFFSET) && + hyp_idmap_start < kern_hyp_va((unsigned long)high_memory - 1) && + hyp_idmap_start != (unsigned long)__hyp_idmap_text_start) { + /* + * The idmap page is intersecting with the VA space, + * it is not safe to continue further. + */ + kvm_err("IDMAP intersecting with HYP VA, unable to continue\n"); + err = -EINVAL; + goto out; + } + + hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, hyp_pgd_order); + if (!hyp_pgd) { + kvm_err("Hyp mode PGD not allocated\n"); + err = -ENOMEM; + goto out; + } + + if (__kvm_cpu_uses_extended_idmap()) { + boot_hyp_pgd = (pgd_t *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, + hyp_pgd_order); + if (!boot_hyp_pgd) { + kvm_err("Hyp boot PGD not allocated\n"); + err = -ENOMEM; + goto out; + } + + err = kvm_map_idmap_text(boot_hyp_pgd); + if (err) + goto out; + + merged_hyp_pgd = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); + if (!merged_hyp_pgd) { + kvm_err("Failed to allocate extra HYP pgd\n"); + goto out; + } + __kvm_extend_hypmap(boot_hyp_pgd, hyp_pgd, merged_hyp_pgd, + hyp_idmap_start); + } else { + err = kvm_map_idmap_text(hyp_pgd); + if (err) + goto out; + } + + io_map_base = hyp_idmap_start; + return 0; +out: + free_hyp_pgds(); + return err; +} + +void kvm_arch_commit_memory_region(struct kvm *kvm, + const struct kvm_userspace_memory_region *mem, + struct kvm_memory_slot *old, + const struct kvm_memory_slot *new, + enum kvm_mr_change change) +{ + /* + * At this point memslot has been committed and there is an + * allocated dirty_bitmap[], dirty pages will be tracked while the + * memory slot is write protected. + */ + if (change != KVM_MR_DELETE && mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { + /* + * If we're with initial-all-set, we don't need to write + * protect any pages because they're all reported as dirty. + * Huge pages and normal pages will be write protect gradually. + */ + if (!kvm_dirty_log_manual_protect_and_init_set(kvm)) { + kvm_mmu_wp_memory_region(kvm, mem->slot); + } + } +} + +int kvm_arch_prepare_memory_region(struct kvm *kvm, + struct kvm_memory_slot *memslot, + const struct kvm_userspace_memory_region *mem, + enum kvm_mr_change change) +{ + hva_t hva = mem->userspace_addr; + hva_t reg_end = hva + mem->memory_size; + bool writable = !(mem->flags & KVM_MEM_READONLY); + int ret = 0; + + if (change != KVM_MR_CREATE && change != KVM_MR_MOVE && + change != KVM_MR_FLAGS_ONLY) + return 0; + + /* + * Prevent userspace from creating a memory region outside of the IPA + * space addressable by the KVM guest IPA space. + */ + if (memslot->base_gfn + memslot->npages >= + (kvm_phys_size(kvm) >> PAGE_SHIFT)) + return -EFAULT; + + down_read(¤t->mm->mmap_sem); + /* + * A memory region could potentially cover multiple VMAs, and any holes + * between them, so iterate over all of them to find out if we can map + * any of them right now. + * + * +--------------------------------------------+ + * +---------------+----------------+ +----------------+ + * | : VMA 1 | VMA 2 | | VMA 3 : | + * +---------------+----------------+ +----------------+ + * | memory region | + * +--------------------------------------------+ + */ + do { + struct vm_area_struct *vma = find_vma(current->mm, hva); + hva_t vm_start, vm_end; + + if (!vma || vma->vm_start >= reg_end) + break; + + /* + * Take the intersection of this VMA with the memory region + */ + vm_start = max(hva, vma->vm_start); + vm_end = min(reg_end, vma->vm_end); + + if (vma->vm_flags & VM_PFNMAP) { + gpa_t gpa = mem->guest_phys_addr + + (vm_start - mem->userspace_addr); + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; + + /* IO region dirty page logging not allowed */ + if (memslot->flags & KVM_MEM_LOG_DIRTY_PAGES) { + ret = -EINVAL; + goto out; + } + + ret = kvm_phys_addr_ioremap(kvm, gpa, pa, + vm_end - vm_start, + writable); + if (ret) + break; + } + hva = vm_end; + } while (hva < reg_end); + + if (change == KVM_MR_FLAGS_ONLY) + goto out; + + spin_lock(&kvm->mmu_lock); + if (ret) + unmap_stage2_range(kvm, mem->guest_phys_addr, mem->memory_size); + else + stage2_flush_memslot(kvm, memslot); + spin_unlock(&kvm->mmu_lock); +out: + up_read(¤t->mm->mmap_sem); + return ret; +} + +void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) +{ +} + +void kvm_arch_memslots_updated(struct kvm *kvm, u64 gen) +{ +} + +void kvm_arch_flush_shadow_all(struct kvm *kvm) +{ + kvm_free_stage2_pgd(kvm); +} + +void kvm_arch_flush_shadow_memslot(struct kvm *kvm, + struct kvm_memory_slot *slot) +{ + gpa_t gpa = slot->base_gfn << PAGE_SHIFT; + phys_addr_t size = slot->npages << PAGE_SHIFT; + + spin_lock(&kvm->mmu_lock); + unmap_stage2_range(kvm, gpa, size); + spin_unlock(&kvm->mmu_lock); +} + +/* + * See note at ARMv7 ARM B1.14.4 (TL;DR: S/W ops are not easily virtualized). + * + * Main problems: + * - S/W ops are local to a CPU (not broadcast) + * - We have line migration behind our back (speculation) + * - System caches don't support S/W at all (damn!) + * + * In the face of the above, the best we can do is to try and convert + * S/W ops to VA ops. Because the guest is not allowed to infer the + * S/W to PA mapping, it can only use S/W to nuke the whole cache, + * which is a rather good thing for us. + * + * Also, it is only used when turning caches on/off ("The expected + * usage of the cache maintenance instructions that operate by set/way + * is associated with the cache maintenance instructions associated + * with the powerdown and powerup of caches, if this is required by + * the implementation."). + * + * We use the following policy: + * + * - If we trap a S/W operation, we enable VM trapping to detect + * caches being turned on/off, and do a full clean. + * + * - We flush the caches on both caches being turned on and off. + * + * - Once the caches are enabled, we stop trapping VM ops. + */ +void kvm_set_way_flush(struct kvm_vcpu *vcpu) +{ + unsigned long hcr = *vcpu_hcr(vcpu); + + /* + * If this is the first time we do a S/W operation + * (i.e. HCR_TVM not set) flush the whole memory, and set the + * VM trapping. + * + * Otherwise, rely on the VM trapping to wait for the MMU + + * Caches to be turned off. At that point, we'll be able to + * clean the caches again. + */ + if (!(hcr & HCR_TVM)) { + trace_kvm_set_way_flush(*vcpu_pc(vcpu), + vcpu_has_cache_enabled(vcpu)); + stage2_flush_vm(vcpu->kvm); + *vcpu_hcr(vcpu) = hcr | HCR_TVM; + } +} + +void kvm_toggle_cache(struct kvm_vcpu *vcpu, bool was_enabled) +{ + bool now_enabled = vcpu_has_cache_enabled(vcpu); + + /* + * If switching the MMU+caches on, need to invalidate the caches. + * If switching it off, need to clean the caches. + * Clean + invalidate does the trick always. + */ + if (now_enabled != was_enabled) + stage2_flush_vm(vcpu->kvm); + + /* Caches are now on, stop trapping VM ops (until a S/W op) */ + if (now_enabled) + *vcpu_hcr(vcpu) &= ~HCR_TVM; + + trace_kvm_toggle_cache(*vcpu_pc(vcpu), was_enabled, now_enabled); +} diff --git a/arch/arm64/kvm/perf.c b/arch/arm64/kvm/perf.c new file mode 100644 index 000000000000..d45b8b9a4415 --- /dev/null +++ b/arch/arm64/kvm/perf.c @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Based on the x86 implementation. + * + * Copyright (C) 2012 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/perf_event.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_emulate.h> + +static int kvm_is_in_guest(void) +{ + return kvm_get_running_vcpu() != NULL; +} + +static int kvm_is_user_mode(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_running_vcpu(); + + if (vcpu) + return !vcpu_mode_priv(vcpu); + + return 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + struct kvm_vcpu *vcpu; + + vcpu = kvm_get_running_vcpu(); + + if (vcpu) + return *vcpu_pc(vcpu); + + return 0; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +int kvm_perf_init(void) +{ + return perf_register_guest_info_callbacks(&kvm_guest_cbs); +} + +int kvm_perf_teardown(void) +{ + return perf_unregister_guest_info_callbacks(&kvm_guest_cbs); +} diff --git a/arch/arm64/kvm/pmu-emul.c b/arch/arm64/kvm/pmu-emul.c new file mode 100644 index 000000000000..f0d0312c0a55 --- /dev/null +++ b/arch/arm64/kvm/pmu-emul.c @@ -0,0 +1,869 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015 Linaro Ltd. + * Author: Shannon Zhao <shannon.zhao@linaro.org> + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/perf_event.h> +#include <linux/perf/arm_pmu.h> +#include <linux/uaccess.h> +#include <asm/kvm_emulate.h> +#include <kvm/arm_pmu.h> +#include <kvm/arm_vgic.h> + +static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx); +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc); + +#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1 + +/** + * kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (select_idx == ARMV8_PMU_CYCLE_IDX && + __vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC); +} + +static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc) +{ + struct kvm_pmu *pmu; + struct kvm_vcpu_arch *vcpu_arch; + + pmc -= pmc->idx; + pmu = container_of(pmc, struct kvm_pmu, pmc[0]); + vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu); + return container_of(vcpu_arch, struct kvm_vcpu, arch); +} + +/** + * kvm_pmu_pmc_is_chained - determine if the pmc is chained + * @pmc: The PMU counter pointer + */ +static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc) +{ + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + + return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); +} + +/** + * kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter + * @select_idx: The counter index + */ +static bool kvm_pmu_idx_is_high_counter(u64 select_idx) +{ + return select_idx & 0x1; +} + +/** + * kvm_pmu_get_canonical_pmc - obtain the canonical pmc + * @pmc: The PMU counter pointer + * + * When a pair of PMCs are chained together we use the low counter (canonical) + * to hold the underlying perf event. + */ +static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc) +{ + if (kvm_pmu_pmc_is_chained(pmc) && + kvm_pmu_idx_is_high_counter(pmc->idx)) + return pmc - 1; + + return pmc; +} +static struct kvm_pmc *kvm_pmu_get_alternate_pmc(struct kvm_pmc *pmc) +{ + if (kvm_pmu_idx_is_high_counter(pmc->idx)) + return pmc - 1; + else + return pmc + 1; +} + +/** + * kvm_pmu_idx_has_chain_evtype - determine if the event type is chain + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx) +{ + u64 eventsel, reg; + + select_idx |= 0x1; + + if (select_idx == ARMV8_PMU_CYCLE_IDX) + return false; + + reg = PMEVTYPER0_EL0 + select_idx; + eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT; + + return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN; +} + +/** + * kvm_pmu_get_pair_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @pmc: The PMU counter pointer + */ +static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu, + struct kvm_pmc *pmc) +{ + u64 counter, counter_high, reg, enabled, running; + + if (kvm_pmu_pmc_is_chained(pmc)) { + pmc = kvm_pmu_get_canonical_pmc(pmc); + reg = PMEVCNTR0_EL0 + pmc->idx; + + counter = __vcpu_sys_reg(vcpu, reg); + counter_high = __vcpu_sys_reg(vcpu, reg + 1); + + counter = lower_32_bits(counter) | (counter_high << 32); + } else { + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx; + counter = __vcpu_sys_reg(vcpu, reg); + } + + /* + * The real counter value is equal to the value of counter register plus + * the value perf event counts. + */ + if (pmc->perf_event) + counter += perf_event_read_value(pmc->perf_event, &enabled, + &running); + + return counter; +} + +/** + * kvm_pmu_get_counter_value - get PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + */ +u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx) +{ + u64 counter; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx]; + + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (kvm_pmu_pmc_is_chained(pmc) && + kvm_pmu_idx_is_high_counter(select_idx)) + counter = upper_32_bits(counter); + else if (select_idx != ARMV8_PMU_CYCLE_IDX) + counter = lower_32_bits(counter); + + return counter; +} + +/** + * kvm_pmu_set_counter_value - set PMU counter value + * @vcpu: The vcpu pointer + * @select_idx: The counter index + * @val: The counter value + */ +void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val) +{ + u64 reg; + + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx; + __vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx); + + /* Recreate the perf event to reflect the updated sample_period */ + kvm_pmu_create_perf_event(vcpu, select_idx); +} + +/** + * kvm_pmu_release_perf_event - remove the perf event + * @pmc: The PMU counter pointer + */ +static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc) +{ + pmc = kvm_pmu_get_canonical_pmc(pmc); + if (pmc->perf_event) { + perf_event_disable(pmc->perf_event); + perf_event_release_kernel(pmc->perf_event); + pmc->perf_event = NULL; + } +} + +/** + * kvm_pmu_stop_counter - stop PMU counter + * @pmc: The PMU counter pointer + * + * If this counter has been configured to monitor some event, release it here. + */ +static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc) +{ + u64 counter, reg, val; + + pmc = kvm_pmu_get_canonical_pmc(pmc); + if (!pmc->perf_event) + return; + + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (pmc->idx == ARMV8_PMU_CYCLE_IDX) { + reg = PMCCNTR_EL0; + val = counter; + } else { + reg = PMEVCNTR0_EL0 + pmc->idx; + val = lower_32_bits(counter); + } + + __vcpu_sys_reg(vcpu, reg) = val; + + if (kvm_pmu_pmc_is_chained(pmc)) + __vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter); + + kvm_pmu_release_perf_event(pmc); +} + +/** + * kvm_pmu_vcpu_init - assign pmu counter idx for cpu + * @vcpu: The vcpu pointer + * + */ +void kvm_pmu_vcpu_init(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + pmu->pmc[i].idx = i; +} + +/** + * kvm_pmu_vcpu_reset - reset pmu state for cpu + * @vcpu: The vcpu pointer + * + */ +void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) +{ + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); + struct kvm_pmu *pmu = &vcpu->arch.pmu; + int i; + + for_each_set_bit(i, &mask, 32) + kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]); + + bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS); +} + +/** + * kvm_pmu_vcpu_destroy - free perf event of PMU for cpu + * @vcpu: The vcpu pointer + * + */ +void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) + kvm_pmu_release_perf_event(&pmu->pmc[i]); +} + +u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu) +{ + u64 val = __vcpu_sys_reg(vcpu, PMCR_EL0) >> ARMV8_PMU_PMCR_N_SHIFT; + + val &= ARMV8_PMU_PMCR_N_MASK; + if (val == 0) + return BIT(ARMV8_PMU_CYCLE_IDX); + else + return GENMASK(val - 1, 0) | BIT(ARMV8_PMU_CYCLE_IDX); +} + +/** + * kvm_pmu_enable_counter_mask - enable selected PMU counters + * @vcpu: The vcpu pointer + * @val: the value guest writes to PMCNTENSET register + * + * Call perf_event_enable to start counting the perf event + */ +void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc; + + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) || !val) + return; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + if (!(val & BIT(i))) + continue; + + pmc = &pmu->pmc[i]; + + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); + + /* At this point, pmc must be the canonical */ + if (pmc->perf_event) { + perf_event_enable(pmc->perf_event); + if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE) + kvm_debug("fail to enable perf event\n"); + } + } +} + +/** + * kvm_pmu_disable_counter_mask - disable selected PMU counters + * @vcpu: The vcpu pointer + * @val: the value guest writes to PMCNTENCLR register + * + * Call perf_event_disable to stop counting the perf event + */ +void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) +{ + int i; + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc; + + if (!val) + return; + + for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) { + if (!(val & BIT(i))) + continue; + + pmc = &pmu->pmc[i]; + + /* A change in the enable state may affect the chain state */ + kvm_pmu_update_pmc_chained(vcpu, i); + kvm_pmu_create_perf_event(vcpu, i); + + /* At this point, pmc must be the canonical */ + if (pmc->perf_event) + perf_event_disable(pmc->perf_event); + } +} + +static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) +{ + u64 reg = 0; + + if ((__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) { + reg = __vcpu_sys_reg(vcpu, PMOVSSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + reg &= __vcpu_sys_reg(vcpu, PMINTENSET_EL1); + reg &= kvm_pmu_valid_counter_mask(vcpu); + } + + return reg; +} + +static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + bool overflow; + + if (!kvm_arm_pmu_v3_ready(vcpu)) + return; + + overflow = !!kvm_pmu_overflow_status(vcpu); + if (pmu->irq_level == overflow) + return; + + pmu->irq_level = overflow; + + if (likely(irqchip_in_kernel(vcpu->kvm))) { + int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + pmu->irq_num, overflow, pmu); + WARN_ON(ret); + } +} + +bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_sync_regs *sregs = &vcpu->run->s.regs; + bool run_level = sregs->device_irq_level & KVM_ARM_DEV_PMU; + + if (likely(irqchip_in_kernel(vcpu->kvm))) + return false; + + return pmu->irq_level != run_level; +} + +/* + * Reflect the PMU overflow interrupt output level into the kvm_run structure + */ +void kvm_pmu_update_run(struct kvm_vcpu *vcpu) +{ + struct kvm_sync_regs *regs = &vcpu->run->s.regs; + + /* Populate the timer bitmap for user space */ + regs->device_irq_level &= ~KVM_ARM_DEV_PMU; + if (vcpu->arch.pmu.irq_level) + regs->device_irq_level |= KVM_ARM_DEV_PMU; +} + +/** + * kvm_pmu_flush_hwstate - flush pmu state to cpu + * @vcpu: The vcpu pointer + * + * Check if the PMU has overflowed while we were running in the host, and inject + * an interrupt if that was the case. + */ +void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) +{ + kvm_pmu_update_state(vcpu); +} + +/** + * kvm_pmu_sync_hwstate - sync pmu state from cpu + * @vcpu: The vcpu pointer + * + * Check if the PMU has overflowed while we were running in the guest, and + * inject an interrupt if that was the case. + */ +void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) +{ + kvm_pmu_update_state(vcpu); +} + +/** + * When the perf event overflows, set the overflow status and inform the vcpu. + */ +static void kvm_pmu_perf_overflow(struct perf_event *perf_event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct arm_pmu *cpu_pmu = to_arm_pmu(perf_event->pmu); + struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc); + int idx = pmc->idx; + u64 period; + + cpu_pmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * Reset the sample period to the architectural limit, + * i.e. the point where the counter overflows. + */ + period = -(local64_read(&perf_event->count)); + + if (!kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + period &= GENMASK(31, 0); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; + + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(idx); + + if (kvm_pmu_overflow_status(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } + + cpu_pmu->pmu.start(perf_event, PERF_EF_RELOAD); +} + +/** + * kvm_pmu_software_increment - do software increment + * @vcpu: The vcpu pointer + * @val: the value guest writes to PMSWINC register + */ +void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + int i; + + if (!(__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) + return; + + /* Weed out disabled counters */ + val &= __vcpu_sys_reg(vcpu, PMCNTENSET_EL0); + + for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++) { + u64 type, reg; + + if (!(val & BIT(i))) + continue; + + /* PMSWINC only applies to ... SW_INC! */ + type = __vcpu_sys_reg(vcpu, PMEVTYPER0_EL0 + i); + type &= ARMV8_PMU_EVTYPE_EVENT; + if (type != ARMV8_PMUV3_PERFCTR_SW_INCR) + continue; + + /* increment this even SW_INC counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i) = reg; + + if (reg) /* no overflow on the low part */ + continue; + + if (kvm_pmu_pmc_is_chained(&pmu->pmc[i])) { + /* increment the high counter */ + reg = __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) + 1; + reg = lower_32_bits(reg); + __vcpu_sys_reg(vcpu, PMEVCNTR0_EL0 + i + 1) = reg; + if (!reg) /* mark overflow on the high counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i + 1); + } else { + /* mark overflow on low counter */ + __vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= BIT(i); + } + } +} + +/** + * kvm_pmu_handle_pmcr - handle PMCR register + * @vcpu: The vcpu pointer + * @val: the value guest writes to PMCR register + */ +void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) +{ + unsigned long mask = kvm_pmu_valid_counter_mask(vcpu); + int i; + + if (val & ARMV8_PMU_PMCR_E) { + kvm_pmu_enable_counter_mask(vcpu, + __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask); + } else { + kvm_pmu_disable_counter_mask(vcpu, mask); + } + + if (val & ARMV8_PMU_PMCR_C) + kvm_pmu_set_counter_value(vcpu, ARMV8_PMU_CYCLE_IDX, 0); + + if (val & ARMV8_PMU_PMCR_P) { + for_each_set_bit(i, &mask, 32) + kvm_pmu_set_counter_value(vcpu, i, 0); + } +} + +static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx) +{ + return (__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E) && + (__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx)); +} + +/** + * kvm_pmu_create_perf_event - create a perf event for a counter + * @vcpu: The vcpu pointer + * @select_idx: The number of selected counter + */ +static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc; + struct perf_event *event; + struct perf_event_attr attr; + u64 eventsel, counter, reg, data; + + /* + * For chained counters the event type and filtering attributes are + * obtained from the low/even counter. We also use this counter to + * determine if the event is enabled/disabled. + */ + pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]); + + reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx; + data = __vcpu_sys_reg(vcpu, reg); + + kvm_pmu_stop_counter(vcpu, pmc); + eventsel = data & ARMV8_PMU_EVTYPE_EVENT; + + /* Software increment event does't need to be backed by a perf event */ + if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR && + pmc->idx != ARMV8_PMU_CYCLE_IDX) + return; + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.type = PERF_TYPE_RAW; + attr.size = sizeof(attr); + attr.pinned = 1; + attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx); + attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0; + attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0; + attr.exclude_hv = 1; /* Don't count EL2 events */ + attr.exclude_host = 1; /* Don't count host events */ + attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ? + ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel; + + counter = kvm_pmu_get_pair_counter_value(vcpu, pmc); + + if (kvm_pmu_pmc_is_chained(pmc)) { + /** + * The initial sample period (overflow count) of an event. For + * chained counters we only support overflow interrupts on the + * high counter. + */ + attr.sample_period = (-counter) & GENMASK(63, 0); + attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED; + + event = perf_event_create_kernel_counter(&attr, -1, current, + kvm_pmu_perf_overflow, + pmc + 1); + } else { + /* The initial sample period (overflow count) of an event. */ + if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx)) + attr.sample_period = (-counter) & GENMASK(63, 0); + else + attr.sample_period = (-counter) & GENMASK(31, 0); + + event = perf_event_create_kernel_counter(&attr, -1, current, + kvm_pmu_perf_overflow, pmc); + } + + if (IS_ERR(event)) { + pr_err_once("kvm: pmu event creation failed %ld\n", + PTR_ERR(event)); + return; + } + + pmc->perf_event = event; +} + +/** + * kvm_pmu_update_pmc_chained - update chained bitmap + * @vcpu: The vcpu pointer + * @select_idx: The number of selected counter + * + * Update the chained bitmap based on the event type written in the + * typer register and the enable state of the odd register. + */ +static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + struct kvm_pmc *pmc = &pmu->pmc[select_idx], *canonical_pmc; + bool new_state, old_state; + + old_state = kvm_pmu_pmc_is_chained(pmc); + new_state = kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx) && + kvm_pmu_counter_is_enabled(vcpu, pmc->idx | 0x1); + + if (old_state == new_state) + return; + + canonical_pmc = kvm_pmu_get_canonical_pmc(pmc); + kvm_pmu_stop_counter(vcpu, canonical_pmc); + if (new_state) { + /* + * During promotion from !chained to chained we must ensure + * the adjacent counter is stopped and its event destroyed + */ + kvm_pmu_stop_counter(vcpu, kvm_pmu_get_alternate_pmc(pmc)); + set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); + return; + } + clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained); +} + +/** + * kvm_pmu_set_counter_event_type - set selected counter to monitor some event + * @vcpu: The vcpu pointer + * @data: The data guest writes to PMXEVTYPER_EL0 + * @select_idx: The number of selected counter + * + * When OS accesses PMXEVTYPER_EL0, that means it wants to set a PMC to count an + * event with given hardware event number. Here we call perf_event API to + * emulate this action and create a kernel perf event for it. + */ +void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data, + u64 select_idx) +{ + u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK; + + reg = (select_idx == ARMV8_PMU_CYCLE_IDX) + ? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx; + + __vcpu_sys_reg(vcpu, reg) = event_type; + + kvm_pmu_update_pmc_chained(vcpu, select_idx); + kvm_pmu_create_perf_event(vcpu, select_idx); +} + +bool kvm_arm_support_pmu_v3(void) +{ + /* + * Check if HW_PERF_EVENTS are supported by checking the number of + * hardware performance counters. This could ensure the presence of + * a physical PMU and CONFIG_PERF_EVENT is selected. + */ + return (perf_num_counters() > 0); +} + +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) +{ + if (!vcpu->arch.pmu.created) + return 0; + + /* + * A valid interrupt configuration for the PMU is either to have a + * properly configured interrupt number and using an in-kernel + * irqchip, or to not have an in-kernel GIC and not set an IRQ. + */ + if (irqchip_in_kernel(vcpu->kvm)) { + int irq = vcpu->arch.pmu.irq_num; + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -EINVAL; + + /* + * If we are using an in-kernel vgic, at this point we know + * the vgic will be initialized, so we can check the PMU irq + * number against the dimensions of the vgic and make sure + * it's valid. + */ + if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) + return -EINVAL; + } else if (kvm_arm_pmu_irq_initialized(vcpu)) { + return -EINVAL; + } + + kvm_pmu_vcpu_reset(vcpu); + vcpu->arch.pmu.ready = true; + + return 0; +} + +static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_support_pmu_v3()) + return -ENODEV; + + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + return -ENXIO; + + if (vcpu->arch.pmu.created) + return -EBUSY; + + if (irqchip_in_kernel(vcpu->kvm)) { + int ret; + + /* + * If using the PMU with an in-kernel virtual GIC + * implementation, we require the GIC to be already + * initialized when initializing the PMU. + */ + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -ENXIO; + + ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, + &vcpu->arch.pmu); + if (ret) + return ret; + } + + vcpu->arch.pmu.created = true; + return 0; +} + +/* + * For one VM the interrupt type must be same for each vcpu. + * As a PPI, the interrupt number is the same for all vcpus, + * while as an SPI it must be a separate number per vcpu. + */ +static bool pmu_irq_is_valid(struct kvm *kvm, int irq) +{ + int i; + struct kvm_vcpu *vcpu; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (!kvm_arm_pmu_irq_initialized(vcpu)) + continue; + + if (irq_is_ppi(irq)) { + if (vcpu->arch.pmu.irq_num != irq) + return false; + } else { + if (vcpu->arch.pmu.irq_num == irq) + return false; + } + } + + return true; +} + +int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PMU_V3_IRQ: { + int __user *uaddr = (int __user *)(long)attr->addr; + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + return -ENODEV; + + if (get_user(irq, uaddr)) + return -EFAULT; + + /* The PMU overflow interrupt can be a PPI or a valid SPI. */ + if (!(irq_is_ppi(irq) || irq_is_spi(irq))) + return -EINVAL; + + if (!pmu_irq_is_valid(vcpu->kvm, irq)) + return -EINVAL; + + if (kvm_arm_pmu_irq_initialized(vcpu)) + return -EBUSY; + + kvm_debug("Set kvm ARM PMU irq: %d\n", irq); + vcpu->arch.pmu.irq_num = irq; + return 0; + } + case KVM_ARM_VCPU_PMU_V3_INIT: + return kvm_arm_pmu_v3_init(vcpu); + } + + return -ENXIO; +} + +int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PMU_V3_IRQ: { + int __user *uaddr = (int __user *)(long)attr->addr; + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + return -ENODEV; + + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -ENXIO; + + irq = vcpu->arch.pmu.irq_num; + return put_user(irq, uaddr); + } + } + + return -ENXIO; +} + +int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PMU_V3_IRQ: + case KVM_ARM_VCPU_PMU_V3_INIT: + if (kvm_arm_support_pmu_v3() && + test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) + return 0; + } + + return -ENXIO; +} diff --git a/arch/arm64/kvm/psci.c b/arch/arm64/kvm/psci.c new file mode 100644 index 000000000000..83415e96b589 --- /dev/null +++ b/arch/arm64/kvm/psci.c @@ -0,0 +1,564 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2012 - ARM Ltd + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/arm-smccc.h> +#include <linux/preempt.h> +#include <linux/kvm_host.h> +#include <linux/uaccess.h> +#include <linux/wait.h> + +#include <asm/cputype.h> +#include <asm/kvm_emulate.h> + +#include <kvm/arm_psci.h> +#include <kvm/arm_hypercalls.h> + +/* + * This is an implementation of the Power State Coordination Interface + * as described in ARM document number ARM DEN 0022A. + */ + +#define AFFINITY_MASK(level) ~((0x1UL << ((level) * MPIDR_LEVEL_BITS)) - 1) + +static unsigned long psci_affinity_mask(unsigned long affinity_level) +{ + if (affinity_level <= 3) + return MPIDR_HWID_BITMASK & AFFINITY_MASK(affinity_level); + + return 0; +} + +static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) +{ + /* + * NOTE: For simplicity, we make VCPU suspend emulation to be + * same-as WFI (Wait-for-interrupt) emulation. + * + * This means for KVM the wakeup events are interrupts and + * this is consistent with intended use of StateID as described + * in section 5.4.1 of PSCI v0.2 specification (ARM DEN 0022A). + * + * Further, we also treat power-down request to be same as + * stand-by request as-per section 5.4.2 clause 3 of PSCI v0.2 + * specification (ARM DEN 0022A). This means all suspend states + * for KVM will preserve the register state. + */ + kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); + + return PSCI_RET_SUCCESS; +} + +static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + +static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) +{ + struct vcpu_reset_state *reset_state; + struct kvm *kvm = source_vcpu->kvm; + struct kvm_vcpu *vcpu = NULL; + unsigned long cpu_id; + + cpu_id = smccc_get_arg1(source_vcpu) & MPIDR_HWID_BITMASK; + if (vcpu_mode_is_32bit(source_vcpu)) + cpu_id &= ~((u32) 0); + + vcpu = kvm_mpidr_to_vcpu(kvm, cpu_id); + + /* + * Make sure the caller requested a valid CPU and that the CPU is + * turned off. + */ + if (!vcpu) + return PSCI_RET_INVALID_PARAMS; + if (!vcpu->arch.power_off) { + if (kvm_psci_version(source_vcpu, kvm) != KVM_ARM_PSCI_0_1) + return PSCI_RET_ALREADY_ON; + else + return PSCI_RET_INVALID_PARAMS; + } + + reset_state = &vcpu->arch.reset_state; + + reset_state->pc = smccc_get_arg2(source_vcpu); + + /* Propagate caller endianness */ + reset_state->be = kvm_vcpu_is_be(source_vcpu); + + /* + * NOTE: We always update r0 (or x0) because for PSCI v0.1 + * the general purpose registers are undefined upon CPU_ON. + */ + reset_state->r0 = smccc_get_arg3(source_vcpu); + + WRITE_ONCE(reset_state->reset, true); + kvm_make_request(KVM_REQ_VCPU_RESET, vcpu); + + /* + * Make sure the reset request is observed if the change to + * power_state is observed. + */ + smp_wmb(); + + vcpu->arch.power_off = false; + kvm_vcpu_wake_up(vcpu); + + return PSCI_RET_SUCCESS; +} + +static unsigned long kvm_psci_vcpu_affinity_info(struct kvm_vcpu *vcpu) +{ + int i, matching_cpus = 0; + unsigned long mpidr; + unsigned long target_affinity; + unsigned long target_affinity_mask; + unsigned long lowest_affinity_level; + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *tmp; + + target_affinity = smccc_get_arg1(vcpu); + lowest_affinity_level = smccc_get_arg2(vcpu); + + /* Determine target affinity mask */ + target_affinity_mask = psci_affinity_mask(lowest_affinity_level); + if (!target_affinity_mask) + return PSCI_RET_INVALID_PARAMS; + + /* Ignore other bits of target affinity */ + target_affinity &= target_affinity_mask; + + /* + * If one or more VCPU matching target affinity are running + * then ON else OFF + */ + kvm_for_each_vcpu(i, tmp, kvm) { + mpidr = kvm_vcpu_get_mpidr_aff(tmp); + if ((mpidr & target_affinity_mask) == target_affinity) { + matching_cpus++; + if (!tmp->arch.power_off) + return PSCI_0_2_AFFINITY_LEVEL_ON; + } + } + + if (!matching_cpus) + return PSCI_RET_INVALID_PARAMS; + + return PSCI_0_2_AFFINITY_LEVEL_OFF; +} + +static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) +{ + int i; + struct kvm_vcpu *tmp; + + /* + * The KVM ABI specifies that a system event exit may call KVM_RUN + * again and may perform shutdown/reboot at a later time that when the + * actual request is made. Since we are implementing PSCI and a + * caller of PSCI reboot and shutdown expects that the system shuts + * down or reboots immediately, let's make sure that VCPUs are not run + * after this call is handled and before the VCPUs have been + * re-initialized. + */ + kvm_for_each_vcpu(i, tmp, vcpu->kvm) + tmp->arch.power_off = true; + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); + + memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); + vcpu->run->system_event.type = type; + vcpu->run->exit_reason = KVM_EXIT_SYSTEM_EVENT; +} + +static void kvm_psci_system_off(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_SHUTDOWN); +} + +static void kvm_psci_system_reset(struct kvm_vcpu *vcpu) +{ + kvm_prepare_system_event(vcpu, KVM_SYSTEM_EVENT_RESET); +} + +static void kvm_psci_narrow_to_32bit(struct kvm_vcpu *vcpu) +{ + int i; + + /* + * Zero the input registers' upper 32 bits. They will be fully + * zeroed on exit, so we're fine changing them in place. + */ + for (i = 1; i < 4; i++) + vcpu_set_reg(vcpu, i, lower_32_bits(vcpu_get_reg(vcpu, i))); +} + +static unsigned long kvm_psci_check_allowed_function(struct kvm_vcpu *vcpu, u32 fn) +{ + switch(fn) { + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_ON: + case PSCI_0_2_FN64_AFFINITY_INFO: + /* Disallow these functions for 32bit guests */ + if (vcpu_mode_is_32bit(vcpu)) + return PSCI_RET_NOT_SUPPORTED; + break; + } + + return 0; +} + +static int kvm_psci_0_2_call(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u32 psci_fn = smccc_get_function(vcpu); + unsigned long val; + int ret = 1; + + val = kvm_psci_check_allowed_function(vcpu, psci_fn); + if (val) + goto out; + + switch (psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + /* + * Bits[31:16] = Major Version = 0 + * Bits[15:0] = Minor Version = 2 + */ + val = KVM_ARM_PSCI_0_2; + break; + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + val = kvm_psci_vcpu_suspend(vcpu); + break; + case PSCI_0_2_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case PSCI_0_2_FN_CPU_ON: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_0_2_FN64_CPU_ON: + mutex_lock(&kvm->lock); + val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); + break; + case PSCI_0_2_FN_AFFINITY_INFO: + kvm_psci_narrow_to_32bit(vcpu); + fallthrough; + case PSCI_0_2_FN64_AFFINITY_INFO: + val = kvm_psci_vcpu_affinity_info(vcpu); + break; + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + /* + * Trusted OS is MP hence does not require migration + * or + * Trusted OS is not present + */ + val = PSCI_0_2_TOS_MP; + break; + case PSCI_0_2_FN_SYSTEM_OFF: + kvm_psci_system_off(vcpu); + /* + * We shouldn't be going back to guest VCPU after + * receiving SYSTEM_OFF request. + * + * If user space accidentally/deliberately resumes + * guest VCPU after SYSTEM_OFF request then guest + * VCPU should see internal failure from PSCI return + * value. To achieve this, we preload r0 (or x0) with + * PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + case PSCI_0_2_FN_SYSTEM_RESET: + kvm_psci_system_reset(vcpu); + /* + * Same reason as SYSTEM_OFF for preloading r0 (or x0) + * with PSCI return value INTERNAL_FAILURE. + */ + val = PSCI_RET_INTERNAL_FAILURE; + ret = 0; + break; + default: + val = PSCI_RET_NOT_SUPPORTED; + break; + } + +out: + smccc_set_retval(vcpu, val, 0, 0, 0); + return ret; +} + +static int kvm_psci_1_0_call(struct kvm_vcpu *vcpu) +{ + u32 psci_fn = smccc_get_function(vcpu); + u32 feature; + unsigned long val; + int ret = 1; + + switch(psci_fn) { + case PSCI_0_2_FN_PSCI_VERSION: + val = KVM_ARM_PSCI_1_0; + break; + case PSCI_1_0_FN_PSCI_FEATURES: + feature = smccc_get_arg1(vcpu); + val = kvm_psci_check_allowed_function(vcpu, feature); + if (val) + break; + + switch(feature) { + case PSCI_0_2_FN_PSCI_VERSION: + case PSCI_0_2_FN_CPU_SUSPEND: + case PSCI_0_2_FN64_CPU_SUSPEND: + case PSCI_0_2_FN_CPU_OFF: + case PSCI_0_2_FN_CPU_ON: + case PSCI_0_2_FN64_CPU_ON: + case PSCI_0_2_FN_AFFINITY_INFO: + case PSCI_0_2_FN64_AFFINITY_INFO: + case PSCI_0_2_FN_MIGRATE_INFO_TYPE: + case PSCI_0_2_FN_SYSTEM_OFF: + case PSCI_0_2_FN_SYSTEM_RESET: + case PSCI_1_0_FN_PSCI_FEATURES: + case ARM_SMCCC_VERSION_FUNC_ID: + val = 0; + break; + default: + val = PSCI_RET_NOT_SUPPORTED; + break; + } + break; + default: + return kvm_psci_0_2_call(vcpu); + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return ret; +} + +static int kvm_psci_0_1_call(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u32 psci_fn = smccc_get_function(vcpu); + unsigned long val; + + switch (psci_fn) { + case KVM_PSCI_FN_CPU_OFF: + kvm_psci_vcpu_off(vcpu); + val = PSCI_RET_SUCCESS; + break; + case KVM_PSCI_FN_CPU_ON: + mutex_lock(&kvm->lock); + val = kvm_psci_vcpu_on(vcpu); + mutex_unlock(&kvm->lock); + break; + default: + val = PSCI_RET_NOT_SUPPORTED; + break; + } + + smccc_set_retval(vcpu, val, 0, 0, 0); + return 1; +} + +/** + * kvm_psci_call - handle PSCI call if r0 value is in range + * @vcpu: Pointer to the VCPU struct + * + * Handle PSCI calls from guests through traps from HVC instructions. + * The calling convention is similar to SMC calls to the secure world + * where the function number is placed in r0. + * + * This function returns: > 0 (success), 0 (success but exit to user + * space), and < 0 (errors) + * + * Errors: + * -EINVAL: Unrecognized PSCI function + */ +int kvm_psci_call(struct kvm_vcpu *vcpu) +{ + switch (kvm_psci_version(vcpu, vcpu->kvm)) { + case KVM_ARM_PSCI_1_0: + return kvm_psci_1_0_call(vcpu); + case KVM_ARM_PSCI_0_2: + return kvm_psci_0_2_call(vcpu); + case KVM_ARM_PSCI_0_1: + return kvm_psci_0_1_call(vcpu); + default: + return -EINVAL; + }; +} + +int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu) +{ + return 3; /* PSCI version and two workaround registers */ +} + +int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) +{ + if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++)) + return -EFAULT; + + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++)) + return -EFAULT; + + if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++)) + return -EFAULT; + + return 0; +} + +#define KVM_REG_FEATURE_LEVEL_WIDTH 4 +#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1) + +/* + * Convert the workaround level into an easy-to-compare number, where higher + * values mean better protection. + */ +static int get_kernel_wa_level(u64 regid) +{ + switch (regid) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + switch (kvm_arm_harden_branch_predictor()) { + case KVM_BP_HARDEN_UNKNOWN: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; + case KVM_BP_HARDEN_WA_NEEDED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL; + case KVM_BP_HARDEN_NOT_REQUIRED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED; + } + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL; + case KVM_SSBD_KERNEL: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED; + case KVM_SSBD_UNKNOWN: + default: + return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN; + } + } + + return -EINVAL; +} + +int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + + switch (reg->id) { + case KVM_REG_ARM_PSCI_VERSION: + val = kvm_psci_version(vcpu, vcpu->kvm); + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK; + + if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL && + kvm_arm_get_vcpu_workaround_2_flag(vcpu)) + val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED; + break; + default: + return -ENOENT; + } + + if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + return 0; +} + +int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg) +{ + void __user *uaddr = (void __user *)(long)reg->addr; + u64 val; + int wa_level; + + if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id))) + return -EFAULT; + + switch (reg->id) { + case KVM_REG_ARM_PSCI_VERSION: + { + bool wants_02; + + wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features); + + switch (val) { + case KVM_ARM_PSCI_0_1: + if (wants_02) + return -EINVAL; + vcpu->kvm->arch.psci_version = val; + return 0; + case KVM_ARM_PSCI_0_2: + case KVM_ARM_PSCI_1_0: + if (!wants_02) + return -EINVAL; + vcpu->kvm->arch.psci_version = val; + return 0; + } + break; + } + + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1: + if (val & ~KVM_REG_FEATURE_LEVEL_MASK) + return -EINVAL; + + if (get_kernel_wa_level(reg->id) < val) + return -EINVAL; + + return 0; + + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2: + if (val & ~(KVM_REG_FEATURE_LEVEL_MASK | + KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED)) + return -EINVAL; + + wa_level = val & KVM_REG_FEATURE_LEVEL_MASK; + + if (get_kernel_wa_level(reg->id) < wa_level) + return -EINVAL; + + /* The enabled bit must not be set unless the level is AVAIL. */ + if (wa_level != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL && + wa_level != val) + return -EINVAL; + + /* Are we finished or do we need to check the enable bit ? */ + if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL) + return 0; + + /* + * If this kernel supports the workaround to be switched on + * or off, make sure it matches the requested setting. + */ + switch (wa_level) { + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: + kvm_arm_set_vcpu_workaround_2_flag(vcpu, + val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED); + break; + case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: + kvm_arm_set_vcpu_workaround_2_flag(vcpu, true); + break; + } + + return 0; + default: + return -ENOENT; + } + + return -EINVAL; +} diff --git a/arch/arm64/kvm/pvtime.c b/arch/arm64/kvm/pvtime.c new file mode 100644 index 000000000000..1e0f4c284888 --- /dev/null +++ b/arch/arm64/kvm/pvtime.c @@ -0,0 +1,131 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2019 Arm Ltd. + +#include <linux/arm-smccc.h> +#include <linux/kvm_host.h> + +#include <asm/kvm_mmu.h> +#include <asm/pvclock-abi.h> + +#include <kvm/arm_hypercalls.h> + +void kvm_update_stolen_time(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + u64 steal; + __le64 steal_le; + u64 offset; + int idx; + u64 base = vcpu->arch.steal.base; + + if (base == GPA_INVALID) + return; + + /* Let's do the local bookkeeping */ + steal = vcpu->arch.steal.steal; + steal += current->sched_info.run_delay - vcpu->arch.steal.last_steal; + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + vcpu->arch.steal.steal = steal; + + steal_le = cpu_to_le64(steal); + idx = srcu_read_lock(&kvm->srcu); + offset = offsetof(struct pvclock_vcpu_stolen_time, stolen_time); + kvm_put_guest(kvm, base + offset, steal_le, u64); + srcu_read_unlock(&kvm->srcu, idx); +} + +long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu) +{ + u32 feature = smccc_get_arg1(vcpu); + long val = SMCCC_RET_NOT_SUPPORTED; + + switch (feature) { + case ARM_SMCCC_HV_PV_TIME_FEATURES: + case ARM_SMCCC_HV_PV_TIME_ST: + val = SMCCC_RET_SUCCESS; + break; + } + + return val; +} + +gpa_t kvm_init_stolen_time(struct kvm_vcpu *vcpu) +{ + struct pvclock_vcpu_stolen_time init_values = {}; + struct kvm *kvm = vcpu->kvm; + u64 base = vcpu->arch.steal.base; + int idx; + + if (base == GPA_INVALID) + return base; + + /* + * Start counting stolen time from the time the guest requests + * the feature enabled. + */ + vcpu->arch.steal.steal = 0; + vcpu->arch.steal.last_steal = current->sched_info.run_delay; + + idx = srcu_read_lock(&kvm->srcu); + kvm_write_guest(kvm, base, &init_values, sizeof(init_values)); + srcu_read_unlock(&kvm->srcu, idx); + + return base; +} + +int kvm_arm_pvtime_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + struct kvm *kvm = vcpu->kvm; + u64 ipa; + int ret = 0; + int idx; + + if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + return -ENXIO; + + if (get_user(ipa, user)) + return -EFAULT; + if (!IS_ALIGNED(ipa, 64)) + return -EINVAL; + if (vcpu->arch.steal.base != GPA_INVALID) + return -EEXIST; + + /* Check the address is in a valid memslot */ + idx = srcu_read_lock(&kvm->srcu); + if (kvm_is_error_hva(gfn_to_hva(kvm, ipa >> PAGE_SHIFT))) + ret = -EINVAL; + srcu_read_unlock(&kvm->srcu, idx); + + if (!ret) + vcpu->arch.steal.base = ipa; + + return ret; +} + +int kvm_arm_pvtime_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + u64 __user *user = (u64 __user *)attr->addr; + u64 ipa; + + if (attr->attr != KVM_ARM_VCPU_PVTIME_IPA) + return -ENXIO; + + ipa = vcpu->arch.steal.base; + + if (put_user(ipa, user)) + return -EFAULT; + return 0; +} + +int kvm_arm_pvtime_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_PVTIME_IPA: + return 0; + } + return -ENXIO; +} diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 30b7ea680f66..d3b209023727 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -36,23 +36,11 @@ static u32 kvm_ipa_limit; /* * ARMv8 Reset Values */ -static const struct kvm_regs default_regs_reset = { - .regs.pstate = (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | - PSR_F_BIT | PSR_D_BIT), -}; +#define VCPU_RESET_PSTATE_EL1 (PSR_MODE_EL1h | PSR_A_BIT | PSR_I_BIT | \ + PSR_F_BIT | PSR_D_BIT) -static const struct kvm_regs default_regs_reset32 = { - .regs.pstate = (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | - PSR_AA32_I_BIT | PSR_AA32_F_BIT), -}; - -static bool cpu_has_32bit_el1(void) -{ - u64 pfr0; - - pfr0 = read_sanitised_ftr_reg(SYS_ID_AA64PFR0_EL1); - return !!(pfr0 & 0x20); -} +#define VCPU_RESET_PSTATE_SVC (PSR_AA32_MODE_SVC | PSR_AA32_A_BIT | \ + PSR_AA32_I_BIT | PSR_AA32_F_BIT) /** * kvm_arch_vm_ioctl_check_extension @@ -66,7 +54,7 @@ int kvm_arch_vm_ioctl_check_extension(struct kvm *kvm, long ext) switch (ext) { case KVM_CAP_ARM_EL1_32BIT: - r = cpu_has_32bit_el1(); + r = cpus_have_const_cap(ARM64_HAS_32BIT_EL1); break; case KVM_CAP_GUEST_DEBUG_HW_BPS: r = get_num_brps(); @@ -163,7 +151,7 @@ static int kvm_vcpu_finalize_sve(struct kvm_vcpu *vcpu) vl = vcpu->arch.sve_max_vl; /* - * Resposibility for these properties is shared between + * Responsibility for these properties is shared between * kvm_arm_init_arch_resources(), kvm_vcpu_enable_sve() and * set_sve_vls(). Double-check here just to be sure: */ @@ -249,7 +237,7 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) * ioctl or as part of handling a request issued by another VCPU in the PSCI * handling code. In the first case, the VCPU will not be loaded, and in the * second case the VCPU will be loaded. Because this function operates purely - * on the memory-backed valus of system registers, we want to do a full put if + * on the memory-backed values of system registers, we want to do a full put if * we were loaded (handling a request) and load the values back at the end of * the function. Otherwise we leave the state alone. In both cases, we * disable preemption around the vcpu reset as we would otherwise race with @@ -257,9 +245,9 @@ static int kvm_vcpu_enable_ptrauth(struct kvm_vcpu *vcpu) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - const struct kvm_regs *cpu_reset; int ret = -EINVAL; bool loaded; + u32 pstate; /* Reset PMU outside of the non-preemptible section */ kvm_pmu_vcpu_reset(vcpu); @@ -288,18 +276,19 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) switch (vcpu->arch.target) { default: if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features)) { - if (!cpu_has_32bit_el1()) + if (!cpus_have_const_cap(ARM64_HAS_32BIT_EL1)) goto out; - cpu_reset = &default_regs_reset32; + pstate = VCPU_RESET_PSTATE_SVC; } else { - cpu_reset = &default_regs_reset; + pstate = VCPU_RESET_PSTATE_EL1; } break; } /* Reset core registers */ - memcpy(vcpu_gp_regs(vcpu), cpu_reset, sizeof(*cpu_reset)); + memset(vcpu_gp_regs(vcpu), 0, sizeof(*vcpu_gp_regs(vcpu))); + vcpu_gp_regs(vcpu)->regs.pstate = pstate; /* Reset system registers */ kvm_reset_sys_regs(vcpu); @@ -340,11 +329,50 @@ out: return ret; } -void kvm_set_ipa_limit(void) +u32 get_kvm_ipa_limit(void) +{ + return kvm_ipa_limit; +} + +int kvm_set_ipa_limit(void) { - unsigned int ipa_max, pa_max, va_max, parange; + unsigned int ipa_max, pa_max, va_max, parange, tgran_2; + u64 mmfr0; + + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_PARANGE_SHIFT); + + /* + * Check with ARMv8.5-GTG that our PAGE_SIZE is supported at + * Stage-2. If not, things will stop very quickly. + */ + switch (PAGE_SIZE) { + default: + case SZ_4K: + tgran_2 = ID_AA64MMFR0_TGRAN4_2_SHIFT; + break; + case SZ_16K: + tgran_2 = ID_AA64MMFR0_TGRAN16_2_SHIFT; + break; + case SZ_64K: + tgran_2 = ID_AA64MMFR0_TGRAN64_2_SHIFT; + break; + } + + switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) { + default: + case 1: + kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n"); + return -EINVAL; + case 0: + kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n"); + break; + case 2: + kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n"); + break; + } - parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 0x7; pa_max = id_aa64mmfr0_parange_to_phys_shift(parange); /* Clamp the IPA limit to the PA size supported by the kernel */ @@ -357,7 +385,7 @@ void kvm_set_ipa_limit(void) * * So clamp the ipa limit further down to limit the number of levels. * Since we can concatenate upto 16 tables at entry level, we could - * go upto 4bits above the maximum VA addressible with the current + * go upto 4bits above the maximum VA addressable with the current * number of levels. */ va_max = PGDIR_SHIFT + PAGE_SHIFT - 3; @@ -378,6 +406,8 @@ void kvm_set_ipa_limit(void) "KVM IPA limit (%d bit) is smaller than default size\n", ipa_max); kvm_ipa_limit = ipa_max; kvm_info("IPA Size Limit: %dbits\n", kvm_ipa_limit); + + return 0; } /* @@ -390,7 +420,7 @@ void kvm_set_ipa_limit(void) */ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) { - u64 vtcr = VTCR_EL2_FLAGS; + u64 vtcr = VTCR_EL2_FLAGS, mmfr0; u32 parange, phys_shift; u8 lvls; @@ -406,7 +436,9 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type) phys_shift = KVM_PHYS_SHIFT; } - parange = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1) & 7; + mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); + parange = cpuid_feature_extract_unsigned_field(mmfr0, + ID_AA64MMFR0_PARANGE_SHIFT); if (parange > ID_AA64MMFR0_PARANGE_MAX) parange = ID_AA64MMFR0_PARANGE_MAX; vtcr |= parange << VTCR_EL2_PS_SHIFT; diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 51db934702b6..80985439bfb2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -34,7 +34,7 @@ #include "trace.h" /* - * All of this file is extremly similar to the ARM coproc.c, but the + * All of this file is extremely similar to the ARM coproc.c, but the * types are different. My gut feeling is that it should be pretty * easy to merge, but that would be an ABI breakage -- again. VFP * would also need to be abstracted. @@ -64,11 +64,8 @@ static bool write_to_read_only(struct kvm_vcpu *vcpu, return false; } -u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +static bool __vcpu_read_sys_reg_from_cpu(int reg, u64 *val) { - if (!vcpu->arch.sysregs_loaded_on_cpu) - goto immediate_read; - /* * System registers listed in the switch are not saved on every * exit from the guest but are only saved on vcpu_put. @@ -79,75 +76,92 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) * thread when emulating cross-VCPU communication. */ switch (reg) { - case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1); - case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12); - case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1); - case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12); - case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12); - case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12); - case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12); - case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12); - case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12); - case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12); - case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12); - case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12); - case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12); - case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12); - case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0); - case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0); - case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1); - case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12); - case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12); - case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1); - case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2); - case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2); - case DBGVCR32_EL2: return read_sysreg_s(SYS_DBGVCR32_EL2); + case CSSELR_EL1: *val = read_sysreg_s(SYS_CSSELR_EL1); break; + case SCTLR_EL1: *val = read_sysreg_s(SYS_SCTLR_EL12); break; + case ACTLR_EL1: *val = read_sysreg_s(SYS_ACTLR_EL1); break; + case CPACR_EL1: *val = read_sysreg_s(SYS_CPACR_EL12); break; + case TTBR0_EL1: *val = read_sysreg_s(SYS_TTBR0_EL12); break; + case TTBR1_EL1: *val = read_sysreg_s(SYS_TTBR1_EL12); break; + case TCR_EL1: *val = read_sysreg_s(SYS_TCR_EL12); break; + case ESR_EL1: *val = read_sysreg_s(SYS_ESR_EL12); break; + case AFSR0_EL1: *val = read_sysreg_s(SYS_AFSR0_EL12); break; + case AFSR1_EL1: *val = read_sysreg_s(SYS_AFSR1_EL12); break; + case FAR_EL1: *val = read_sysreg_s(SYS_FAR_EL12); break; + case MAIR_EL1: *val = read_sysreg_s(SYS_MAIR_EL12); break; + case VBAR_EL1: *val = read_sysreg_s(SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: *val = read_sysreg_s(SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: *val = read_sysreg_s(SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: *val = read_sysreg_s(SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: *val = read_sysreg_s(SYS_TPIDR_EL1); break; + case AMAIR_EL1: *val = read_sysreg_s(SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: *val = read_sysreg_s(SYS_CNTKCTL_EL12); break; + case PAR_EL1: *val = read_sysreg_s(SYS_PAR_EL1); break; + case DACR32_EL2: *val = read_sysreg_s(SYS_DACR32_EL2); break; + case IFSR32_EL2: *val = read_sysreg_s(SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: *val = read_sysreg_s(SYS_DBGVCR32_EL2); break; + default: return false; } -immediate_read: - return __vcpu_sys_reg(vcpu, reg); + return true; } -void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +static bool __vcpu_write_sys_reg_to_cpu(u64 val, int reg) { - if (!vcpu->arch.sysregs_loaded_on_cpu) - goto immediate_write; - /* * System registers listed in the switch are not restored on every * entry to the guest but are only restored on vcpu_load. * * Note that MPIDR_EL1 for the guest is set by KVM via VMPIDR_EL2 but - * should never be listed below, because the the MPIDR should only be - * set once, before running the VCPU, and never changed later. + * should never be listed below, because the MPIDR should only be set + * once, before running the VCPU, and never changed later. */ switch (reg) { - case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return; - case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return; - case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return; - case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return; - case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return; - case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return; - case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return; - case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return; - case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return; - case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return; - case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return; - case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return; - case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return; - case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return; - case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return; - case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return; - case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return; - case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return; - case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return; - case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return; - case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return; - case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return; - case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); return; + case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); break; + case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); break; + case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); break; + case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); break; + case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); break; + case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); break; + case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); break; + case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); break; + case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); break; + case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); break; + case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); break; + case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); break; + case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); break; + case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12);break; + case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); break; + case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); break; + case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); break; + case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); break; + case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); break; + case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); break; + case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); break; + case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); break; + case DBGVCR32_EL2: write_sysreg_s(val, SYS_DBGVCR32_EL2); break; + default: return false; } -immediate_write: + return true; +} + +u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg) +{ + u64 val = 0x8badf00d8badf00d; + + if (vcpu->arch.sysregs_loaded_on_cpu && + __vcpu_read_sys_reg_from_cpu(reg, &val)) + return val; + + return __vcpu_sys_reg(vcpu, reg); +} + +void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg) +{ + if (vcpu->arch.sysregs_loaded_on_cpu && + __vcpu_write_sys_reg_to_cpu(val, reg)) + return; + __vcpu_sys_reg(vcpu, reg) = val; } @@ -1456,9 +1470,9 @@ static const struct sys_reg_desc sys_reg_descs[] = { ID_SANITISED(MVFR1_EL1), ID_SANITISED(MVFR2_EL1), ID_UNALLOCATED(3,3), - ID_UNALLOCATED(3,4), - ID_UNALLOCATED(3,5), - ID_UNALLOCATED(3,6), + ID_SANITISED(ID_PFR2_EL1), + ID_HIDDEN(ID_DFR1_EL1), + ID_SANITISED(ID_MMFR5_EL1), ID_UNALLOCATED(3,7), /* AArch64 ID registers */ @@ -1532,7 +1546,7 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PAR_EL1), NULL, reset_unknown, PAR_EL1 }, { SYS_DESC(SYS_PMINTENSET_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, - { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, NULL, PMINTENSET_EL1 }, + { SYS_DESC(SYS_PMINTENCLR_EL1), access_pminten, reset_unknown, PMINTENSET_EL1 }, { SYS_DESC(SYS_MAIR_EL1), access_vm_reg, reset_unknown, MAIR_EL1 }, { SYS_DESC(SYS_AMAIR_EL1), access_vm_reg, reset_amair_el1, AMAIR_EL1 }, @@ -1571,8 +1585,8 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_PMCR_EL0), access_pmcr, reset_pmcr, PMCR_EL0 }, { SYS_DESC(SYS_PMCNTENSET_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, NULL, PMCNTENSET_EL0 }, - { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, NULL, PMOVSSET_EL0 }, + { SYS_DESC(SYS_PMCNTENCLR_EL0), access_pmcnten, reset_unknown, PMCNTENSET_EL0 }, + { SYS_DESC(SYS_PMOVSCLR_EL0), access_pmovs, reset_unknown, PMOVSSET_EL0 }, { SYS_DESC(SYS_PMSWINC_EL0), access_pmswinc, reset_unknown, PMSWINC_EL0 }, { SYS_DESC(SYS_PMSELR_EL0), access_pmselr, reset_unknown, PMSELR_EL0 }, { SYS_DESC(SYS_PMCEID0_EL0), access_pmceid }, @@ -2073,12 +2087,37 @@ static const struct sys_reg_desc cp15_64_regs[] = { { SYS_DESC(SYS_AARCH32_CNTP_CVAL), access_arch_timer }, }; +static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n, + bool is_32) +{ + unsigned int i; + + for (i = 0; i < n; i++) { + if (!is_32 && table[i].reg && !table[i].reset) { + kvm_err("sys_reg table %p entry %d has lacks reset\n", + table, i); + return 1; + } + + if (i && cmp_sys_reg(&table[i-1], &table[i]) >= 0) { + kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); + return 1; + } + } + + return 0; +} + /* Target specific emulation tables */ static struct kvm_sys_reg_target_table *target_tables[KVM_ARM_NUM_TARGETS]; void kvm_register_target_sys_reg_table(unsigned int target, struct kvm_sys_reg_target_table *table) { + if (check_sysreg_table(table->table64.table, table->table64.num, false) || + check_sysreg_table(table->table32.table, table->table32.num, true)) + return; + target_tables[target] = table; } @@ -2364,19 +2403,13 @@ static int emulate_sys_reg(struct kvm_vcpu *vcpu, } static void reset_sys_reg_descs(struct kvm_vcpu *vcpu, - const struct sys_reg_desc *table, size_t num, - unsigned long *bmap) + const struct sys_reg_desc *table, size_t num) { unsigned long i; for (i = 0; i < num; i++) - if (table[i].reset) { - int reg = table[i].reg; - + if (table[i].reset) table[i].reset(vcpu, &table[i]); - if (reg > 0 && reg < NR_SYS_REGS) - set_bit(reg, bmap); - } } /** @@ -2832,32 +2865,18 @@ int kvm_arm_copy_sys_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices) return write_demux_regids(uindices); } -static int check_sysreg_table(const struct sys_reg_desc *table, unsigned int n) -{ - unsigned int i; - - for (i = 1; i < n; i++) { - if (cmp_sys_reg(&table[i-1], &table[i]) >= 0) { - kvm_err("sys_reg table %p out of order (%d)\n", table, i - 1); - return 1; - } - } - - return 0; -} - void kvm_sys_reg_table_init(void) { unsigned int i; struct sys_reg_desc clidr; /* Make sure tables are unique and in order. */ - BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs))); - BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs))); - BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs))); - BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs))); - BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs))); - BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs))); + BUG_ON(check_sysreg_table(sys_reg_descs, ARRAY_SIZE(sys_reg_descs), false)); + BUG_ON(check_sysreg_table(cp14_regs, ARRAY_SIZE(cp14_regs), true)); + BUG_ON(check_sysreg_table(cp14_64_regs, ARRAY_SIZE(cp14_64_regs), true)); + BUG_ON(check_sysreg_table(cp15_regs, ARRAY_SIZE(cp15_regs), true)); + BUG_ON(check_sysreg_table(cp15_64_regs, ARRAY_SIZE(cp15_64_regs), true)); + BUG_ON(check_sysreg_table(invariant_sys_regs, ARRAY_SIZE(invariant_sys_regs), false)); /* We abuse the reset function to overwrite the table itself. */ for (i = 0; i < ARRAY_SIZE(invariant_sys_regs); i++) @@ -2893,17 +2912,10 @@ void kvm_reset_sys_regs(struct kvm_vcpu *vcpu) { size_t num; const struct sys_reg_desc *table; - DECLARE_BITMAP(bmap, NR_SYS_REGS) = { 0, }; /* Generic chip reset first (so target could override). */ - reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs), bmap); + reset_sys_reg_descs(vcpu, sys_reg_descs, ARRAY_SIZE(sys_reg_descs)); table = get_target_table(vcpu->arch.target, true, &num); - reset_sys_reg_descs(vcpu, table, num, bmap); - - for (num = 1; num < NR_SYS_REGS; num++) { - if (WARN(!test_bit(num, bmap), - "Didn't reset __vcpu_sys_reg(%zi)\n", num)) - break; - } + reset_sys_reg_descs(vcpu, table, num); } diff --git a/arch/arm64/kvm/trace.h b/arch/arm64/kvm/trace.h index eab91ad0effb..86f9ea47be29 100644 --- a/arch/arm64/kvm/trace.h +++ b/arch/arm64/kvm/trace.h @@ -1,216 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#if !defined(_TRACE_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#ifndef _TRACE_ARM64_KVM_H #define _TRACE_ARM64_KVM_H -#include <linux/tracepoint.h> -#include "sys_regs.h" +#include "trace_arm.h" +#include "trace_handle_exit.h" -#undef TRACE_SYSTEM -#define TRACE_SYSTEM kvm - -TRACE_EVENT(kvm_wfx_arm64, - TP_PROTO(unsigned long vcpu_pc, bool is_wfe), - TP_ARGS(vcpu_pc, is_wfe), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(bool, is_wfe) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->is_wfe = is_wfe; - ), - - TP_printk("guest executed wf%c at: 0x%08lx", - __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) -); - -TRACE_EVENT(kvm_hvc_arm64, - TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), - TP_ARGS(vcpu_pc, r0, imm), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(unsigned long, r0) - __field(unsigned long, imm) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->r0 = r0; - __entry->imm = imm; - ), - - TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", - __entry->vcpu_pc, __entry->r0, __entry->imm) -); - -TRACE_EVENT(kvm_arm_setup_debug, - TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), - TP_ARGS(vcpu, guest_debug), - - TP_STRUCT__entry( - __field(struct kvm_vcpu *, vcpu) - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->vcpu = vcpu; - __entry->guest_debug = guest_debug; - ), - - TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) -); - -TRACE_EVENT(kvm_arm_clear_debug, - TP_PROTO(__u32 guest_debug), - TP_ARGS(guest_debug), - - TP_STRUCT__entry( - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->guest_debug = guest_debug; - ), - - TP_printk("flags: 0x%08x", __entry->guest_debug) -); - -TRACE_EVENT(kvm_arm_set_dreg32, - TP_PROTO(const char *name, __u32 value), - TP_ARGS(name, value), - - TP_STRUCT__entry( - __field(const char *, name) - __field(__u32, value) - ), - - TP_fast_assign( - __entry->name = name; - __entry->value = value; - ), - - TP_printk("%s: 0x%08x", __entry->name, __entry->value) -); - -TRACE_DEFINE_SIZEOF(__u64); - -TRACE_EVENT(kvm_arm_set_regset, - TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), - TP_ARGS(type, len, control, value), - TP_STRUCT__entry( - __field(const char *, name) - __field(int, len) - __array(u64, ctrls, 16) - __array(u64, values, 16) - ), - TP_fast_assign( - __entry->name = type; - __entry->len = len; - memcpy(__entry->ctrls, control, len << 3); - memcpy(__entry->values, value, len << 3); - ), - TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, - __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), - __print_array(__entry->values, __entry->len, sizeof(__u64))) -); - -TRACE_EVENT(trap_reg, - TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), - TP_ARGS(fn, reg, is_write, write_value), - - TP_STRUCT__entry( - __field(const char *, fn) - __field(int, reg) - __field(bool, is_write) - __field(u64, write_value) - ), - - TP_fast_assign( - __entry->fn = fn; - __entry->reg = reg; - __entry->is_write = is_write; - __entry->write_value = write_value; - ), - - TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) -); - -TRACE_EVENT(kvm_handle_sys_reg, - TP_PROTO(unsigned long hsr), - TP_ARGS(hsr), - - TP_STRUCT__entry( - __field(unsigned long, hsr) - ), - - TP_fast_assign( - __entry->hsr = hsr; - ), - - TP_printk("HSR 0x%08lx", __entry->hsr) -); - -TRACE_EVENT(kvm_sys_access, - TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg), - TP_ARGS(vcpu_pc, params, reg), - - TP_STRUCT__entry( - __field(unsigned long, vcpu_pc) - __field(bool, is_write) - __field(const char *, name) - __field(u8, Op0) - __field(u8, Op1) - __field(u8, CRn) - __field(u8, CRm) - __field(u8, Op2) - ), - - TP_fast_assign( - __entry->vcpu_pc = vcpu_pc; - __entry->is_write = params->is_write; - __entry->name = reg->name; - __entry->Op0 = reg->Op0; - __entry->Op0 = reg->Op0; - __entry->Op1 = reg->Op1; - __entry->CRn = reg->CRn; - __entry->CRm = reg->CRm; - __entry->Op2 = reg->Op2; - ), - - TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s", - __entry->vcpu_pc, __entry->name ?: "UNKN", - __entry->Op0, __entry->Op1, __entry->CRn, - __entry->CRm, __entry->Op2, - __entry->is_write ? "write" : "read") -); - -TRACE_EVENT(kvm_set_guest_debug, - TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), - TP_ARGS(vcpu, guest_debug), - - TP_STRUCT__entry( - __field(struct kvm_vcpu *, vcpu) - __field(__u32, guest_debug) - ), - - TP_fast_assign( - __entry->vcpu = vcpu; - __entry->guest_debug = guest_debug; - ), - - TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) -); - - -#endif /* _TRACE_ARM64_KVM_H */ - -#undef TRACE_INCLUDE_PATH -#define TRACE_INCLUDE_PATH . -#undef TRACE_INCLUDE_FILE -#define TRACE_INCLUDE_FILE trace - -/* This part must be outside protection */ -#include <trace/define_trace.h> +#endif /* _TRACE_ARM64_KVM_H */ diff --git a/arch/arm64/kvm/trace_arm.h b/arch/arm64/kvm/trace_arm.h new file mode 100644 index 000000000000..4c71270cc097 --- /dev/null +++ b/arch/arm64/kvm/trace_arm.h @@ -0,0 +1,378 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_ARM_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_ARM_ARM64_KVM_H + +#include <kvm/arm_arch_timer.h> +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for entry/exit to guest + */ +TRACE_EVENT(kvm_entry, + TP_PROTO(unsigned long vcpu_pc), + TP_ARGS(vcpu_pc), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("PC: 0x%08lx", __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_exit, + TP_PROTO(int ret, unsigned int esr_ec, unsigned long vcpu_pc), + TP_ARGS(ret, esr_ec, vcpu_pc), + + TP_STRUCT__entry( + __field( int, ret ) + __field( unsigned int, esr_ec ) + __field( unsigned long, vcpu_pc ) + ), + + TP_fast_assign( + __entry->ret = ARM_EXCEPTION_CODE(ret); + __entry->esr_ec = ARM_EXCEPTION_IS_TRAP(ret) ? esr_ec : 0; + __entry->vcpu_pc = vcpu_pc; + ), + + TP_printk("%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx", + __print_symbolic(__entry->ret, kvm_arm_exception_type), + __entry->esr_ec, + __print_symbolic(__entry->esr_ec, kvm_arm_exception_class), + __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_guest_fault, + TP_PROTO(unsigned long vcpu_pc, unsigned long hsr, + unsigned long hxfar, + unsigned long long ipa), + TP_ARGS(vcpu_pc, hsr, hxfar, ipa), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( unsigned long, hsr ) + __field( unsigned long, hxfar ) + __field( unsigned long long, ipa ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->hsr = hsr; + __entry->hxfar = hxfar; + __entry->ipa = ipa; + ), + + TP_printk("ipa %#llx, hsr %#08lx, hxfar %#08lx, pc %#08lx", + __entry->ipa, __entry->hsr, + __entry->hxfar, __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_access_fault, + TP_PROTO(unsigned long ipa), + TP_ARGS(ipa), + + TP_STRUCT__entry( + __field( unsigned long, ipa ) + ), + + TP_fast_assign( + __entry->ipa = ipa; + ), + + TP_printk("IPA: %lx", __entry->ipa) +); + +TRACE_EVENT(kvm_irq_line, + TP_PROTO(unsigned int type, int vcpu_idx, int irq_num, int level), + TP_ARGS(type, vcpu_idx, irq_num, level), + + TP_STRUCT__entry( + __field( unsigned int, type ) + __field( int, vcpu_idx ) + __field( int, irq_num ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->type = type; + __entry->vcpu_idx = vcpu_idx; + __entry->irq_num = irq_num; + __entry->level = level; + ), + + TP_printk("Inject %s interrupt (%d), vcpu->idx: %d, num: %d, level: %d", + (__entry->type == KVM_ARM_IRQ_TYPE_CPU) ? "CPU" : + (__entry->type == KVM_ARM_IRQ_TYPE_PPI) ? "VGIC PPI" : + (__entry->type == KVM_ARM_IRQ_TYPE_SPI) ? "VGIC SPI" : "UNKNOWN", + __entry->type, __entry->vcpu_idx, __entry->irq_num, __entry->level) +); + +TRACE_EVENT(kvm_mmio_emulate, + TP_PROTO(unsigned long vcpu_pc, unsigned long instr, + unsigned long cpsr), + TP_ARGS(vcpu_pc, instr, cpsr), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( unsigned long, instr ) + __field( unsigned long, cpsr ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->instr = instr; + __entry->cpsr = cpsr; + ), + + TP_printk("Emulate MMIO at: 0x%08lx (instr: %08lx, cpsr: %08lx)", + __entry->vcpu_pc, __entry->instr, __entry->cpsr) +); + +TRACE_EVENT(kvm_unmap_hva_range, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier unmap range: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_set_spte_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier set pte hva: %#08lx", __entry->hva) +); + +TRACE_EVENT(kvm_age_hva, + TP_PROTO(unsigned long start, unsigned long end), + TP_ARGS(start, end), + + TP_STRUCT__entry( + __field( unsigned long, start ) + __field( unsigned long, end ) + ), + + TP_fast_assign( + __entry->start = start; + __entry->end = end; + ), + + TP_printk("mmu notifier age hva: %#08lx -- %#08lx", + __entry->start, __entry->end) +); + +TRACE_EVENT(kvm_test_age_hva, + TP_PROTO(unsigned long hva), + TP_ARGS(hva), + + TP_STRUCT__entry( + __field( unsigned long, hva ) + ), + + TP_fast_assign( + __entry->hva = hva; + ), + + TP_printk("mmu notifier test age hva: %#08lx", __entry->hva) +); + +TRACE_EVENT(kvm_set_way_flush, + TP_PROTO(unsigned long vcpu_pc, bool cache), + TP_ARGS(vcpu_pc, cache), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, cache ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->cache = cache; + ), + + TP_printk("S/W flush at 0x%016lx (cache %s)", + __entry->vcpu_pc, __entry->cache ? "on" : "off") +); + +TRACE_EVENT(kvm_toggle_cache, + TP_PROTO(unsigned long vcpu_pc, bool was, bool now), + TP_ARGS(vcpu_pc, was, now), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_pc ) + __field( bool, was ) + __field( bool, now ) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->was = was; + __entry->now = now; + ), + + TP_printk("VM op at 0x%016lx (cache was %s, now %s)", + __entry->vcpu_pc, __entry->was ? "on" : "off", + __entry->now ? "on" : "off") +); + +/* + * Tracepoints for arch_timer + */ +TRACE_EVENT(kvm_timer_update_irq, + TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +TRACE_EVENT(kvm_get_timer_map, + TP_PROTO(unsigned long vcpu_id, struct timer_map *map), + TP_ARGS(vcpu_id, map), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( int, direct_vtimer ) + __field( int, direct_ptimer ) + __field( int, emul_ptimer ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->direct_vtimer = arch_timer_ctx_index(map->direct_vtimer); + __entry->direct_ptimer = + (map->direct_ptimer) ? arch_timer_ctx_index(map->direct_ptimer) : -1; + __entry->emul_ptimer = + (map->emul_ptimer) ? arch_timer_ctx_index(map->emul_ptimer) : -1; + ), + + TP_printk("VCPU: %ld, dv: %d, dp: %d, ep: %d", + __entry->vcpu_id, + __entry->direct_vtimer, + __entry->direct_ptimer, + __entry->emul_ptimer) +); + +TRACE_EVENT(kvm_timer_save_state, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( unsigned long, ctl ) + __field( unsigned long long, cval ) + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->ctl = ctx->cnt_ctl; + __entry->cval = ctx->cnt_cval; + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk(" CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", + __entry->ctl, + __entry->cval, + __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_restore_state, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( unsigned long, ctl ) + __field( unsigned long long, cval ) + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->ctl = ctx->cnt_ctl; + __entry->cval = ctx->cnt_cval; + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk("CTL: %#08lx CVAL: %#16llx arch_timer_ctx_index: %d", + __entry->ctl, + __entry->cval, + __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_hrtimer_expire, + TP_PROTO(struct arch_timer_context *ctx), + TP_ARGS(ctx), + + TP_STRUCT__entry( + __field( int, timer_idx ) + ), + + TP_fast_assign( + __entry->timer_idx = arch_timer_ctx_index(ctx); + ), + + TP_printk("arch_timer_ctx_index: %d", __entry->timer_idx) +); + +TRACE_EVENT(kvm_timer_emulate, + TP_PROTO(struct arch_timer_context *ctx, bool should_fire), + TP_ARGS(ctx, should_fire), + + TP_STRUCT__entry( + __field( int, timer_idx ) + __field( bool, should_fire ) + ), + + TP_fast_assign( + __entry->timer_idx = arch_timer_ctx_index(ctx); + __entry->should_fire = should_fire; + ), + + TP_printk("arch_timer_ctx_index: %d (should_fire: %d)", + __entry->timer_idx, __entry->should_fire) +); + +#endif /* _TRACE_ARM_ARM64_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace_arm + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/arm64/kvm/trace_handle_exit.h b/arch/arm64/kvm/trace_handle_exit.h new file mode 100644 index 000000000000..2c56d1e0f5bd --- /dev/null +++ b/arch/arm64/kvm/trace_handle_exit.h @@ -0,0 +1,215 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_HANDLE_EXIT_ARM64_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_HANDLE_EXIT_ARM64_KVM_H + +#include <linux/tracepoint.h> +#include "sys_regs.h" + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(kvm_wfx_arm64, + TP_PROTO(unsigned long vcpu_pc, bool is_wfe), + TP_ARGS(vcpu_pc, is_wfe), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(bool, is_wfe) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->is_wfe = is_wfe; + ), + + TP_printk("guest executed wf%c at: 0x%08lx", + __entry->is_wfe ? 'e' : 'i', __entry->vcpu_pc) +); + +TRACE_EVENT(kvm_hvc_arm64, + TP_PROTO(unsigned long vcpu_pc, unsigned long r0, unsigned long imm), + TP_ARGS(vcpu_pc, r0, imm), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(unsigned long, r0) + __field(unsigned long, imm) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->r0 = r0; + __entry->imm = imm; + ), + + TP_printk("HVC at 0x%08lx (r0: 0x%08lx, imm: 0x%lx)", + __entry->vcpu_pc, __entry->r0, __entry->imm) +); + +TRACE_EVENT(kvm_arm_setup_debug, + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), + TP_ARGS(vcpu, guest_debug), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->guest_debug = guest_debug; + ), + + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) +); + +TRACE_EVENT(kvm_arm_clear_debug, + TP_PROTO(__u32 guest_debug), + TP_ARGS(guest_debug), + + TP_STRUCT__entry( + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->guest_debug = guest_debug; + ), + + TP_printk("flags: 0x%08x", __entry->guest_debug) +); + +TRACE_EVENT(kvm_arm_set_dreg32, + TP_PROTO(const char *name, __u32 value), + TP_ARGS(name, value), + + TP_STRUCT__entry( + __field(const char *, name) + __field(__u32, value) + ), + + TP_fast_assign( + __entry->name = name; + __entry->value = value; + ), + + TP_printk("%s: 0x%08x", __entry->name, __entry->value) +); + +TRACE_DEFINE_SIZEOF(__u64); + +TRACE_EVENT(kvm_arm_set_regset, + TP_PROTO(const char *type, int len, __u64 *control, __u64 *value), + TP_ARGS(type, len, control, value), + TP_STRUCT__entry( + __field(const char *, name) + __field(int, len) + __array(u64, ctrls, 16) + __array(u64, values, 16) + ), + TP_fast_assign( + __entry->name = type; + __entry->len = len; + memcpy(__entry->ctrls, control, len << 3); + memcpy(__entry->values, value, len << 3); + ), + TP_printk("%d %s CTRL:%s VALUE:%s", __entry->len, __entry->name, + __print_array(__entry->ctrls, __entry->len, sizeof(__u64)), + __print_array(__entry->values, __entry->len, sizeof(__u64))) +); + +TRACE_EVENT(trap_reg, + TP_PROTO(const char *fn, int reg, bool is_write, u64 write_value), + TP_ARGS(fn, reg, is_write, write_value), + + TP_STRUCT__entry( + __field(const char *, fn) + __field(int, reg) + __field(bool, is_write) + __field(u64, write_value) + ), + + TP_fast_assign( + __entry->fn = fn; + __entry->reg = reg; + __entry->is_write = is_write; + __entry->write_value = write_value; + ), + + TP_printk("%s %s reg %d (0x%08llx)", __entry->fn, __entry->is_write?"write to":"read from", __entry->reg, __entry->write_value) +); + +TRACE_EVENT(kvm_handle_sys_reg, + TP_PROTO(unsigned long hsr), + TP_ARGS(hsr), + + TP_STRUCT__entry( + __field(unsigned long, hsr) + ), + + TP_fast_assign( + __entry->hsr = hsr; + ), + + TP_printk("HSR 0x%08lx", __entry->hsr) +); + +TRACE_EVENT(kvm_sys_access, + TP_PROTO(unsigned long vcpu_pc, struct sys_reg_params *params, const struct sys_reg_desc *reg), + TP_ARGS(vcpu_pc, params, reg), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_pc) + __field(bool, is_write) + __field(const char *, name) + __field(u8, Op0) + __field(u8, Op1) + __field(u8, CRn) + __field(u8, CRm) + __field(u8, Op2) + ), + + TP_fast_assign( + __entry->vcpu_pc = vcpu_pc; + __entry->is_write = params->is_write; + __entry->name = reg->name; + __entry->Op0 = reg->Op0; + __entry->Op0 = reg->Op0; + __entry->Op1 = reg->Op1; + __entry->CRn = reg->CRn; + __entry->CRm = reg->CRm; + __entry->Op2 = reg->Op2; + ), + + TP_printk("PC: %lx %s (%d,%d,%d,%d,%d) %s", + __entry->vcpu_pc, __entry->name ?: "UNKN", + __entry->Op0, __entry->Op1, __entry->CRn, + __entry->CRm, __entry->Op2, + __entry->is_write ? "write" : "read") +); + +TRACE_EVENT(kvm_set_guest_debug, + TP_PROTO(struct kvm_vcpu *vcpu, __u32 guest_debug), + TP_ARGS(vcpu, guest_debug), + + TP_STRUCT__entry( + __field(struct kvm_vcpu *, vcpu) + __field(__u32, guest_debug) + ), + + TP_fast_assign( + __entry->vcpu = vcpu; + __entry->guest_debug = guest_debug; + ), + + TP_printk("vcpu: %p, flags: 0x%08x", __entry->vcpu, __entry->guest_debug) +); + +#endif /* _TRACE_HANDLE_EXIT_ARM64_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH . +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace_handle_exit + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index e7d1ea92095d..2f92bdcb1188 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -7,7 +7,7 @@ #include <linux/kvm.h> #include <linux/kvm_host.h> #include <asm/kvm_emulate.h> -#include "vgic.h" +#include "vgic/vgic.h" #include "sys_regs.h" static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p, diff --git a/arch/arm64/kvm/vgic/trace.h b/arch/arm64/kvm/vgic/trace.h new file mode 100644 index 000000000000..83c64401a7fc --- /dev/null +++ b/arch/arm64/kvm/vgic/trace.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#if !defined(_TRACE_VGIC_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_VGIC_H + +#include <linux/tracepoint.h> + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +TRACE_EVENT(vgic_update_irq_pending, + TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( bool, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level: %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +#endif /* _TRACE_VGIC_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../arch/arm64/kvm/vgic +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/arch/arm64/kvm/vgic/vgic-debug.c b/arch/arm64/kvm/vgic/vgic-debug.c new file mode 100644 index 000000000000..b13a9e3f99dd --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-debug.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2016 Linaro + * Author: Christoffer Dall <christoffer.dall@linaro.org> + */ + +#include <linux/cpu.h> +#include <linux/debugfs.h> +#include <linux/interrupt.h> +#include <linux/kvm_host.h> +#include <linux/seq_file.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_mmu.h> +#include "vgic.h" + +/* + * Structure to control looping through the entire vgic state. We start at + * zero for each field and move upwards. So, if dist_id is 0 we print the + * distributor info. When dist_id is 1, we have already printed it and move + * on. + * + * When vcpu_id < nr_cpus we print the vcpu info until vcpu_id == nr_cpus and + * so on. + */ +struct vgic_state_iter { + int nr_cpus; + int nr_spis; + int nr_lpis; + int dist_id; + int vcpu_id; + int intid; + int lpi_idx; + u32 *lpi_array; +}; + +static void iter_next(struct vgic_state_iter *iter) +{ + if (iter->dist_id == 0) { + iter->dist_id++; + return; + } + + iter->intid++; + if (iter->intid == VGIC_NR_PRIVATE_IRQS && + ++iter->vcpu_id < iter->nr_cpus) + iter->intid = 0; + + if (iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS)) { + if (iter->lpi_idx < iter->nr_lpis) + iter->intid = iter->lpi_array[iter->lpi_idx]; + iter->lpi_idx++; + } +} + +static void iter_init(struct kvm *kvm, struct vgic_state_iter *iter, + loff_t pos) +{ + int nr_cpus = atomic_read(&kvm->online_vcpus); + + memset(iter, 0, sizeof(*iter)); + + iter->nr_cpus = nr_cpus; + iter->nr_spis = kvm->arch.vgic.nr_spis; + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + iter->nr_lpis = vgic_copy_lpi_list(kvm, NULL, &iter->lpi_array); + if (iter->nr_lpis < 0) + iter->nr_lpis = 0; + } + + /* Fast forward to the right position if needed */ + while (pos--) + iter_next(iter); +} + +static bool end_of_vgic(struct vgic_state_iter *iter) +{ + return iter->dist_id > 0 && + iter->vcpu_id == iter->nr_cpus && + iter->intid >= (iter->nr_spis + VGIC_NR_PRIVATE_IRQS) && + iter->lpi_idx > iter->nr_lpis; +} + +static void *vgic_debug_start(struct seq_file *s, loff_t *pos) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter; + + mutex_lock(&kvm->lock); + iter = kvm->arch.vgic.iter; + if (iter) { + iter = ERR_PTR(-EBUSY); + goto out; + } + + iter = kmalloc(sizeof(*iter), GFP_KERNEL); + if (!iter) { + iter = ERR_PTR(-ENOMEM); + goto out; + } + + iter_init(kvm, iter, *pos); + kvm->arch.vgic.iter = iter; + + if (end_of_vgic(iter)) + iter = NULL; +out: + mutex_unlock(&kvm->lock); + return iter; +} + +static void *vgic_debug_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter = kvm->arch.vgic.iter; + + ++*pos; + iter_next(iter); + if (end_of_vgic(iter)) + iter = NULL; + return iter; +} + +static void vgic_debug_stop(struct seq_file *s, void *v) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter; + + /* + * If the seq file wasn't properly opened, there's nothing to clearn + * up. + */ + if (IS_ERR(v)) + return; + + mutex_lock(&kvm->lock); + iter = kvm->arch.vgic.iter; + kfree(iter->lpi_array); + kfree(iter); + kvm->arch.vgic.iter = NULL; + mutex_unlock(&kvm->lock); +} + +static void print_dist_state(struct seq_file *s, struct vgic_dist *dist) +{ + bool v3 = dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3; + + seq_printf(s, "Distributor\n"); + seq_printf(s, "===========\n"); + seq_printf(s, "vgic_model:\t%s\n", v3 ? "GICv3" : "GICv2"); + seq_printf(s, "nr_spis:\t%d\n", dist->nr_spis); + if (v3) + seq_printf(s, "nr_lpis:\t%d\n", dist->lpi_list_count); + seq_printf(s, "enabled:\t%d\n", dist->enabled); + seq_printf(s, "\n"); + + seq_printf(s, "P=pending_latch, L=line_level, A=active\n"); + seq_printf(s, "E=enabled, H=hw, C=config (level=1, edge=0)\n"); + seq_printf(s, "G=group\n"); +} + +static void print_header(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + int id = 0; + char *hdr = "SPI "; + + if (vcpu) { + hdr = "VCPU"; + id = vcpu->vcpu_id; + } + + seq_printf(s, "\n"); + seq_printf(s, "%s%2d TYP ID TGT_ID PLAEHCG HWID TARGET SRC PRI VCPU_ID\n", hdr, id); + seq_printf(s, "----------------------------------------------------------------\n"); +} + +static void print_irq_state(struct seq_file *s, struct vgic_irq *irq, + struct kvm_vcpu *vcpu) +{ + char *type; + bool pending; + + if (irq->intid < VGIC_NR_SGIS) + type = "SGI"; + else if (irq->intid < VGIC_NR_PRIVATE_IRQS) + type = "PPI"; + else if (irq->intid < VGIC_MAX_SPI) + type = "SPI"; + else + type = "LPI"; + + if (irq->intid ==0 || irq->intid == VGIC_NR_PRIVATE_IRQS) + print_header(s, irq, vcpu); + + pending = irq->pending_latch; + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &pending); + WARN_ON_ONCE(err); + } + + seq_printf(s, " %s %4d " + " %2d " + "%d%d%d%d%d%d%d " + "%8d " + "%8x " + " %2x " + "%3d " + " %2d " + "\n", + type, irq->intid, + (irq->target_vcpu) ? irq->target_vcpu->vcpu_id : -1, + pending, + irq->line_level, + irq->active, + irq->enabled, + irq->hw, + irq->config == VGIC_CONFIG_LEVEL, + irq->group, + irq->hwintid, + irq->mpidr, + irq->source, + irq->priority, + (irq->vcpu) ? irq->vcpu->vcpu_id : -1); +} + +static int vgic_debug_show(struct seq_file *s, void *v) +{ + struct kvm *kvm = (struct kvm *)s->private; + struct vgic_state_iter *iter = (struct vgic_state_iter *)v; + struct vgic_irq *irq; + struct kvm_vcpu *vcpu = NULL; + unsigned long flags; + + if (iter->dist_id == 0) { + print_dist_state(s, &kvm->arch.vgic); + return 0; + } + + if (!kvm->arch.vgic.initialized) + return 0; + + if (iter->vcpu_id < iter->nr_cpus) + vcpu = kvm_get_vcpu(kvm, iter->vcpu_id); + + irq = vgic_get_irq(kvm, vcpu, iter->intid); + if (!irq) { + seq_printf(s, " LPI %4d freed\n", iter->intid); + return 0; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + print_irq_state(s, irq, vcpu); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(kvm, irq); + return 0; +} + +static const struct seq_operations vgic_debug_seq_ops = { + .start = vgic_debug_start, + .next = vgic_debug_next, + .stop = vgic_debug_stop, + .show = vgic_debug_show +}; + +static int debug_open(struct inode *inode, struct file *file) +{ + int ret; + ret = seq_open(file, &vgic_debug_seq_ops); + if (!ret) { + struct seq_file *seq; + /* seq_open will have modified file->private_data */ + seq = file->private_data; + seq->private = inode->i_private; + } + + return ret; +}; + +static const struct file_operations vgic_debug_fops = { + .owner = THIS_MODULE, + .open = debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + +void vgic_debug_init(struct kvm *kvm) +{ + debugfs_create_file("vgic-state", 0444, kvm->debugfs_dentry, kvm, + &vgic_debug_fops); +} + +void vgic_debug_destroy(struct kvm *kvm) +{ +} diff --git a/arch/arm64/kvm/vgic/vgic-init.c b/arch/arm64/kvm/vgic/vgic-init.c new file mode 100644 index 000000000000..32e32d67a127 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-init.c @@ -0,0 +1,556 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/uaccess.h> +#include <linux/interrupt.h> +#include <linux/cpu.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_emulate.h> +#include <asm/kvm_mmu.h> +#include "vgic.h" + +/* + * Initialization rules: there are multiple stages to the vgic + * initialization, both for the distributor and the CPU interfaces. The basic + * idea is that even though the VGIC is not functional or not requested from + * user space, the critical path of the run loop can still call VGIC functions + * that just won't do anything, without them having to check additional + * initialization flags to ensure they don't look at uninitialized data + * structures. + * + * Distributor: + * + * - kvm_vgic_early_init(): initialization of static data that doesn't + * depend on any sizing information or emulation type. No allocation + * is allowed there. + * + * - vgic_init(): allocation and initialization of the generic data + * structures that depend on sizing information (number of CPUs, + * number of interrupts). Also initializes the vcpu specific data + * structures. Can be executed lazily for GICv2. + * + * CPU Interface: + * + * - kvm_vgic_vcpu_init(): initialization of static data that + * doesn't depend on any sizing information or emulation type. No + * allocation is allowed there. + */ + +/* EARLY INIT */ + +/** + * kvm_vgic_early_init() - Initialize static VGIC VCPU data structures + * @kvm: The VM whose VGIC districutor should be initialized + * + * Only do initialization of static structures that don't require any + * allocation or sizing information from userspace. vgic_init() called + * kvm_vgic_dist_init() which takes care of the rest. + */ +void kvm_vgic_early_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + INIT_LIST_HEAD(&dist->lpi_list_head); + INIT_LIST_HEAD(&dist->lpi_translation_cache); + raw_spin_lock_init(&dist->lpi_list_lock); +} + +/* CREATION */ + +/** + * kvm_vgic_create: triggered by the instantiation of the VGIC device by + * user space, either through the legacy KVM_CREATE_IRQCHIP ioctl (v2 only) + * or through the generic KVM_CREATE_DEVICE API ioctl. + * irqchip_in_kernel() tells you if this function succeeded or not. + * @kvm: kvm struct pointer + * @type: KVM_DEV_TYPE_ARM_VGIC_V[23] + */ +int kvm_vgic_create(struct kvm *kvm, u32 type) +{ + int i, ret; + struct kvm_vcpu *vcpu; + + if (irqchip_in_kernel(kvm)) + return -EEXIST; + + /* + * This function is also called by the KVM_CREATE_IRQCHIP handler, + * which had no chance yet to check the availability of the GICv2 + * emulation. So check this here again. KVM_CREATE_DEVICE does + * the proper checks already. + */ + if (type == KVM_DEV_TYPE_ARM_VGIC_V2 && + !kvm_vgic_global_state.can_emulate_gicv2) + return -ENODEV; + + ret = -EBUSY; + if (!lock_all_vcpus(kvm)) + return ret; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->arch.has_run_once) + goto out_unlock; + } + ret = 0; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + kvm->arch.max_vcpus = VGIC_V2_MAX_CPUS; + else + kvm->arch.max_vcpus = VGIC_V3_MAX_CPUS; + + if (atomic_read(&kvm->online_vcpus) > kvm->arch.max_vcpus) { + ret = -E2BIG; + goto out_unlock; + } + + kvm->arch.vgic.in_kernel = true; + kvm->arch.vgic.vgic_model = type; + + kvm->arch.vgic.vgic_dist_base = VGIC_ADDR_UNDEF; + + if (type == KVM_DEV_TYPE_ARM_VGIC_V2) + kvm->arch.vgic.vgic_cpu_base = VGIC_ADDR_UNDEF; + else + INIT_LIST_HEAD(&kvm->arch.vgic.rd_regions); + +out_unlock: + unlock_all_vcpus(kvm); + return ret; +} + +/* INIT/DESTROY */ + +/** + * kvm_vgic_dist_init: initialize the dist data structures + * @kvm: kvm struct pointer + * @nr_spis: number of spis, frozen by caller + */ +static int kvm_vgic_dist_init(struct kvm *kvm, unsigned int nr_spis) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu0 = kvm_get_vcpu(kvm, 0); + int i; + + dist->spis = kcalloc(nr_spis, sizeof(struct vgic_irq), GFP_KERNEL); + if (!dist->spis) + return -ENOMEM; + + /* + * In the following code we do not take the irq struct lock since + * no other action on irq structs can happen while the VGIC is + * not initialized yet: + * If someone wants to inject an interrupt or does a MMIO access, we + * require prior initialization in case of a virtual GICv3 or trigger + * initialization when using a virtual GICv2. + */ + for (i = 0; i < nr_spis; i++) { + struct vgic_irq *irq = &dist->spis[i]; + + irq->intid = i + VGIC_NR_PRIVATE_IRQS; + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + irq->vcpu = NULL; + irq->target_vcpu = vcpu0; + kref_init(&irq->refcount); + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V2: + irq->targets = 0; + irq->group = 0; + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: + irq->mpidr = 0; + irq->group = 1; + break; + default: + kfree(dist->spis); + dist->spis = NULL; + return -EINVAL; + } + } + return 0; +} + +/** + * kvm_vgic_vcpu_init() - Initialize static VGIC VCPU data + * structures and register VCPU-specific KVM iodevs + * + * @vcpu: pointer to the VCPU being created and initialized + * + * Only do initialization, but do not actually enable the + * VGIC CPU interface + */ +int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + int ret = 0; + int i; + + vgic_cpu->rd_iodev.base_addr = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); + raw_spin_lock_init(&vgic_cpu->ap_list_lock); + atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0); + + /* + * Enable and configure all SGIs to be edge-triggered and + * configure all PPIs as level-triggered. + */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + irq->intid = i; + irq->vcpu = NULL; + irq->target_vcpu = vcpu; + kref_init(&irq->refcount); + if (vgic_irq_is_sgi(i)) { + /* SGIs */ + irq->enabled = 1; + irq->config = VGIC_CONFIG_EDGE; + } else { + /* PPIs */ + irq->config = VGIC_CONFIG_LEVEL; + } + } + + if (!irqchip_in_kernel(vcpu->kvm)) + return 0; + + /* + * If we are creating a VCPU with a GICv3 we must also register the + * KVM io device for the redistributor that belongs to this VCPU. + */ + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + mutex_lock(&vcpu->kvm->lock); + ret = vgic_register_redist_iodev(vcpu); + mutex_unlock(&vcpu->kvm->lock); + } + return ret; +} + +static void kvm_vgic_vcpu_enable(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_enable(vcpu); + else + vgic_v3_enable(vcpu); +} + +/* + * vgic_init: allocates and initializes dist and vcpu data structures + * depending on two dimensioning parameters: + * - the number of spis + * - the number of vcpus + * The function is generally called when nr_spis has been explicitly set + * by the guest through the KVM DEVICE API. If not nr_spis is set to 256. + * vgic_initialized() returns true when this function has succeeded. + * Must be called with kvm->lock held! + */ +int vgic_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int ret = 0, i, idx; + + if (vgic_initialized(kvm)) + return 0; + + /* Are we also in the middle of creating a VCPU? */ + if (kvm->created_vcpus != atomic_read(&kvm->online_vcpus)) + return -EBUSY; + + /* freeze the number of spis */ + if (!dist->nr_spis) + dist->nr_spis = VGIC_NR_IRQS_LEGACY - VGIC_NR_PRIVATE_IRQS; + + ret = kvm_vgic_dist_init(kvm, dist->nr_spis); + if (ret) + goto out; + + /* Initialize groups on CPUs created before the VGIC type was known */ + kvm_for_each_vcpu(idx, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + struct vgic_irq *irq = &vgic_cpu->private_irqs[i]; + switch (dist->vgic_model) { + case KVM_DEV_TYPE_ARM_VGIC_V3: + irq->group = 1; + irq->mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + break; + case KVM_DEV_TYPE_ARM_VGIC_V2: + irq->group = 0; + irq->targets = 1U << idx; + break; + default: + ret = -EINVAL; + goto out; + } + } + } + + if (vgic_has_its(kvm)) + vgic_lpi_translation_cache_init(kvm); + + /* + * If we have GICv4.1 enabled, unconditionnaly request enable the + * v4 support so that we get HW-accelerated vSGIs. Otherwise, only + * enable it if we present a virtual ITS to the guest. + */ + if (vgic_supports_direct_msis(kvm)) { + ret = vgic_v4_init(kvm); + if (ret) + goto out; + } + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_enable(vcpu); + + ret = kvm_vgic_setup_default_irq_routing(kvm); + if (ret) + goto out; + + vgic_debug_init(kvm); + + dist->implementation_rev = 2; + dist->initialized = true; + +out: + return ret; +} + +static void kvm_vgic_dist_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_redist_region *rdreg, *next; + + dist->ready = false; + dist->initialized = false; + + kfree(dist->spis); + dist->spis = NULL; + dist->nr_spis = 0; + + if (kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + list_for_each_entry_safe(rdreg, next, &dist->rd_regions, list) { + list_del(&rdreg->list); + kfree(rdreg); + } + INIT_LIST_HEAD(&dist->rd_regions); + } + + if (vgic_has_its(kvm)) + vgic_lpi_translation_cache_destroy(kvm); + + if (vgic_supports_direct_msis(kvm)) + vgic_v4_teardown(kvm); +} + +void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + /* + * Retire all pending LPIs on this vcpu anyway as we're + * going to destroy it. + */ + vgic_flush_pending_lpis(vcpu); + + INIT_LIST_HEAD(&vgic_cpu->ap_list_head); +} + +/* To be called with kvm->lock held */ +static void __kvm_vgic_destroy(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int i; + + vgic_debug_destroy(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_vgic_vcpu_destroy(vcpu); + + kvm_vgic_dist_destroy(kvm); +} + +void kvm_vgic_destroy(struct kvm *kvm) +{ + mutex_lock(&kvm->lock); + __kvm_vgic_destroy(kvm); + mutex_unlock(&kvm->lock); +} + +/** + * vgic_lazy_init: Lazy init is only allowed if the GIC exposed to the guest + * is a GICv2. A GICv3 must be explicitly initialized by the guest using the + * KVM_DEV_ARM_VGIC_GRP_CTRL KVM_DEVICE group. + * @kvm: kvm struct pointer + */ +int vgic_lazy_init(struct kvm *kvm) +{ + int ret = 0; + + if (unlikely(!vgic_initialized(kvm))) { + /* + * We only provide the automatic initialization of the VGIC + * for the legacy case of a GICv2. Any other type must + * be explicitly initialized once setup with the respective + * KVM device call. + */ + if (kvm->arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V2) + return -EBUSY; + + mutex_lock(&kvm->lock); + ret = vgic_init(kvm); + mutex_unlock(&kvm->lock); + } + + return ret; +} + +/* RESOURCE MAPPING */ + +/** + * Map the MMIO regions depending on the VGIC model exposed to the guest + * called on the first VCPU run. + * Also map the virtual CPU interface into the VM. + * v2/v3 derivatives call vgic_init if not already done. + * vgic_ready() returns true if this function has succeeded. + * @kvm: kvm struct pointer + */ +int kvm_vgic_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int ret = 0; + + mutex_lock(&kvm->lock); + if (!irqchip_in_kernel(kvm)) + goto out; + + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2) + ret = vgic_v2_map_resources(kvm); + else + ret = vgic_v3_map_resources(kvm); + + if (ret) + __kvm_vgic_destroy(kvm); + +out: + mutex_unlock(&kvm->lock); + return ret; +} + +/* GENERIC PROBE */ + +static int vgic_init_cpu_starting(unsigned int cpu) +{ + enable_percpu_irq(kvm_vgic_global_state.maint_irq, 0); + return 0; +} + + +static int vgic_init_cpu_dying(unsigned int cpu) +{ + disable_percpu_irq(kvm_vgic_global_state.maint_irq); + return 0; +} + +static irqreturn_t vgic_maintenance_handler(int irq, void *data) +{ + /* + * We cannot rely on the vgic maintenance interrupt to be + * delivered synchronously. This means we can only use it to + * exit the VM, and we perform the handling of EOIed + * interrupts on the exit path (see vgic_fold_lr_state). + */ + return IRQ_HANDLED; +} + +/** + * kvm_vgic_init_cpu_hardware - initialize the GIC VE hardware + * + * For a specific CPU, initialize the GIC VE hardware. + */ +void kvm_vgic_init_cpu_hardware(void) +{ + BUG_ON(preemptible()); + + /* + * We want to make sure the list registers start out clear so that we + * only have the program the used registers. + */ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_init_lrs(); + else + kvm_call_hyp(__vgic_v3_init_lrs); +} + +/** + * kvm_vgic_hyp_init: populates the kvm_vgic_global_state variable + * according to the host GIC model. Accordingly calls either + * vgic_v2/v3_probe which registers the KVM_DEVICE that can be + * instantiated by a guest later on . + */ +int kvm_vgic_hyp_init(void) +{ + const struct gic_kvm_info *gic_kvm_info; + int ret; + + gic_kvm_info = gic_get_kvm_info(); + if (!gic_kvm_info) + return -ENODEV; + + if (!gic_kvm_info->maint_irq) { + kvm_err("No vgic maintenance irq\n"); + return -ENXIO; + } + + switch (gic_kvm_info->type) { + case GIC_V2: + ret = vgic_v2_probe(gic_kvm_info); + break; + case GIC_V3: + ret = vgic_v3_probe(gic_kvm_info); + if (!ret) { + static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); + kvm_info("GIC system register CPU interface enabled\n"); + } + break; + default: + ret = -ENODEV; + } + + if (ret) + return ret; + + kvm_vgic_global_state.maint_irq = gic_kvm_info->maint_irq; + ret = request_percpu_irq(kvm_vgic_global_state.maint_irq, + vgic_maintenance_handler, + "vgic", kvm_get_running_vcpus()); + if (ret) { + kvm_err("Cannot register interrupt %d\n", + kvm_vgic_global_state.maint_irq); + return ret; + } + + ret = cpuhp_setup_state(CPUHP_AP_KVM_ARM_VGIC_INIT_STARTING, + "kvm/arm/vgic:starting", + vgic_init_cpu_starting, vgic_init_cpu_dying); + if (ret) { + kvm_err("Cannot register vgic CPU notifier\n"); + goto out_free_irq; + } + + kvm_info("vgic interrupt IRQ%d\n", kvm_vgic_global_state.maint_irq); + return 0; + +out_free_irq: + free_percpu_irq(kvm_vgic_global_state.maint_irq, + kvm_get_running_vcpus()); + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic-irqfd.c b/arch/arm64/kvm/vgic/vgic-irqfd.c new file mode 100644 index 000000000000..d8cdfea5cc96 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-irqfd.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <trace/events/kvm.h> +#include <kvm/arm_vgic.h> +#include "vgic.h" + +/** + * vgic_irqfd_set_irq: inject the IRQ corresponding to the + * irqchip routing entry + * + * This is the entry point for irqfd IRQ injection + */ +static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS; + + if (!vgic_valid_spi(kvm, spi_id)) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); +} + +/** + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @kvm: the VM this entry is applied to + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. + */ +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = vgic_irqfd_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) || + (e->irqchip.irqchip >= KVM_NR_IRQCHIPS)) + goto out; + break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + e->msi.flags = ue->flags; + e->msi.devid = ue->u.msi.devid; + break; + default: + goto out; + } + r = 0; +out: + return r; +} + +static void kvm_populate_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm_msi *msi) +{ + msi->address_lo = e->msi.address_lo; + msi->address_hi = e->msi.address_hi; + msi->data = e->msi.data; + msi->flags = e->msi.flags; + msi->devid = e->msi.devid; +} +/** + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ +int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) +{ + struct kvm_msi msi; + + if (!vgic_has_its(kvm)) + return -ENODEV; + + if (!level) + return -1; + + kvm_populate_msi(e, &msi); + return vgic_its_inject_msi(kvm, &msi); +} + +/** + * kvm_arch_set_irq_inatomic: fast-path for irqfd injection + * + * Currently only direct MSI injection is supported. + */ +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) +{ + if (e->type == KVM_IRQ_ROUTING_MSI && vgic_has_its(kvm) && level) { + struct kvm_msi msi; + + kvm_populate_msi(e, &msi); + if (!vgic_its_inject_cached_translation(kvm, &msi)) + return 0; + } + + return -EWOULDBLOCK; +} + +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) +{ + struct kvm_irq_routing_entry *entries; + struct vgic_dist *dist = &kvm->arch.vgic; + u32 nr = dist->nr_spis; + int i, ret; + + entries = kcalloc(nr, sizeof(*entries), GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic-its.c b/arch/arm64/kvm/vgic/vgic-its.c new file mode 100644 index 000000000000..c012a52b19f5 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-its.c @@ -0,0 +1,2783 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * GICv3 ITS emulation + * + * Copyright (C) 2015,2016 ARM Ltd. + * Author: Andre Przywara <andre.przywara@arm.com> + */ + +#include <linux/cpu.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <linux/list.h> +#include <linux/uaccess.h> +#include <linux/list_sort.h> + +#include <linux/irqchip/arm-gic-v3.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +static int vgic_its_save_tables_v0(struct vgic_its *its); +static int vgic_its_restore_tables_v0(struct vgic_its *its); +static int vgic_its_commit_v0(struct vgic_its *its); +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu, bool needs_inv); + +/* + * Creates a new (reference to a) struct vgic_irq for a given LPI. + * If this LPI is already mapped on another ITS, we increase its refcount + * and return a pointer to the existing structure. + * If this is a "new" LPI, we allocate and initialize a new struct vgic_irq. + * This function returns a pointer to the _unlocked_ structure. + */ +static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, + struct kvm_vcpu *vcpu) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + unsigned long flags; + int ret; + + /* In this case there is no put, since we keep the reference. */ + if (irq) + return irq; + + irq = kzalloc(sizeof(struct vgic_irq), GFP_KERNEL); + if (!irq) + return ERR_PTR(-ENOMEM); + + INIT_LIST_HEAD(&irq->lpi_list); + INIT_LIST_HEAD(&irq->ap_list); + raw_spin_lock_init(&irq->irq_lock); + + irq->config = VGIC_CONFIG_EDGE; + kref_init(&irq->refcount); + irq->intid = intid; + irq->target_vcpu = vcpu; + irq->group = 1; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + /* + * There could be a race with another vgic_add_lpi(), so we need to + * check that we don't add a second list entry with the same LPI. + */ + list_for_each_entry(oldirq, &dist->lpi_list_head, lpi_list) { + if (oldirq->intid != intid) + continue; + + /* Someone was faster with adding this LPI, lets use that. */ + kfree(irq); + irq = oldirq; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() on the returned pointer once it's + * finished with the IRQ. + */ + vgic_get_irq_kref(irq); + + goto out_unlock; + } + + list_add_tail(&irq->lpi_list, &dist->lpi_list_head); + dist->lpi_list_count++; + +out_unlock: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + /* + * We "cache" the configuration table entries in our struct vgic_irq's. + * However we only have those structs for mapped IRQs, so we read in + * the respective config data from memory here upon mapping the LPI. + * + * Should any of these fail, behave as if we couldn't create the LPI + * by dropping the refcount and returning the error. + */ + ret = update_lpi_config(kvm, irq, NULL, false); + if (ret) { + vgic_put_irq(kvm, irq); + return ERR_PTR(ret); + } + + ret = vgic_v3_lpi_sync_pending_status(kvm, irq); + if (ret) { + vgic_put_irq(kvm, irq); + return ERR_PTR(ret); + } + + return irq; +} + +struct its_device { + struct list_head dev_list; + + /* the head for the list of ITTEs */ + struct list_head itt_head; + u32 num_eventid_bits; + gpa_t itt_addr; + u32 device_id; +}; + +#define COLLECTION_NOT_MAPPED ((u32)~0) + +struct its_collection { + struct list_head coll_list; + + u32 collection_id; + u32 target_addr; +}; + +#define its_is_collection_mapped(coll) ((coll) && \ + ((coll)->target_addr != COLLECTION_NOT_MAPPED)) + +struct its_ite { + struct list_head ite_list; + + struct vgic_irq *irq; + struct its_collection *collection; + u32 event_id; +}; + +struct vgic_translation_cache_entry { + struct list_head entry; + phys_addr_t db; + u32 devid; + u32 eventid; + struct vgic_irq *irq; +}; + +/** + * struct vgic_its_abi - ITS abi ops and settings + * @cte_esz: collection table entry size + * @dte_esz: device table entry size + * @ite_esz: interrupt translation table entry size + * @save tables: save the ITS tables into guest RAM + * @restore_tables: restore the ITS internal structs from tables + * stored in guest RAM + * @commit: initialize the registers which expose the ABI settings, + * especially the entry sizes + */ +struct vgic_its_abi { + int cte_esz; + int dte_esz; + int ite_esz; + int (*save_tables)(struct vgic_its *its); + int (*restore_tables)(struct vgic_its *its); + int (*commit)(struct vgic_its *its); +}; + +#define ABI_0_ESZ 8 +#define ESZ_MAX ABI_0_ESZ + +static const struct vgic_its_abi its_table_abi_versions[] = { + [0] = { + .cte_esz = ABI_0_ESZ, + .dte_esz = ABI_0_ESZ, + .ite_esz = ABI_0_ESZ, + .save_tables = vgic_its_save_tables_v0, + .restore_tables = vgic_its_restore_tables_v0, + .commit = vgic_its_commit_v0, + }, +}; + +#define NR_ITS_ABIS ARRAY_SIZE(its_table_abi_versions) + +inline const struct vgic_its_abi *vgic_its_get_abi(struct vgic_its *its) +{ + return &its_table_abi_versions[its->abi_rev]; +} + +static int vgic_its_set_abi(struct vgic_its *its, u32 rev) +{ + const struct vgic_its_abi *abi; + + its->abi_rev = rev; + abi = vgic_its_get_abi(its); + return abi->commit(its); +} + +/* + * Find and returns a device in the device table for an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_device *find_its_device(struct vgic_its *its, u32 device_id) +{ + struct its_device *device; + + list_for_each_entry(device, &its->device_list, dev_list) + if (device_id == device->device_id) + return device; + + return NULL; +} + +/* + * Find and returns an interrupt translation table entry (ITTE) for a given + * Device ID/Event ID pair on an ITS. + * Must be called with the its_lock mutex held. + */ +static struct its_ite *find_ite(struct vgic_its *its, u32 device_id, + u32 event_id) +{ + struct its_device *device; + struct its_ite *ite; + + device = find_its_device(its, device_id); + if (device == NULL) + return NULL; + + list_for_each_entry(ite, &device->itt_head, ite_list) + if (ite->event_id == event_id) + return ite; + + return NULL; +} + +/* To be used as an iterator this macro misses the enclosing parentheses */ +#define for_each_lpi_its(dev, ite, its) \ + list_for_each_entry(dev, &(its)->device_list, dev_list) \ + list_for_each_entry(ite, &(dev)->itt_head, ite_list) + +#define GIC_LPI_OFFSET 8192 + +#define VITS_TYPER_IDBITS 16 +#define VITS_TYPER_DEVBITS 16 +#define VITS_DTE_MAX_DEVID_OFFSET (BIT(14) - 1) +#define VITS_ITE_MAX_EVENTID_OFFSET (BIT(16) - 1) + +/* + * Finds and returns a collection in the ITS collection table. + * Must be called with the its_lock mutex held. + */ +static struct its_collection *find_collection(struct vgic_its *its, int coll_id) +{ + struct its_collection *collection; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + if (coll_id == collection->collection_id) + return collection; + } + + return NULL; +} + +#define LPI_PROP_ENABLE_BIT(p) ((p) & LPI_PROP_ENABLED) +#define LPI_PROP_PRIORITY(p) ((p) & 0xfc) + +/* + * Reads the configuration data for a given LPI from guest memory and + * updates the fields in struct vgic_irq. + * If filter_vcpu is not NULL, applies only if the IRQ is targeting this + * VCPU. Unconditionally applies if filter_vcpu is NULL. + */ +static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, + struct kvm_vcpu *filter_vcpu, bool needs_inv) +{ + u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser); + u8 prop; + int ret; + unsigned long flags; + + ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET, + &prop, 1); + + if (ret) + return ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!filter_vcpu || filter_vcpu == irq->target_vcpu) { + irq->priority = LPI_PROP_PRIORITY(prop); + irq->enabled = LPI_PROP_ENABLE_BIT(prop); + + if (!irq->hw) { + vgic_queue_irq_unlock(kvm, irq, flags); + return 0; + } + } + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + if (irq->hw) + return its_prop_update_vlpi(irq->host_irq, prop, needs_inv); + + return 0; +} + +/* + * Create a snapshot of the current LPIs targeting @vcpu, so that we can + * enumerate those LPIs without holding any lock. + * Returns their number and puts the kmalloc'ed array into intid_ptr. + */ +int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long flags; + u32 *intids; + int irq_count, i = 0; + + /* + * There is an obvious race between allocating the array and LPIs + * being mapped/unmapped. If we ended up here as a result of a + * command, we're safe (locks are held, preventing another + * command). If coming from another path (such as enabling LPIs), + * we must be careful not to overrun the array. + */ + irq_count = READ_ONCE(dist->lpi_list_count); + intids = kmalloc_array(irq_count, sizeof(intids[0]), GFP_KERNEL); + if (!intids) + return -ENOMEM; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (i == irq_count) + break; + /* We don't need to "get" the IRQ, as we hold the list lock. */ + if (vcpu && irq->target_vcpu != vcpu) + continue; + intids[i++] = irq->intid; + } + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + *intid_ptr = intids; + return i; +} + +static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) +{ + int ret = 0; + unsigned long flags; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->target_vcpu = vcpu; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + if (irq->hw) { + struct its_vlpi_map map; + + ret = its_get_vlpi(irq->host_irq, &map); + if (ret) + return ret; + + if (map.vpe) + atomic_dec(&map.vpe->vlpi_count); + map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + atomic_inc(&map.vpe->vlpi_count); + + ret = its_map_vlpi(irq->host_irq, &map); + } + + return ret; +} + +/* + * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI + * is targeting) to the VGIC's view, which deals with target VCPUs. + * Needs to be called whenever either the collection for a LPIs has + * changed or the collection itself got retargeted. + */ +static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite) +{ + struct kvm_vcpu *vcpu; + + if (!its_is_collection_mapped(ite->collection)) + return; + + vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + update_affinity(ite->irq, vcpu); +} + +/* + * Updates the target VCPU for every LPI targeting this collection. + * Must be called with the its_lock mutex held. + */ +static void update_affinity_collection(struct kvm *kvm, struct vgic_its *its, + struct its_collection *coll) +{ + struct its_device *device; + struct its_ite *ite; + + for_each_lpi_its(device, ite, its) { + if (!ite->collection || coll != ite->collection) + continue; + + update_affinity_ite(kvm, ite); + } +} + +static u32 max_lpis_propbaser(u64 propbaser) +{ + int nr_idbits = (propbaser & 0x1f) + 1; + + return 1U << min(nr_idbits, INTERRUPT_ID_BITS_ITS); +} + +/* + * Sync the pending table pending bit of LPIs targeting @vcpu + * with our own data structures. This relies on the LPI being + * mapped before. + */ +static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) +{ + gpa_t pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + struct vgic_irq *irq; + int last_byte_offset = -1; + int ret = 0; + u32 *intids; + int nr_irqs, i; + unsigned long flags; + u8 pendmask; + + nr_irqs = vgic_copy_lpi_list(vcpu->kvm, vcpu, &intids); + if (nr_irqs < 0) + return nr_irqs; + + for (i = 0; i < nr_irqs; i++) { + int byte_offset, bit_nr; + + byte_offset = intids[i] / BITS_PER_BYTE; + bit_nr = intids[i] % BITS_PER_BYTE; + + /* + * For contiguously allocated LPIs chances are we just read + * this very same byte in the last iteration. Reuse that. + */ + if (byte_offset != last_byte_offset) { + ret = kvm_read_guest_lock(vcpu->kvm, + pendbase + byte_offset, + &pendmask, 1); + if (ret) { + kfree(intids); + return ret; + } + last_byte_offset = byte_offset; + } + + irq = vgic_get_irq(vcpu->kvm, NULL, intids[i]); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = pendmask & (1U << bit_nr); + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + vgic_put_irq(vcpu->kvm, irq); + } + + kfree(intids); + + return ret; +} + +static unsigned long vgic_mmio_read_its_typer(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 reg = GITS_TYPER_PLPIS; + + /* + * We use linear CPU numbers for redistributor addressing, + * so GITS_TYPER.PTA is 0. + * Also we force all PROPBASER registers to be the same, so + * CommonLPIAff is 0 as well. + * To avoid memory waste in the guest, we keep the number of IDBits and + * DevBits low - as least for the time being. + */ + reg |= GIC_ENCODE_SZ(VITS_TYPER_DEVBITS, 5) << GITS_TYPER_DEVBITS_SHIFT; + reg |= GIC_ENCODE_SZ(VITS_TYPER_IDBITS, 5) << GITS_TYPER_IDBITS_SHIFT; + reg |= GIC_ENCODE_SZ(abi->ite_esz, 4) << GITS_TYPER_ITT_ENTRY_SIZE_SHIFT; + + return extract_bytes(reg, addr & 7, len); +} + +static unsigned long vgic_mmio_read_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 val; + + val = (its->abi_rev << GITS_IIDR_REV_SHIFT) & GITS_IIDR_REV_MASK; + val |= (PRODUCT_ID_KVM << GITS_IIDR_PRODUCTID_SHIFT) | IMPLEMENTER_ARM; + return val; +} + +static int vgic_mmio_uaccess_write_its_iidr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 rev = GITS_IIDR_REV(val); + + if (rev >= NR_ITS_ABIS) + return -EINVAL; + return vgic_its_set_abi(its, rev); +} + +static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + switch (addr & 0xffff) { + case GITS_PIDR0: + return 0x92; /* part number, bits[7:0] */ + case GITS_PIDR1: + return 0xb4; /* part number, bits[11:8] */ + case GITS_PIDR2: + return GIC_PIDR2_ARCH_GICv3 | 0x0b; + case GITS_PIDR4: + return 0x40; /* This is a 64K software visible page */ + /* The following are the ID registers for (any) GIC. */ + case GITS_CIDR0: + return 0x0d; + case GITS_CIDR1: + return 0xf0; + case GITS_CIDR2: + return 0x05; + case GITS_CIDR3: + return 0xb1; + } + + return 0; +} + +static struct vgic_irq *__vgic_its_check_cache(struct vgic_dist *dist, + phys_addr_t db, + u32 devid, u32 eventid) +{ + struct vgic_translation_cache_entry *cte; + + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { + /* + * If we hit a NULL entry, there is nothing after this + * point. + */ + if (!cte->irq) + break; + + if (cte->db != db || cte->devid != devid || + cte->eventid != eventid) + continue; + + /* + * Move this entry to the head, as it is the most + * recently used. + */ + if (!list_is_first(&cte->entry, &dist->lpi_translation_cache)) + list_move(&cte->entry, &dist->lpi_translation_cache); + + return cte->irq; + } + + return NULL; +} + +static struct vgic_irq *vgic_its_check_cache(struct kvm *kvm, phys_addr_t db, + u32 devid, u32 eventid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + irq = __vgic_its_check_cache(dist, db, devid, eventid); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + return irq; +} + +static void vgic_its_cache_translation(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, + struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte; + unsigned long flags; + phys_addr_t db; + + /* Do not cache a directly injected interrupt */ + if (irq->hw) + return; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + if (unlikely(list_empty(&dist->lpi_translation_cache))) + goto out; + + /* + * We could have raced with another CPU caching the same + * translation behind our back, so let's check it is not in + * already + */ + db = its->vgic_its_base + GITS_TRANSLATER; + if (__vgic_its_check_cache(dist, db, devid, eventid)) + goto out; + + /* Always reuse the last entry (LRU policy) */ + cte = list_last_entry(&dist->lpi_translation_cache, + typeof(*cte), entry); + + /* + * Caching the translation implies having an extra reference + * to the interrupt, so drop the potential reference on what + * was in the cache, and increment it on the new interrupt. + */ + if (cte->irq) + __vgic_put_lpi_locked(kvm, cte->irq); + + vgic_get_irq_kref(irq); + + cte->db = db; + cte->devid = devid; + cte->eventid = eventid; + cte->irq = irq; + + /* Move the new translation to the head of the list */ + list_move(&cte->entry, &dist->lpi_translation_cache); + +out: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +void vgic_its_invalidate_cache(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + list_for_each_entry(cte, &dist->lpi_translation_cache, entry) { + /* + * If we hit a NULL entry, there is nothing after this + * point. + */ + if (!cte->irq) + break; + + __vgic_put_lpi_locked(kvm, cte->irq); + cte->irq = NULL; + } + + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, struct vgic_irq **irq) +{ + struct kvm_vcpu *vcpu; + struct its_ite *ite; + + if (!its->enabled) + return -EBUSY; + + ite = find_ite(its, devid, eventid); + if (!ite || !its_is_collection_mapped(ite->collection)) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr); + if (!vcpu) + return E_ITS_INT_UNMAPPED_INTERRUPT; + + if (!vcpu->arch.vgic_cpu.lpis_enabled) + return -EBUSY; + + vgic_its_cache_translation(kvm, its, devid, eventid, ite->irq); + + *irq = ite->irq; + return 0; +} + +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi) +{ + u64 address; + struct kvm_io_device *kvm_io_dev; + struct vgic_io_device *iodev; + + if (!vgic_has_its(kvm)) + return ERR_PTR(-ENODEV); + + if (!(msi->flags & KVM_MSI_VALID_DEVID)) + return ERR_PTR(-EINVAL); + + address = (u64)msi->address_hi << 32 | msi->address_lo; + + kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address); + if (!kvm_io_dev) + return ERR_PTR(-EINVAL); + + if (kvm_io_dev->ops != &kvm_io_gic_ops) + return ERR_PTR(-EINVAL); + + iodev = container_of(kvm_io_dev, struct vgic_io_device, dev); + if (iodev->iodev_type != IODEV_ITS) + return ERR_PTR(-EINVAL); + + return iodev->its; +} + +/* + * Find the target VCPU and the LPI number for a given devid/eventid pair + * and make this IRQ pending, possibly injecting it. + * Must be called with the its_lock mutex held. + * Returns 0 on success, a positive error value for any ITS mapping + * related errors and negative error values for generic errors. + */ +static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid) +{ + struct vgic_irq *irq = NULL; + unsigned long flags; + int err; + + err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq); + if (err) + return err; + + if (irq->hw) + return irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, true); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); + + return 0; +} + +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi) +{ + struct vgic_irq *irq; + unsigned long flags; + phys_addr_t db; + + db = (u64)msi->address_hi << 32 | msi->address_lo; + irq = vgic_its_check_cache(kvm, db, msi->devid, msi->data); + + if (!irq) + return -1; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + vgic_queue_irq_unlock(kvm, irq, flags); + + return 0; +} + +/* + * Queries the KVM IO bus framework to get the ITS pointer from the given + * doorbell address. + * We then call vgic_its_trigger_msi() with the decoded data. + * According to the KVM_SIGNAL_MSI API description returns 1 on success. + */ +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi) +{ + struct vgic_its *its; + int ret; + + if (!vgic_its_inject_cached_translation(kvm, msi)) + return 1; + + its = vgic_msi_to_its(kvm, msi); + if (IS_ERR(its)) + return PTR_ERR(its); + + mutex_lock(&its->its_lock); + ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data); + mutex_unlock(&its->its_lock); + + if (ret < 0) + return ret; + + /* + * KVM_SIGNAL_MSI demands a return value > 0 for success and 0 + * if the guest has blocked the MSI. So we map any LPI mapping + * related error to that. + */ + if (ret) + return 0; + else + return 1; +} + +/* Requires the its_lock to be held. */ +static void its_free_ite(struct kvm *kvm, struct its_ite *ite) +{ + list_del(&ite->ite_list); + + /* This put matches the get in vgic_add_lpi. */ + if (ite->irq) { + if (ite->irq->hw) + WARN_ON(its_unmap_vlpi(ite->irq->host_irq)); + + vgic_put_irq(kvm, ite->irq); + } + + kfree(ite); +} + +static u64 its_cmd_mask_field(u64 *its_cmd, int word, int shift, int size) +{ + return (le64_to_cpu(its_cmd[word]) >> shift) & (BIT_ULL(size) - 1); +} + +#define its_cmd_get_command(cmd) its_cmd_mask_field(cmd, 0, 0, 8) +#define its_cmd_get_deviceid(cmd) its_cmd_mask_field(cmd, 0, 32, 32) +#define its_cmd_get_size(cmd) (its_cmd_mask_field(cmd, 1, 0, 5) + 1) +#define its_cmd_get_id(cmd) its_cmd_mask_field(cmd, 1, 0, 32) +#define its_cmd_get_physical_id(cmd) its_cmd_mask_field(cmd, 1, 32, 32) +#define its_cmd_get_collection(cmd) its_cmd_mask_field(cmd, 2, 0, 16) +#define its_cmd_get_ittaddr(cmd) (its_cmd_mask_field(cmd, 2, 8, 44) << 8) +#define its_cmd_get_target_addr(cmd) its_cmd_mask_field(cmd, 2, 16, 32) +#define its_cmd_get_validbit(cmd) its_cmd_mask_field(cmd, 2, 63, 1) + +/* + * The DISCARD command frees an Interrupt Translation Table Entry (ITTE). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_discard(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + ite = find_ite(its, device_id, event_id); + if (ite && its_is_collection_mapped(ite->collection)) { + /* + * Though the spec talks about removing the pending state, we + * don't bother here since we clear the ITTE anyway and the + * pending state is a property of the ITTE struct. + */ + vgic_its_invalidate_cache(kvm); + + its_free_ite(kvm, ite); + return 0; + } + + return E_ITS_DISCARD_UNMAPPED_INTERRUPT; +} + +/* + * The MOVI command moves an ITTE to a different collection. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct kvm_vcpu *vcpu; + struct its_ite *ite; + struct its_collection *collection; + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_MOVI_UNMAPPED_INTERRUPT; + + if (!its_is_collection_mapped(ite->collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_MOVI_UNMAPPED_COLLECTION; + + ite->collection = collection; + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + vgic_its_invalidate_cache(kvm); + + return update_affinity(ite->irq, vcpu); +} + +/* + * Check whether an ID can be stored into the corresponding guest table. + * For a direct table this is pretty easy, but gets a bit nasty for + * indirect tables. We check whether the resulting guest physical address + * is actually valid (covered by a memslot and guest accessible). + * For this we have to read the respective first level entry. + */ +static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, + gpa_t *eaddr) +{ + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + u64 indirect_ptr, type = GITS_BASER_TYPE(baser); + phys_addr_t base = GITS_BASER_ADDR_48_to_52(baser); + int esz = GITS_BASER_ENTRY_SIZE(baser); + int index, idx; + gfn_t gfn; + bool ret; + + switch (type) { + case GITS_BASER_TYPE_DEVICE: + if (id >= BIT_ULL(VITS_TYPER_DEVBITS)) + return false; + break; + case GITS_BASER_TYPE_COLLECTION: + /* as GITS_TYPER.CIL == 0, ITS supports 16-bit collection ID */ + if (id >= BIT_ULL(16)) + return false; + break; + default: + return false; + } + + if (!(baser & GITS_BASER_INDIRECT)) { + phys_addr_t addr; + + if (id >= (l1_tbl_size / esz)) + return false; + + addr = base + id * esz; + gfn = addr >> PAGE_SHIFT; + + if (eaddr) + *eaddr = addr; + + goto out; + } + + /* calculate and check the index into the 1st level */ + index = id / (SZ_64K / esz); + if (index >= (l1_tbl_size / sizeof(u64))) + return false; + + /* Each 1st level entry is represented by a 64-bit value. */ + if (kvm_read_guest_lock(its->dev->kvm, + base + index * sizeof(indirect_ptr), + &indirect_ptr, sizeof(indirect_ptr))) + return false; + + indirect_ptr = le64_to_cpu(indirect_ptr); + + /* check the valid bit of the first level entry */ + if (!(indirect_ptr & BIT_ULL(63))) + return false; + + /* Mask the guest physical address and calculate the frame number. */ + indirect_ptr &= GENMASK_ULL(51, 16); + + /* Find the address of the actual entry */ + index = id % (SZ_64K / esz); + indirect_ptr += index * esz; + gfn = indirect_ptr >> PAGE_SHIFT; + + if (eaddr) + *eaddr = indirect_ptr; + +out: + idx = srcu_read_lock(&its->dev->kvm->srcu); + ret = kvm_is_visible_gfn(its->dev->kvm, gfn); + srcu_read_unlock(&its->dev->kvm->srcu, idx); + return ret; +} + +static int vgic_its_alloc_collection(struct vgic_its *its, + struct its_collection **colp, + u32 coll_id) +{ + struct its_collection *collection; + + if (!vgic_its_check_id(its, its->baser_coll_table, coll_id, NULL)) + return E_ITS_MAPC_COLLECTION_OOR; + + collection = kzalloc(sizeof(*collection), GFP_KERNEL); + if (!collection) + return -ENOMEM; + + collection->collection_id = coll_id; + collection->target_addr = COLLECTION_NOT_MAPPED; + + list_add_tail(&collection->coll_list, &its->collection_list); + *colp = collection; + + return 0; +} + +static void vgic_its_free_collection(struct vgic_its *its, u32 coll_id) +{ + struct its_collection *collection; + struct its_device *device; + struct its_ite *ite; + + /* + * Clearing the mapping for that collection ID removes the + * entry from the list. If there wasn't any before, we can + * go home early. + */ + collection = find_collection(its, coll_id); + if (!collection) + return; + + for_each_lpi_its(device, ite, its) + if (ite->collection && + ite->collection->collection_id == coll_id) + ite->collection = NULL; + + list_del(&collection->coll_list); + kfree(collection); +} + +/* Must be called with its_lock mutex held */ +static struct its_ite *vgic_its_alloc_ite(struct its_device *device, + struct its_collection *collection, + u32 event_id) +{ + struct its_ite *ite; + + ite = kzalloc(sizeof(*ite), GFP_KERNEL); + if (!ite) + return ERR_PTR(-ENOMEM); + + ite->event_id = event_id; + ite->collection = collection; + + list_add_tail(&ite->ite_list, &device->itt_head); + return ite; +} + +/* + * The MAPTI and MAPI commands map LPIs to ITTEs. + * Must be called with its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapi(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_ite *ite; + struct kvm_vcpu *vcpu = NULL; + struct its_device *device; + struct its_collection *collection, *new_coll = NULL; + struct vgic_irq *irq; + int lpi_nr; + + device = find_its_device(its, device_id); + if (!device) + return E_ITS_MAPTI_UNMAPPED_DEVICE; + + if (event_id >= BIT_ULL(device->num_eventid_bits)) + return E_ITS_MAPTI_ID_OOR; + + if (its_cmd_get_command(its_cmd) == GITS_CMD_MAPTI) + lpi_nr = its_cmd_get_physical_id(its_cmd); + else + lpi_nr = event_id; + if (lpi_nr < GIC_LPI_OFFSET || + lpi_nr >= max_lpis_propbaser(kvm->arch.vgic.propbaser)) + return E_ITS_MAPTI_PHYSICALID_OOR; + + /* If there is an existing mapping, behavior is UNPREDICTABLE. */ + if (find_ite(its, device_id, event_id)) + return 0; + + collection = find_collection(its, coll_id); + if (!collection) { + int ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + new_coll = collection; + } + + ite = vgic_its_alloc_ite(device, collection, event_id); + if (IS_ERR(ite)) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + return PTR_ERR(ite); + } + + if (its_is_collection_mapped(collection)) + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq = vgic_add_lpi(kvm, lpi_nr, vcpu); + if (IS_ERR(irq)) { + if (new_coll) + vgic_its_free_collection(its, coll_id); + its_free_ite(kvm, ite); + return PTR_ERR(irq); + } + ite->irq = irq; + + return 0; +} + +/* Requires the its_lock to be held. */ +static void vgic_its_free_device(struct kvm *kvm, struct its_device *device) +{ + struct its_ite *ite, *temp; + + /* + * The spec says that unmapping a device with still valid + * ITTEs associated is UNPREDICTABLE. We remove all ITTEs, + * since we cannot leave the memory unreferenced. + */ + list_for_each_entry_safe(ite, temp, &device->itt_head, ite_list) + its_free_ite(kvm, ite); + + vgic_its_invalidate_cache(kvm); + + list_del(&device->dev_list); + kfree(device); +} + +/* its lock must be held */ +static void vgic_its_free_device_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_device *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->device_list, dev_list) + vgic_its_free_device(kvm, cur); +} + +/* its lock must be held */ +static void vgic_its_free_collection_list(struct kvm *kvm, struct vgic_its *its) +{ + struct its_collection *cur, *temp; + + list_for_each_entry_safe(cur, temp, &its->collection_list, coll_list) + vgic_its_free_collection(its, cur->collection_id); +} + +/* Must be called with its_lock mutex held */ +static struct its_device *vgic_its_alloc_device(struct vgic_its *its, + u32 device_id, gpa_t itt_addr, + u8 num_eventid_bits) +{ + struct its_device *device; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) + return ERR_PTR(-ENOMEM); + + device->device_id = device_id; + device->itt_addr = itt_addr; + device->num_eventid_bits = num_eventid_bits; + INIT_LIST_HEAD(&device->itt_head); + + list_add_tail(&device->dev_list, &its->device_list); + return device; +} + +/* + * MAPD maps or unmaps a device ID to Interrupt Translation Tables (ITTs). + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapd(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + bool valid = its_cmd_get_validbit(its_cmd); + u8 num_eventid_bits = its_cmd_get_size(its_cmd); + gpa_t itt_addr = its_cmd_get_ittaddr(its_cmd); + struct its_device *device; + + if (!vgic_its_check_id(its, its->baser_device_table, device_id, NULL)) + return E_ITS_MAPD_DEVICE_OOR; + + if (valid && num_eventid_bits > VITS_TYPER_IDBITS) + return E_ITS_MAPD_ITTSIZE_OOR; + + device = find_its_device(its, device_id); + + /* + * The spec says that calling MAPD on an already mapped device + * invalidates all cached data for this device. We implement this + * by removing the mapping and re-establishing it. + */ + if (device) + vgic_its_free_device(kvm, device); + + /* + * The spec does not say whether unmapping a not-mapped device + * is an error, so we are done in any case. + */ + if (!valid) + return 0; + + device = vgic_its_alloc_device(its, device_id, itt_addr, + num_eventid_bits); + + return PTR_ERR_OR_ZERO(device); +} + +/* + * The MAPC command maps collection IDs to redistributors. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_mapc(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u16 coll_id; + u32 target_addr; + struct its_collection *collection; + bool valid; + + valid = its_cmd_get_validbit(its_cmd); + coll_id = its_cmd_get_collection(its_cmd); + target_addr = its_cmd_get_target_addr(its_cmd); + + if (target_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MAPC_PROCNUM_OOR; + + if (!valid) { + vgic_its_free_collection(its, coll_id); + vgic_its_invalidate_cache(kvm); + } else { + collection = find_collection(its, coll_id); + + if (!collection) { + int ret; + + ret = vgic_its_alloc_collection(its, &collection, + coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + } else { + collection->target_addr = target_addr; + update_affinity_collection(kvm, its, collection); + } + } + + return 0; +} + +/* + * The CLEAR command removes the pending state for a particular LPI. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_CLEAR_UNMAPPED_INTERRUPT; + + ite->irq->pending_latch = false; + + if (ite->irq->hw) + return irq_set_irqchip_state(ite->irq->host_irq, + IRQCHIP_STATE_PENDING, false); + + return 0; +} + +/* + * The INV command syncs the configuration bits from the memory table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 device_id = its_cmd_get_deviceid(its_cmd); + u32 event_id = its_cmd_get_id(its_cmd); + struct its_ite *ite; + + + ite = find_ite(its, device_id, event_id); + if (!ite) + return E_ITS_INV_UNMAPPED_INTERRUPT; + + return update_lpi_config(kvm, ite->irq, NULL, true); +} + +/* + * The INVALL command requests flushing of all IRQ data in this collection. + * Find the VCPU mapped to that collection, then iterate over the VM's list + * of mapped LPIs and update the configuration for each IRQ which targets + * the specified vcpu. The configuration will be read from the in-memory + * configuration table. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 coll_id = its_cmd_get_collection(its_cmd); + struct its_collection *collection; + struct kvm_vcpu *vcpu; + struct vgic_irq *irq; + u32 *intids; + int irq_count, i; + + collection = find_collection(its, coll_id); + if (!its_is_collection_mapped(collection)) + return E_ITS_INVALL_UNMAPPED_COLLECTION; + + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq_count = vgic_copy_lpi_list(kvm, vcpu, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); + if (!irq) + continue; + update_lpi_config(kvm, irq, vcpu, false); + vgic_put_irq(kvm, irq); + } + + kfree(intids); + + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm) + its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe); + + return 0; +} + +/* + * The MOVALL command moves the pending state of all IRQs targeting one + * redistributor to another. We don't hold the pending state in the VCPUs, + * but in the IRQs instead, so there is really not much to do for us here. + * However the spec says that no IRQ must target the old redistributor + * afterwards, so we make sure that no LPI is using the associated target_vcpu. + * This command affects all LPIs in the system that target that redistributor. + */ +static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 target1_addr = its_cmd_get_target_addr(its_cmd); + u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32); + struct kvm_vcpu *vcpu1, *vcpu2; + struct vgic_irq *irq; + u32 *intids; + int irq_count, i; + + if (target1_addr >= atomic_read(&kvm->online_vcpus) || + target2_addr >= atomic_read(&kvm->online_vcpus)) + return E_ITS_MOVALL_PROCNUM_OOR; + + if (target1_addr == target2_addr) + return 0; + + vcpu1 = kvm_get_vcpu(kvm, target1_addr); + vcpu2 = kvm_get_vcpu(kvm, target2_addr); + + irq_count = vgic_copy_lpi_list(kvm, vcpu1, &intids); + if (irq_count < 0) + return irq_count; + + for (i = 0; i < irq_count; i++) { + irq = vgic_get_irq(kvm, NULL, intids[i]); + + update_affinity(irq, vcpu2); + + vgic_put_irq(kvm, irq); + } + + vgic_its_invalidate_cache(kvm); + + kfree(intids); + return 0; +} + +/* + * The INT command injects the LPI associated with that DevID/EvID pair. + * Must be called with the its_lock mutex held. + */ +static int vgic_its_cmd_handle_int(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + u32 msi_data = its_cmd_get_id(its_cmd); + u64 msi_devid = its_cmd_get_deviceid(its_cmd); + + return vgic_its_trigger_msi(kvm, its, msi_devid, msi_data); +} + +/* + * This function is called with the its_cmd lock held, but the ITS data + * structure lock dropped. + */ +static int vgic_its_handle_command(struct kvm *kvm, struct vgic_its *its, + u64 *its_cmd) +{ + int ret = -ENODEV; + + mutex_lock(&its->its_lock); + switch (its_cmd_get_command(its_cmd)) { + case GITS_CMD_MAPD: + ret = vgic_its_cmd_handle_mapd(kvm, its, its_cmd); + break; + case GITS_CMD_MAPC: + ret = vgic_its_cmd_handle_mapc(kvm, its, its_cmd); + break; + case GITS_CMD_MAPI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MAPTI: + ret = vgic_its_cmd_handle_mapi(kvm, its, its_cmd); + break; + case GITS_CMD_MOVI: + ret = vgic_its_cmd_handle_movi(kvm, its, its_cmd); + break; + case GITS_CMD_DISCARD: + ret = vgic_its_cmd_handle_discard(kvm, its, its_cmd); + break; + case GITS_CMD_CLEAR: + ret = vgic_its_cmd_handle_clear(kvm, its, its_cmd); + break; + case GITS_CMD_MOVALL: + ret = vgic_its_cmd_handle_movall(kvm, its, its_cmd); + break; + case GITS_CMD_INT: + ret = vgic_its_cmd_handle_int(kvm, its, its_cmd); + break; + case GITS_CMD_INV: + ret = vgic_its_cmd_handle_inv(kvm, its, its_cmd); + break; + case GITS_CMD_INVALL: + ret = vgic_its_cmd_handle_invall(kvm, its, its_cmd); + break; + case GITS_CMD_SYNC: + /* we ignore this command: we are in sync all of the time */ + ret = 0; + break; + } + mutex_unlock(&its->its_lock); + + return ret; +} + +static u64 vgic_sanitise_its_baser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_BASER_SHAREABILITY_MASK, + GITS_BASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_BASER_INNER_CACHEABILITY_MASK, + GITS_BASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_BASER_OUTER_CACHEABILITY_MASK, + GITS_BASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* We support only one (ITS) page size: 64K */ + reg = (reg & ~GITS_BASER_PAGE_SIZE_MASK) | GITS_BASER_PAGE_SIZE_64K; + + return reg; +} + +static u64 vgic_sanitise_its_cbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GITS_CBASER_SHAREABILITY_MASK, + GITS_CBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GITS_CBASER_INNER_CACHEABILITY_MASK, + GITS_CBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GITS_CBASER_OUTER_CACHEABILITY_MASK, + GITS_CBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + /* Sanitise the physical address to be 64k aligned. */ + reg &= ~GENMASK_ULL(15, 12); + + return reg; +} + +static unsigned long vgic_mmio_read_its_cbaser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cbaser, addr & 7, len); +} + +static void vgic_mmio_write_its_cbaser(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + /* When GITS_CTLR.Enable is 1, this register is RO. */ + if (its->enabled) + return; + + mutex_lock(&its->cmd_lock); + its->cbaser = update_64bit_reg(its->cbaser, addr & 7, len, val); + its->cbaser = vgic_sanitise_its_cbaser(its->cbaser); + its->creadr = 0; + /* + * CWRITER is architecturally UNKNOWN on reset, but we need to reset + * it to CREADR to make sure we start with an empty command buffer. + */ + its->cwriter = its->creadr; + mutex_unlock(&its->cmd_lock); +} + +#define ITS_CMD_BUFFER_SIZE(baser) ((((baser) & 0xff) + 1) << 12) +#define ITS_CMD_SIZE 32 +#define ITS_CMD_OFFSET(reg) ((reg) & GENMASK(19, 5)) + +/* Must be called with the cmd_lock held. */ +static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) +{ + gpa_t cbaser; + u64 cmd_buf[4]; + + /* Commands are only processed when the ITS is enabled. */ + if (!its->enabled) + return; + + cbaser = GITS_CBASER_ADDRESS(its->cbaser); + + while (its->cwriter != its->creadr) { + int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, + cmd_buf, ITS_CMD_SIZE); + /* + * If kvm_read_guest() fails, this could be due to the guest + * programming a bogus value in CBASER or something else going + * wrong from which we cannot easily recover. + * According to section 6.3.2 in the GICv3 spec we can just + * ignore that command then. + */ + if (!ret) + vgic_its_handle_command(kvm, its, cmd_buf); + + its->creadr += ITS_CMD_SIZE; + if (its->creadr == ITS_CMD_BUFFER_SIZE(its->cbaser)) + its->creadr = 0; + } +} + +/* + * By writing to CWRITER the guest announces new commands to be processed. + * To avoid any races in the first place, we take the its_cmd lock, which + * protects our ring buffer variables, so that there is only one user + * per ITS handling commands at a given time. + */ +static void vgic_mmio_write_its_cwriter(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u64 reg; + + if (!its) + return; + + mutex_lock(&its->cmd_lock); + + reg = update_64bit_reg(its->cwriter, addr & 7, len, val); + reg = ITS_CMD_OFFSET(reg); + if (reg >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + mutex_unlock(&its->cmd_lock); + return; + } + its->cwriter = reg; + + vgic_its_process_commands(kvm, its); + + mutex_unlock(&its->cmd_lock); +} + +static unsigned long vgic_mmio_read_its_cwriter(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->cwriter, addr & 0x7, len); +} + +static unsigned long vgic_mmio_read_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + return extract_bytes(its->creadr, addr & 0x7, len); +} + +static int vgic_mmio_uaccess_write_its_creadr(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 cmd_offset; + int ret = 0; + + mutex_lock(&its->cmd_lock); + + if (its->enabled) { + ret = -EBUSY; + goto out; + } + + cmd_offset = ITS_CMD_OFFSET(val); + if (cmd_offset >= ITS_CMD_BUFFER_SIZE(its->cbaser)) { + ret = -EINVAL; + goto out; + } + + its->creadr = cmd_offset; +out: + mutex_unlock(&its->cmd_lock); + return ret; +} + +#define BASER_INDEX(addr) (((addr) / sizeof(u64)) & 0x7) +static unsigned long vgic_mmio_read_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u64 reg; + + switch (BASER_INDEX(addr)) { + case 0: + reg = its->baser_device_table; + break; + case 1: + reg = its->baser_coll_table; + break; + default: + reg = 0; + break; + } + + return extract_bytes(reg, addr & 7, len); +} + +#define GITS_BASER_RO_MASK (GENMASK_ULL(52, 48) | GENMASK_ULL(58, 56)) +static void vgic_mmio_write_its_baser(struct kvm *kvm, + struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 entry_size, table_type; + u64 reg, *regptr, clearbits = 0; + + /* When GITS_CTLR.Enable is 1, we ignore write accesses. */ + if (its->enabled) + return; + + switch (BASER_INDEX(addr)) { + case 0: + regptr = &its->baser_device_table; + entry_size = abi->dte_esz; + table_type = GITS_BASER_TYPE_DEVICE; + break; + case 1: + regptr = &its->baser_coll_table; + entry_size = abi->cte_esz; + table_type = GITS_BASER_TYPE_COLLECTION; + clearbits = GITS_BASER_INDIRECT; + break; + default: + return; + } + + reg = update_64bit_reg(*regptr, addr & 7, len, val); + reg &= ~GITS_BASER_RO_MASK; + reg &= ~clearbits; + + reg |= (entry_size - 1) << GITS_BASER_ENTRY_SIZE_SHIFT; + reg |= table_type << GITS_BASER_TYPE_SHIFT; + reg = vgic_sanitise_its_baser(reg); + + *regptr = reg; + + if (!(reg & GITS_BASER_VALID)) { + /* Take the its_lock to prevent a race with a save/restore */ + mutex_lock(&its->its_lock); + switch (table_type) { + case GITS_BASER_TYPE_DEVICE: + vgic_its_free_device_list(kvm, its); + break; + case GITS_BASER_TYPE_COLLECTION: + vgic_its_free_collection_list(kvm, its); + break; + } + mutex_unlock(&its->its_lock); + } +} + +static unsigned long vgic_mmio_read_its_ctlr(struct kvm *vcpu, + struct vgic_its *its, + gpa_t addr, unsigned int len) +{ + u32 reg = 0; + + mutex_lock(&its->cmd_lock); + if (its->creadr == its->cwriter) + reg |= GITS_CTLR_QUIESCENT; + if (its->enabled) + reg |= GITS_CTLR_ENABLE; + mutex_unlock(&its->cmd_lock); + + return reg; +} + +static void vgic_mmio_write_its_ctlr(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val) +{ + mutex_lock(&its->cmd_lock); + + /* + * It is UNPREDICTABLE to enable the ITS if any of the CBASER or + * device/collection BASER are invalid + */ + if (!its->enabled && (val & GITS_CTLR_ENABLE) && + (!(its->baser_device_table & GITS_BASER_VALID) || + !(its->baser_coll_table & GITS_BASER_VALID) || + !(its->cbaser & GITS_CBASER_VALID))) + goto out; + + its->enabled = !!(val & GITS_CTLR_ENABLE); + if (!its->enabled) + vgic_its_invalidate_cache(kvm); + + /* + * Try to process any pending commands. This function bails out early + * if the ITS is disabled or no commands have been queued. + */ + vgic_its_process_commands(kvm, its); + +out: + mutex_unlock(&its->cmd_lock); +} + +#define REGISTER_ITS_DESC(off, rd, wr, length, acc) \ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ +} + +#define REGISTER_ITS_DESC_UACCESS(off, rd, wr, uwr, length, acc)\ +{ \ + .reg_offset = off, \ + .len = length, \ + .access_flags = acc, \ + .its_read = rd, \ + .its_write = wr, \ + .uaccess_its_write = uwr, \ +} + +static void its_mmio_write_wi(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +static struct vgic_register_region its_registers[] = { + REGISTER_ITS_DESC(GITS_CTLR, + vgic_mmio_read_its_ctlr, vgic_mmio_write_its_ctlr, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC_UACCESS(GITS_IIDR, + vgic_mmio_read_its_iidr, its_mmio_write_wi, + vgic_mmio_uaccess_write_its_iidr, 4, + VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_TYPER, + vgic_mmio_read_its_typer, its_mmio_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CBASER, + vgic_mmio_read_its_cbaser, vgic_mmio_write_its_cbaser, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_CWRITER, + vgic_mmio_read_its_cwriter, vgic_mmio_write_its_cwriter, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC_UACCESS(GITS_CREADR, + vgic_mmio_read_its_creadr, its_mmio_write_wi, + vgic_mmio_uaccess_write_its_creadr, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_BASER, + vgic_mmio_read_its_baser, vgic_mmio_write_its_baser, 0x40, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_ITS_DESC(GITS_IDREGS_BASE, + vgic_mmio_read_its_idregs, its_mmio_write_wi, 0x30, + VGIC_ACCESS_32bit), +}; + +/* This is called on setting the LPI enable bit in the redistributor. */ +void vgic_enable_lpis(struct kvm_vcpu *vcpu) +{ + if (!(vcpu->arch.vgic_cpu.pendbaser & GICR_PENDBASER_PTZ)) + its_sync_lpi_pending_table(vcpu); +} + +static int vgic_register_its_iodev(struct kvm *kvm, struct vgic_its *its, + u64 addr) +{ + struct vgic_io_device *iodev = &its->iodev; + int ret; + + mutex_lock(&kvm->slots_lock); + if (!IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { + ret = -EBUSY; + goto out; + } + + its->vgic_its_base = addr; + iodev->regions = its_registers; + iodev->nr_regions = ARRAY_SIZE(its_registers); + kvm_iodevice_init(&iodev->dev, &kvm_io_gic_ops); + + iodev->base_addr = its->vgic_its_base; + iodev->iodev_type = IODEV_ITS; + iodev->its = its; + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, iodev->base_addr, + KVM_VGIC_V3_ITS_SIZE, &iodev->dev); +out: + mutex_unlock(&kvm->slots_lock); + + return ret; +} + +/* Default is 16 cached LPIs per vcpu */ +#define LPI_DEFAULT_PCPU_CACHE_SIZE 16 + +void vgic_lpi_translation_cache_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned int sz; + int i; + + if (!list_empty(&dist->lpi_translation_cache)) + return; + + sz = atomic_read(&kvm->online_vcpus) * LPI_DEFAULT_PCPU_CACHE_SIZE; + + for (i = 0; i < sz; i++) { + struct vgic_translation_cache_entry *cte; + + /* An allocation failure is not fatal */ + cte = kzalloc(sizeof(*cte), GFP_KERNEL); + if (WARN_ON(!cte)) + break; + + INIT_LIST_HEAD(&cte->entry); + list_add(&cte->entry, &dist->lpi_translation_cache); + } +} + +void vgic_lpi_translation_cache_destroy(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_translation_cache_entry *cte, *tmp; + + vgic_its_invalidate_cache(kvm); + + list_for_each_entry_safe(cte, tmp, + &dist->lpi_translation_cache, entry) { + list_del(&cte->entry); + kfree(cte); + } +} + +#define INITIAL_BASER_VALUE \ + (GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) | \ + GITS_BASER_PAGE_SIZE_64K) + +#define INITIAL_PROPBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)) + +static int vgic_its_create(struct kvm_device *dev, u32 type) +{ + struct vgic_its *its; + + if (type != KVM_DEV_TYPE_ARM_VGIC_ITS) + return -ENODEV; + + its = kzalloc(sizeof(struct vgic_its), GFP_KERNEL); + if (!its) + return -ENOMEM; + + if (vgic_initialized(dev->kvm)) { + int ret = vgic_v4_init(dev->kvm); + if (ret < 0) { + kfree(its); + return ret; + } + + vgic_lpi_translation_cache_init(dev->kvm); + } + + mutex_init(&its->its_lock); + mutex_init(&its->cmd_lock); + + its->vgic_its_base = VGIC_ADDR_UNDEF; + + INIT_LIST_HEAD(&its->device_list); + INIT_LIST_HEAD(&its->collection_list); + + dev->kvm->arch.vgic.msis_require_devid = true; + dev->kvm->arch.vgic.has_its = true; + its->enabled = false; + its->dev = dev; + + its->baser_device_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_DEVICE << GITS_BASER_TYPE_SHIFT); + its->baser_coll_table = INITIAL_BASER_VALUE | + ((u64)GITS_BASER_TYPE_COLLECTION << GITS_BASER_TYPE_SHIFT); + dev->kvm->arch.vgic.propbaser = INITIAL_PROPBASER_VALUE; + + dev->private = its; + + return vgic_its_set_abi(its, NR_ITS_ABIS - 1); +} + +static void vgic_its_destroy(struct kvm_device *kvm_dev) +{ + struct kvm *kvm = kvm_dev->kvm; + struct vgic_its *its = kvm_dev->private; + + mutex_lock(&its->its_lock); + + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); + + mutex_unlock(&its->its_lock); + kfree(its); + kfree(kvm_dev);/* alloc by kvm_ioctl_create_device, free by .destroy */ +} + +static int vgic_its_has_attr_regs(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + gpa_t offset = attr->attr; + int align; + + align = (offset < GITS_TYPER) || (offset >= GITS_PIDR4) ? 0x3 : 0x7; + + if (offset & align) + return -EINVAL; + + region = vgic_find_mmio_region(its_registers, + ARRAY_SIZE(its_registers), + offset); + if (!region) + return -ENXIO; + + return 0; +} + +static int vgic_its_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u64 *reg, bool is_write) +{ + const struct vgic_register_region *region; + struct vgic_its *its; + gpa_t addr, offset; + unsigned int len; + int align, ret = 0; + + its = dev->private; + offset = attr->attr; + + /* + * Although the spec supports upper/lower 32-bit accesses to + * 64-bit ITS registers, the userspace ABI requires 64-bit + * accesses to all 64-bit wide registers. We therefore only + * support 32-bit accesses to GITS_CTLR, GITS_IIDR and GITS ID + * registers + */ + if ((offset < GITS_TYPER) || (offset >= GITS_PIDR4)) + align = 0x3; + else + align = 0x7; + + if (offset & align) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (IS_VGIC_ADDR_UNDEF(its->vgic_its_base)) { + ret = -ENXIO; + goto out; + } + + region = vgic_find_mmio_region(its_registers, + ARRAY_SIZE(its_registers), + offset); + if (!region) { + ret = -ENXIO; + goto out; + } + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; + } + + addr = its->vgic_its_base + offset; + + len = region->access_flags & VGIC_ACCESS_64bit ? 8 : 4; + + if (is_write) { + if (region->uaccess_its_write) + ret = region->uaccess_its_write(dev->kvm, its, addr, + len, *reg); + else + region->its_write(dev->kvm, its, addr, len, *reg); + } else { + *reg = region->its_read(dev->kvm, its, addr, len); + } + unlock_all_vcpus(dev->kvm); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static u32 compute_next_devid_offset(struct list_head *h, + struct its_device *dev) +{ + struct its_device *next; + u32 next_offset; + + if (list_is_last(&dev->dev_list, h)) + return 0; + next = list_next_entry(dev, dev_list); + next_offset = next->device_id - dev->device_id; + + return min_t(u32, next_offset, VITS_DTE_MAX_DEVID_OFFSET); +} + +static u32 compute_next_eventid_offset(struct list_head *h, struct its_ite *ite) +{ + struct its_ite *next; + u32 next_offset; + + if (list_is_last(&ite->ite_list, h)) + return 0; + next = list_next_entry(ite, ite_list); + next_offset = next->event_id - ite->event_id; + + return min_t(u32, next_offset, VITS_ITE_MAX_EVENTID_OFFSET); +} + +/** + * entry_fn_t - Callback called on a table entry restore path + * @its: its handle + * @id: id of the entry + * @entry: pointer to the entry + * @opaque: pointer to an opaque data + * + * Return: < 0 on error, 0 if last element was identified, id offset to next + * element otherwise + */ +typedef int (*entry_fn_t)(struct vgic_its *its, u32 id, void *entry, + void *opaque); + +/** + * scan_its_table - Scan a contiguous table in guest RAM and applies a function + * to each entry + * + * @its: its handle + * @base: base gpa of the table + * @size: size of the table in bytes + * @esz: entry size in bytes + * @start_id: the ID of the first entry in the table + * (non zero for 2d level tables) + * @fn: function to apply on each entry + * + * Return: < 0 on error, 0 if last element was identified, 1 otherwise + * (the last element may not be found on second level tables) + */ +static int scan_its_table(struct vgic_its *its, gpa_t base, int size, u32 esz, + int start_id, entry_fn_t fn, void *opaque) +{ + struct kvm *kvm = its->dev->kvm; + unsigned long len = size; + int id = start_id; + gpa_t gpa = base; + char entry[ESZ_MAX]; + int ret; + + memset(entry, 0, esz); + + while (len > 0) { + int next_offset; + size_t byte_offset; + + ret = kvm_read_guest_lock(kvm, gpa, entry, esz); + if (ret) + return ret; + + next_offset = fn(its, id, entry, opaque); + if (next_offset <= 0) + return next_offset; + + byte_offset = next_offset * esz; + id += next_offset; + gpa += byte_offset; + len -= byte_offset; + } + return 1; +} + +/** + * vgic_its_save_ite - Save an interrupt translation entry at @gpa + */ +static int vgic_its_save_ite(struct vgic_its *its, struct its_device *dev, + struct its_ite *ite, gpa_t gpa, int ite_esz) +{ + struct kvm *kvm = its->dev->kvm; + u32 next_offset; + u64 val; + + next_offset = compute_next_eventid_offset(&dev->itt_head, ite); + val = ((u64)next_offset << KVM_ITS_ITE_NEXT_SHIFT) | + ((u64)ite->irq->intid << KVM_ITS_ITE_PINTID_SHIFT) | + ite->collection->collection_id; + val = cpu_to_le64(val); + return kvm_write_guest_lock(kvm, gpa, &val, ite_esz); +} + +/** + * vgic_its_restore_ite - restore an interrupt translation entry + * @event_id: id used for indexing + * @ptr: pointer to the ITE entry + * @opaque: pointer to the its_device + */ +static int vgic_its_restore_ite(struct vgic_its *its, u32 event_id, + void *ptr, void *opaque) +{ + struct its_device *dev = (struct its_device *)opaque; + struct its_collection *collection; + struct kvm *kvm = its->dev->kvm; + struct kvm_vcpu *vcpu = NULL; + u64 val; + u64 *p = (u64 *)ptr; + struct vgic_irq *irq; + u32 coll_id, lpi_id; + struct its_ite *ite; + u32 offset; + + val = *p; + + val = le64_to_cpu(val); + + coll_id = val & KVM_ITS_ITE_ICID_MASK; + lpi_id = (val & KVM_ITS_ITE_PINTID_MASK) >> KVM_ITS_ITE_PINTID_SHIFT; + + if (!lpi_id) + return 1; /* invalid entry, no choice but to scan next entry */ + + if (lpi_id < VGIC_MIN_LPI) + return -EINVAL; + + offset = val >> KVM_ITS_ITE_NEXT_SHIFT; + if (event_id + offset >= BIT_ULL(dev->num_eventid_bits)) + return -EINVAL; + + collection = find_collection(its, coll_id); + if (!collection) + return -EINVAL; + + ite = vgic_its_alloc_ite(dev, collection, event_id); + if (IS_ERR(ite)) + return PTR_ERR(ite); + + if (its_is_collection_mapped(collection)) + vcpu = kvm_get_vcpu(kvm, collection->target_addr); + + irq = vgic_add_lpi(kvm, lpi_id, vcpu); + if (IS_ERR(irq)) + return PTR_ERR(irq); + ite->irq = irq; + + return offset; +} + +static int vgic_its_ite_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct its_ite *itea = container_of(a, struct its_ite, ite_list); + struct its_ite *iteb = container_of(b, struct its_ite, ite_list); + + if (itea->event_id < iteb->event_id) + return -1; + else + return 1; +} + +static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + gpa_t base = device->itt_addr; + struct its_ite *ite; + int ret; + int ite_esz = abi->ite_esz; + + list_sort(NULL, &device->itt_head, vgic_its_ite_cmp); + + list_for_each_entry(ite, &device->itt_head, ite_list) { + gpa_t gpa = base + ite->event_id * ite_esz; + + /* + * If an LPI carries the HW bit, this means that this + * interrupt is controlled by GICv4, and we do not + * have direct access to that state. Let's simply fail + * the save operation... + */ + if (ite->irq->hw) + return -EACCES; + + ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz); + if (ret) + return ret; + } + return 0; +} + +/** + * vgic_its_restore_itt - restore the ITT of a device + * + * @its: its handle + * @dev: device handle + * + * Return 0 on success, < 0 on error + */ +static int vgic_its_restore_itt(struct vgic_its *its, struct its_device *dev) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + gpa_t base = dev->itt_addr; + int ret; + int ite_esz = abi->ite_esz; + size_t max_size = BIT_ULL(dev->num_eventid_bits) * ite_esz; + + ret = scan_its_table(its, base, max_size, ite_esz, 0, + vgic_its_restore_ite, dev); + + /* scan_its_table returns +1 if all ITEs are invalid */ + if (ret > 0) + ret = 0; + + return ret; +} + +/** + * vgic_its_save_dte - Save a device table entry at a given GPA + * + * @its: ITS handle + * @dev: ITS device + * @ptr: GPA + */ +static int vgic_its_save_dte(struct vgic_its *its, struct its_device *dev, + gpa_t ptr, int dte_esz) +{ + struct kvm *kvm = its->dev->kvm; + u64 val, itt_addr_field; + u32 next_offset; + + itt_addr_field = dev->itt_addr >> 8; + next_offset = compute_next_devid_offset(&its->device_list, dev); + val = (1ULL << KVM_ITS_DTE_VALID_SHIFT | + ((u64)next_offset << KVM_ITS_DTE_NEXT_SHIFT) | + (itt_addr_field << KVM_ITS_DTE_ITTADDR_SHIFT) | + (dev->num_eventid_bits - 1)); + val = cpu_to_le64(val); + return kvm_write_guest_lock(kvm, ptr, &val, dte_esz); +} + +/** + * vgic_its_restore_dte - restore a device table entry + * + * @its: its handle + * @id: device id the DTE corresponds to + * @ptr: kernel VA where the 8 byte DTE is located + * @opaque: unused + * + * Return: < 0 on error, 0 if the dte is the last one, id offset to the + * next dte otherwise + */ +static int vgic_its_restore_dte(struct vgic_its *its, u32 id, + void *ptr, void *opaque) +{ + struct its_device *dev; + gpa_t itt_addr; + u8 num_eventid_bits; + u64 entry = *(u64 *)ptr; + bool valid; + u32 offset; + int ret; + + entry = le64_to_cpu(entry); + + valid = entry >> KVM_ITS_DTE_VALID_SHIFT; + num_eventid_bits = (entry & KVM_ITS_DTE_SIZE_MASK) + 1; + itt_addr = ((entry & KVM_ITS_DTE_ITTADDR_MASK) + >> KVM_ITS_DTE_ITTADDR_SHIFT) << 8; + + if (!valid) + return 1; + + /* dte entry is valid */ + offset = (entry & KVM_ITS_DTE_NEXT_MASK) >> KVM_ITS_DTE_NEXT_SHIFT; + + dev = vgic_its_alloc_device(its, id, itt_addr, num_eventid_bits); + if (IS_ERR(dev)) + return PTR_ERR(dev); + + ret = vgic_its_restore_itt(its, dev); + if (ret) { + vgic_its_free_device(its->dev->kvm, dev); + return ret; + } + + return offset; +} + +static int vgic_its_device_cmp(void *priv, struct list_head *a, + struct list_head *b) +{ + struct its_device *deva = container_of(a, struct its_device, dev_list); + struct its_device *devb = container_of(b, struct its_device, dev_list); + + if (deva->device_id < devb->device_id) + return -1; + else + return 1; +} + +/** + * vgic_its_save_device_tables - Save the device table and all ITT + * into guest RAM + * + * L1/L2 handling is hidden by vgic_its_check_id() helper which directly + * returns the GPA of the device entry + */ +static int vgic_its_save_device_tables(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_device_table; + struct its_device *dev; + int dte_esz = abi->dte_esz; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + list_sort(NULL, &its->device_list, vgic_its_device_cmp); + + list_for_each_entry(dev, &its->device_list, dev_list) { + int ret; + gpa_t eaddr; + + if (!vgic_its_check_id(its, baser, + dev->device_id, &eaddr)) + return -EINVAL; + + ret = vgic_its_save_itt(its, dev); + if (ret) + return ret; + + ret = vgic_its_save_dte(its, dev, eaddr, dte_esz); + if (ret) + return ret; + } + return 0; +} + +/** + * handle_l1_dte - callback used for L1 device table entries (2 stage case) + * + * @its: its handle + * @id: index of the entry in the L1 table + * @addr: kernel VA + * @opaque: unused + * + * L1 table entries are scanned by steps of 1 entry + * Return < 0 if error, 0 if last dte was found when scanning the L2 + * table, +1 otherwise (meaning next L1 entry must be scanned) + */ +static int handle_l1_dte(struct vgic_its *its, u32 id, void *addr, + void *opaque) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int l2_start_id = id * (SZ_64K / abi->dte_esz); + u64 entry = *(u64 *)addr; + int dte_esz = abi->dte_esz; + gpa_t gpa; + int ret; + + entry = le64_to_cpu(entry); + + if (!(entry & KVM_ITS_L1E_VALID_MASK)) + return 1; + + gpa = entry & KVM_ITS_L1E_ADDR_MASK; + + ret = scan_its_table(its, gpa, SZ_64K, dte_esz, + l2_start_id, vgic_its_restore_dte, NULL); + + return ret; +} + +/** + * vgic_its_restore_device_tables - Restore the device table and all ITT + * from guest RAM to internal data structs + */ +static int vgic_its_restore_device_tables(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_device_table; + int l1_esz, ret; + int l1_tbl_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + gpa_t l1_gpa; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + l1_gpa = GITS_BASER_ADDR_48_to_52(baser); + + if (baser & GITS_BASER_INDIRECT) { + l1_esz = GITS_LVL1_ENTRY_SIZE; + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, + handle_l1_dte, NULL); + } else { + l1_esz = abi->dte_esz; + ret = scan_its_table(its, l1_gpa, l1_tbl_size, l1_esz, 0, + vgic_its_restore_dte, NULL); + } + + /* scan_its_table returns +1 if all entries are invalid */ + if (ret > 0) + ret = 0; + + return ret; +} + +static int vgic_its_save_cte(struct vgic_its *its, + struct its_collection *collection, + gpa_t gpa, int esz) +{ + u64 val; + + val = (1ULL << KVM_ITS_CTE_VALID_SHIFT | + ((u64)collection->target_addr << KVM_ITS_CTE_RDBASE_SHIFT) | + collection->collection_id); + val = cpu_to_le64(val); + return kvm_write_guest_lock(its->dev->kvm, gpa, &val, esz); +} + +static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) +{ + struct its_collection *collection; + struct kvm *kvm = its->dev->kvm; + u32 target_addr, coll_id; + u64 val; + int ret; + + BUG_ON(esz > sizeof(val)); + ret = kvm_read_guest_lock(kvm, gpa, &val, esz); + if (ret) + return ret; + val = le64_to_cpu(val); + if (!(val & KVM_ITS_CTE_VALID_MASK)) + return 0; + + target_addr = (u32)(val >> KVM_ITS_CTE_RDBASE_SHIFT); + coll_id = val & KVM_ITS_CTE_ICID_MASK; + + if (target_addr != COLLECTION_NOT_MAPPED && + target_addr >= atomic_read(&kvm->online_vcpus)) + return -EINVAL; + + collection = find_collection(its, coll_id); + if (collection) + return -EEXIST; + ret = vgic_its_alloc_collection(its, &collection, coll_id); + if (ret) + return ret; + collection->target_addr = target_addr; + return 1; +} + +/** + * vgic_its_save_collection_table - Save the collection table into + * guest RAM + */ +static int vgic_its_save_collection_table(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_coll_table; + gpa_t gpa = GITS_BASER_ADDR_48_to_52(baser); + struct its_collection *collection; + u64 val; + size_t max_size, filled = 0; + int ret, cte_esz = abi->cte_esz; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + + list_for_each_entry(collection, &its->collection_list, coll_list) { + ret = vgic_its_save_cte(its, collection, gpa, cte_esz); + if (ret) + return ret; + gpa += cte_esz; + filled += cte_esz; + } + + if (filled == max_size) + return 0; + + /* + * table is not fully filled, add a last dummy element + * with valid bit unset + */ + val = 0; + BUG_ON(cte_esz > sizeof(val)); + ret = kvm_write_guest_lock(its->dev->kvm, gpa, &val, cte_esz); + return ret; +} + +/** + * vgic_its_restore_collection_table - reads the collection table + * in guest memory and restores the ITS internal state. Requires the + * BASER registers to be restored before. + */ +static int vgic_its_restore_collection_table(struct vgic_its *its) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + u64 baser = its->baser_coll_table; + int cte_esz = abi->cte_esz; + size_t max_size, read = 0; + gpa_t gpa; + int ret; + + if (!(baser & GITS_BASER_VALID)) + return 0; + + gpa = GITS_BASER_ADDR_48_to_52(baser); + + max_size = GITS_BASER_NR_PAGES(baser) * SZ_64K; + + while (read < max_size) { + ret = vgic_its_restore_cte(its, gpa, cte_esz); + if (ret <= 0) + break; + gpa += cte_esz; + read += cte_esz; + } + + if (ret > 0) + return 0; + + return ret; +} + +/** + * vgic_its_save_tables_v0 - Save the ITS tables into guest ARM + * according to v0 ABI + */ +static int vgic_its_save_tables_v0(struct vgic_its *its) +{ + int ret; + + ret = vgic_its_save_device_tables(its); + if (ret) + return ret; + + return vgic_its_save_collection_table(its); +} + +/** + * vgic_its_restore_tables_v0 - Restore the ITS tables from guest RAM + * to internal data structs according to V0 ABI + * + */ +static int vgic_its_restore_tables_v0(struct vgic_its *its) +{ + int ret; + + ret = vgic_its_restore_collection_table(its); + if (ret) + return ret; + + return vgic_its_restore_device_tables(its); +} + +static int vgic_its_commit_v0(struct vgic_its *its) +{ + const struct vgic_its_abi *abi; + + abi = vgic_its_get_abi(its); + its->baser_coll_table &= ~GITS_BASER_ENTRY_SIZE_MASK; + its->baser_device_table &= ~GITS_BASER_ENTRY_SIZE_MASK; + + its->baser_coll_table |= (GIC_ENCODE_SZ(abi->cte_esz, 5) + << GITS_BASER_ENTRY_SIZE_SHIFT); + + its->baser_device_table |= (GIC_ENCODE_SZ(abi->dte_esz, 5) + << GITS_BASER_ENTRY_SIZE_SHIFT); + return 0; +} + +static void vgic_its_reset(struct kvm *kvm, struct vgic_its *its) +{ + /* We need to keep the ABI specific field values */ + its->baser_coll_table &= ~GITS_BASER_VALID; + its->baser_device_table &= ~GITS_BASER_VALID; + its->cbaser = 0; + its->creadr = 0; + its->cwriter = 0; + its->enabled = 0; + vgic_its_free_device_list(kvm, its); + vgic_its_free_collection_list(kvm, its); +} + +static int vgic_its_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_ITS_ADDR_TYPE: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + case KVM_DEV_ARM_ITS_CTRL_RESET: + return 0; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + return 0; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: + return vgic_its_has_attr_regs(dev, attr); + } + return -ENXIO; +} + +static int vgic_its_ctrl(struct kvm *kvm, struct vgic_its *its, u64 attr) +{ + const struct vgic_its_abi *abi = vgic_its_get_abi(its); + int ret = 0; + + if (attr == KVM_DEV_ARM_VGIC_CTRL_INIT) /* Nothing to do */ + return 0; + + mutex_lock(&kvm->lock); + mutex_lock(&its->its_lock); + + if (!lock_all_vcpus(kvm)) { + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return -EBUSY; + } + + switch (attr) { + case KVM_DEV_ARM_ITS_CTRL_RESET: + vgic_its_reset(kvm, its); + break; + case KVM_DEV_ARM_ITS_SAVE_TABLES: + ret = abi->save_tables(its); + break; + case KVM_DEV_ARM_ITS_RESTORE_TABLES: + ret = abi->restore_tables(its); + break; + } + + unlock_all_vcpus(kvm); + mutex_unlock(&its->its_lock); + mutex_unlock(&kvm->lock); + return ret; +} + +static int vgic_its_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + struct vgic_its *its = dev->private; + int ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + u64 addr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + ret = vgic_check_ioaddr(dev->kvm, &its->vgic_its_base, + addr, SZ_64K); + if (ret) + return ret; + + return vgic_register_its_iodev(dev->kvm, its, addr); + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + return vgic_its_ctrl(dev->kvm, its, attr->attr); + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_its_attr_regs_access(dev, attr, ®, true); + } + } + return -ENXIO; +} + +static int vgic_its_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + struct vgic_its *its = dev->private; + u64 addr = its->vgic_its_base; + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + unsigned long type = (unsigned long)attr->attr; + + if (type != KVM_VGIC_ITS_ADDR_TYPE) + return -ENODEV; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + break; + } + case KVM_DEV_ARM_VGIC_GRP_ITS_REGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + int ret; + + ret = vgic_its_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + default: + return -ENXIO; + } + + return 0; +} + +static struct kvm_device_ops kvm_arm_vgic_its_ops = { + .name = "kvm-arm-vgic-its", + .create = vgic_its_create, + .destroy = vgic_its_destroy, + .set_attr = vgic_its_set_attr, + .get_attr = vgic_its_get_attr, + .has_attr = vgic_its_has_attr, +}; + +int kvm_vgic_register_its_device(void) +{ + return kvm_register_device_ops(&kvm_arm_vgic_its_ops, + KVM_DEV_TYPE_ARM_VGIC_ITS); +} diff --git a/arch/arm64/kvm/vgic/vgic-kvm-device.c b/arch/arm64/kvm/vgic/vgic-kvm-device.c new file mode 100644 index 000000000000..44419679f91a --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-kvm-device.c @@ -0,0 +1,741 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGIC: KVM DEVICE API + * + * Copyright (C) 2015 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <linux/uaccess.h> +#include <asm/kvm_mmu.h> +#include <asm/cputype.h> +#include "vgic.h" + +/* common helpers */ + +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment) +{ + if (addr & ~kvm_phys_mask(kvm)) + return -E2BIG; + + if (!IS_ALIGNED(addr, alignment)) + return -EINVAL; + + if (!IS_VGIC_ADDR_UNDEF(*ioaddr)) + return -EEXIST; + + return 0; +} + +static int vgic_check_type(struct kvm *kvm, int type_needed) +{ + if (kvm->arch.vgic.vgic_model != type_needed) + return -ENODEV; + else + return 0; +} + +/** + * kvm_vgic_addr - set or get vgic VM base addresses + * @kvm: pointer to the vm struct + * @type: the VGIC addr type, one of KVM_VGIC_V[23]_ADDR_TYPE_XXX + * @addr: pointer to address value + * @write: if true set the address in the VM address space, if false read the + * address + * + * Set or get the vgic base addresses for the distributor and the virtual CPU + * interface in the VM physical address space. These addresses are properties + * of the emulated core/SoC and therefore user space initially knows this + * information. + * Check them for sanity (alignment, double assignment). We can't check for + * overlapping regions in case of a virtual GICv3 here, since we don't know + * the number of VCPUs yet, so we defer this check to map_resources(). + */ +int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) +{ + int r = 0; + struct vgic_dist *vgic = &kvm->arch.vgic; + phys_addr_t *addr_ptr, alignment; + u64 undef_value = VGIC_ADDR_UNDEF; + + mutex_lock(&kvm->lock); + switch (type) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + addr_ptr = &vgic->vgic_dist_base; + alignment = SZ_4K; + break; + case KVM_VGIC_V2_ADDR_TYPE_CPU: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V2); + addr_ptr = &vgic->vgic_cpu_base; + alignment = SZ_4K; + break; + case KVM_VGIC_V3_ADDR_TYPE_DIST: + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + addr_ptr = &vgic->vgic_dist_base; + alignment = SZ_64K; + break; + case KVM_VGIC_V3_ADDR_TYPE_REDIST: { + struct vgic_redist_region *rdreg; + + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + if (write) { + r = vgic_v3_set_redist_base(kvm, 0, *addr, 0); + goto out; + } + rdreg = list_first_entry(&vgic->rd_regions, + struct vgic_redist_region, list); + if (!rdreg) + addr_ptr = &undef_value; + else + addr_ptr = &rdreg->base; + break; + } + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: + { + struct vgic_redist_region *rdreg; + u8 index; + + r = vgic_check_type(kvm, KVM_DEV_TYPE_ARM_VGIC_V3); + if (r) + break; + + index = *addr & KVM_VGIC_V3_RDIST_INDEX_MASK; + + if (write) { + gpa_t base = *addr & KVM_VGIC_V3_RDIST_BASE_MASK; + u32 count = (*addr & KVM_VGIC_V3_RDIST_COUNT_MASK) + >> KVM_VGIC_V3_RDIST_COUNT_SHIFT; + u8 flags = (*addr & KVM_VGIC_V3_RDIST_FLAGS_MASK) + >> KVM_VGIC_V3_RDIST_FLAGS_SHIFT; + + if (!count || flags) + r = -EINVAL; + else + r = vgic_v3_set_redist_base(kvm, index, + base, count); + goto out; + } + + rdreg = vgic_v3_rdist_region_from_index(kvm, index); + if (!rdreg) { + r = -ENOENT; + goto out; + } + + *addr = index; + *addr |= rdreg->base; + *addr |= (u64)rdreg->count << KVM_VGIC_V3_RDIST_COUNT_SHIFT; + goto out; + } + default: + r = -ENODEV; + } + + if (r) + goto out; + + if (write) { + r = vgic_check_ioaddr(kvm, addr_ptr, *addr, alignment); + if (!r) + *addr_ptr = *addr; + } else { + *addr = *addr_ptr; + } + +out: + mutex_unlock(&kvm->lock); + return r; +} + +static int vgic_set_common_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int r; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + if (copy_from_user(&addr, uaddr, sizeof(addr))) + return -EFAULT; + + r = kvm_vgic_addr(dev->kvm, type, &addr, true); + return (r == -ENODEV) ? -ENXIO : r; + } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 val; + int ret = 0; + + if (get_user(val, uaddr)) + return -EFAULT; + + /* + * We require: + * - at least 32 SPIs on top of the 16 SGIs and 16 PPIs + * - at most 1024 interrupts + * - a multiple of 32 interrupts + */ + if (val < (VGIC_NR_PRIVATE_IRQS + 32) || + val > VGIC_MAX_RESERVED || + (val & 31)) + return -EINVAL; + + mutex_lock(&dev->kvm->lock); + + if (vgic_ready(dev->kvm) || dev->kvm->arch.vgic.nr_spis) + ret = -EBUSY; + else + dev->kvm->arch.vgic.nr_spis = + val - VGIC_NR_PRIVATE_IRQS; + + mutex_unlock(&dev->kvm->lock); + + return ret; + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: { + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + mutex_lock(&dev->kvm->lock); + r = vgic_init(dev->kvm); + mutex_unlock(&dev->kvm->lock); + return r; + } + break; + } + } + + return -ENXIO; +} + +static int vgic_get_common_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int r = -ENXIO; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 addr; + unsigned long type = (unsigned long)attr->attr; + + r = kvm_vgic_addr(dev->kvm, type, &addr, false); + if (r) + return (r == -ENODEV) ? -ENXIO : r; + + if (copy_to_user(uaddr, &addr, sizeof(addr))) + return -EFAULT; + break; + } + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + + r = put_user(dev->kvm->arch.vgic.nr_spis + + VGIC_NR_PRIVATE_IRQS, uaddr); + break; + } + } + + return r; +} + +static int vgic_create(struct kvm_device *dev, u32 type) +{ + return kvm_vgic_create(dev->kvm, type); +} + +static void vgic_destroy(struct kvm_device *dev) +{ + kfree(dev); +} + +int kvm_register_vgic_device(unsigned long type) +{ + int ret = -ENODEV; + + switch (type) { + case KVM_DEV_TYPE_ARM_VGIC_V2: + ret = kvm_register_device_ops(&kvm_arm_vgic_v2_ops, + KVM_DEV_TYPE_ARM_VGIC_V2); + break; + case KVM_DEV_TYPE_ARM_VGIC_V3: + ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops, + KVM_DEV_TYPE_ARM_VGIC_V3); + + if (ret) + break; + ret = kvm_vgic_register_its_device(); + break; + } + + return ret; +} + +int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) +{ + int cpuid; + + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> + KVM_DEV_ARM_VGIC_CPUID_SHIFT; + + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) + return -EINVAL; + + reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); + reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + + return 0; +} + +/* unlocks vcpus from @vcpu_lock_idx and smaller */ +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) +{ + struct kvm_vcpu *tmp_vcpu; + + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&tmp_vcpu->mutex); + } +} + +void unlock_all_vcpus(struct kvm *kvm) +{ + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); +} + +/* Returns true if all vcpus were locked, false otherwise */ +bool lock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *tmp_vcpu; + int c; + + /* + * Any time a vcpu is run, vcpu_load is called which tries to grab the + * vcpu->mutex. By grabbing the vcpu->mutex of all VCPUs we ensure + * that no other VCPUs are run and fiddle with the vgic state while we + * access it. + */ + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { + if (!mutex_trylock(&tmp_vcpu->mutex)) { + unlock_vcpus(kvm, c - 1); + return false; + } + } + + return true; +} + +/** + * vgic_v2_attr_regs_access - allows user space to access VGIC v2 state + * + * @dev: kvm device handle + * @attr: kvm device attribute + * @reg: address the value is read or written + * @is_write: true if userspace is writing a register + */ +static int vgic_v2_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u32 *reg, bool is_write) +{ + struct vgic_reg_attr reg_attr; + gpa_t addr; + struct kvm_vcpu *vcpu; + int ret; + + ret = vgic_v2_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + mutex_lock(&dev->kvm->lock); + + ret = vgic_init(dev->kvm); + if (ret) + goto out; + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; + } + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + ret = vgic_v2_cpuif_uaccess(vcpu, is_write, addr, reg); + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + ret = vgic_v2_dist_uaccess(vcpu, is_write, addr, reg); + break; + default: + ret = -EINVAL; + break; + } + + unlock_all_vcpus(dev->kvm); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static int vgic_v2_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_set_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_v2_attr_regs_access(dev, attr, ®, true); + } + } + + return -ENXIO; +} + +static int vgic_v2_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_get_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 reg = 0; + + ret = vgic_v2_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + } + + return -ENXIO; +} + +static int vgic_v2_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V2_ADDR_TYPE_DIST: + case KVM_VGIC_V2_ADDR_TYPE_CPU: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + return vgic_v2_has_attr_regs(dev, attr); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + } + } + return -ENXIO; +} + +struct kvm_device_ops kvm_arm_vgic_v2_ops = { + .name = "kvm-arm-vgic-v2", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_v2_set_attr, + .get_attr = vgic_v2_get_attr, + .has_attr = vgic_v2_has_attr, +}; + +int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) +{ + unsigned long vgic_mpidr, mpidr_reg; + + /* + * For KVM_DEV_ARM_VGIC_GRP_DIST_REGS group, + * attr might not hold MPIDR. Hence assume vcpu0. + */ + if (attr->group != KVM_DEV_ARM_VGIC_GRP_DIST_REGS) { + vgic_mpidr = (attr->attr & KVM_DEV_ARM_VGIC_V3_MPIDR_MASK) >> + KVM_DEV_ARM_VGIC_V3_MPIDR_SHIFT; + + mpidr_reg = VGIC_TO_MPIDR(vgic_mpidr); + reg_attr->vcpu = kvm_mpidr_to_vcpu(dev->kvm, mpidr_reg); + } else { + reg_attr->vcpu = kvm_get_vcpu(dev->kvm, 0); + } + + if (!reg_attr->vcpu) + return -EINVAL; + + reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; + + return 0; +} + +/* + * vgic_v3_attr_regs_access - allows user space to access VGIC v3 state + * + * @dev: kvm device handle + * @attr: kvm device attribute + * @reg: address the value is read or written + * @is_write: true if userspace is writing a register + */ +static int vgic_v3_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + u64 *reg, bool is_write) +{ + struct vgic_reg_attr reg_attr; + gpa_t addr; + struct kvm_vcpu *vcpu; + int ret; + u32 tmp32; + + ret = vgic_v3_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + mutex_lock(&dev->kvm->lock); + + if (unlikely(!vgic_initialized(dev->kvm))) { + ret = -EBUSY; + goto out; + } + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; + } + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + if (is_write) + tmp32 = *reg; + + ret = vgic_v3_dist_uaccess(vcpu, is_write, addr, &tmp32); + if (!is_write) + *reg = tmp32; + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + if (is_write) + tmp32 = *reg; + + ret = vgic_v3_redist_uaccess(vcpu, is_write, addr, &tmp32); + if (!is_write) + *reg = tmp32; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 regid; + + regid = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); + ret = vgic_v3_cpu_sysregs_uaccess(vcpu, is_write, + regid, reg); + break; + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + unsigned int info, intid; + + info = (attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT; + if (info == VGIC_LEVEL_INFO_LINE_LEVEL) { + intid = attr->attr & + KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK; + ret = vgic_v3_line_level_info_uaccess(vcpu, is_write, + intid, reg); + } else { + ret = -EINVAL; + } + break; + } + default: + ret = -EINVAL; + break; + } + + unlock_all_vcpus(dev->kvm); +out: + mutex_unlock(&dev->kvm->lock); + return ret; +} + +static int vgic_v3_set_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_set_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u32 tmp32; + u64 reg; + + if (get_user(tmp32, uaddr)) + return -EFAULT; + + reg = tmp32; + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + if (get_user(reg, uaddr)) + return -EFAULT; + + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + if (get_user(tmp32, uaddr)) + return -EFAULT; + + reg = tmp32; + return vgic_v3_attr_regs_access(dev, attr, ®, true); + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: { + int ret; + + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + mutex_lock(&dev->kvm->lock); + + if (!lock_all_vcpus(dev->kvm)) { + mutex_unlock(&dev->kvm->lock); + return -EBUSY; + } + ret = vgic_v3_save_pending_tables(dev->kvm); + unlock_all_vcpus(dev->kvm); + mutex_unlock(&dev->kvm->lock); + return ret; + } + break; + } + } + return -ENXIO; +} + +static int vgic_v3_get_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + int ret; + + ret = vgic_get_common_attr(dev, attr); + if (ret != -ENXIO) + return ret; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + tmp32 = reg; + return put_user(tmp32, uaddr); + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 __user *uaddr = (u64 __user *)(long)attr->addr; + u64 reg; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + return put_user(reg, uaddr); + } + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + u32 __user *uaddr = (u32 __user *)(long)attr->addr; + u64 reg; + u32 tmp32; + + ret = vgic_v3_attr_regs_access(dev, attr, ®, false); + if (ret) + return ret; + tmp32 = reg; + return put_user(tmp32, uaddr); + } + } + return -ENXIO; +} + +static int vgic_v3_has_attr(struct kvm_device *dev, + struct kvm_device_attr *attr) +{ + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_ADDR: + switch (attr->attr) { + case KVM_VGIC_V3_ADDR_TYPE_DIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST: + case KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION: + return 0; + } + break; + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: + return vgic_v3_has_attr_regs(dev, attr); + case KVM_DEV_ARM_VGIC_GRP_NR_IRQS: + return 0; + case KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO: { + if (((attr->attr & KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK) >> + KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) == + VGIC_LEVEL_INFO_LINE_LEVEL) + return 0; + break; + } + case KVM_DEV_ARM_VGIC_GRP_CTRL: + switch (attr->attr) { + case KVM_DEV_ARM_VGIC_CTRL_INIT: + return 0; + case KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES: + return 0; + } + } + return -ENXIO; +} + +struct kvm_device_ops kvm_arm_vgic_v3_ops = { + .name = "kvm-arm-vgic-v3", + .create = vgic_create, + .destroy = vgic_destroy, + .set_attr = vgic_v3_set_attr, + .get_attr = vgic_v3_get_attr, + .has_attr = vgic_v3_has_attr, +}; diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v2.c b/arch/arm64/kvm/vgic/vgic-mmio-v2.c new file mode 100644 index 000000000000..a016f07adc28 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio-v2.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGICv2 MMIO handling functions + */ + +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/nospec.h> + +#include <kvm/iodev.h> +#include <kvm/arm_vgic.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +/* + * The Revision field in the IIDR have the following meanings: + * + * Revision 1: Report GICv2 interrupts as group 0 instead of group 1 + * Revision 2: Interrupt groups are guest-configurable and signaled using + * their configured groups. + */ + +static unsigned long vgic_mmio_read_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + u32 value; + + switch (addr & 0x0c) { + case GIC_DIST_CTRL: + value = vgic->enabled ? GICD_ENABLE : 0; + break; + case GIC_DIST_CTR: + value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; + value = (value >> 5) - 1; + value |= (atomic_read(&vcpu->kvm->online_vcpus) - 1) << 5; + break; + case GIC_DIST_IIDR: + value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | + (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); + break; + default: + return 0; + } + + return value; +} + +static void vgic_mmio_write_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + bool was_enabled = dist->enabled; + + switch (addr & 0x0c) { + case GIC_DIST_CTRL: + dist->enabled = val & GICD_ENABLE; + if (!was_enabled && dist->enabled) + vgic_kick_vcpus(vcpu->kvm); + break; + case GIC_DIST_CTR: + case GIC_DIST_IIDR: + /* Nothing to do */ + return; + } +} + +static int vgic_mmio_uaccess_write_v2_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + switch (addr & 0x0c) { + case GIC_DIST_IIDR: + if (val != vgic_mmio_read_v2_misc(vcpu, addr, len)) + return -EINVAL; + + /* + * If we observe a write to GICD_IIDR we know that userspace + * has been updated and has had a chance to cope with older + * kernels (VGICv2 IIDR.Revision == 0) incorrectly reporting + * interrupts as group 1, and therefore we now allow groups to + * be user writable. Doing this by default would break + * migration from old kernels to new kernels with legacy + * userspace. + */ + vcpu->kvm->arch.vgic.v2_groups_user_writable = true; + return 0; + } + + vgic_mmio_write_v2_misc(vcpu, addr, len, val); + return 0; +} + +static int vgic_mmio_uaccess_write_v2_group(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + if (vcpu->kvm->arch.vgic.v2_groups_user_writable) + vgic_mmio_write_group(vcpu, addr, len, val); + + return 0; +} + +static void vgic_mmio_write_sgir(struct kvm_vcpu *source_vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int nr_vcpus = atomic_read(&source_vcpu->kvm->online_vcpus); + int intid = val & 0xf; + int targets = (val >> 16) & 0xff; + int mode = (val >> 24) & 0x03; + int c; + struct kvm_vcpu *vcpu; + unsigned long flags; + + switch (mode) { + case 0x0: /* as specified by targets */ + break; + case 0x1: + targets = (1U << nr_vcpus) - 1; /* all, ... */ + targets &= ~(1U << source_vcpu->vcpu_id); /* but self */ + break; + case 0x2: /* this very vCPU only */ + targets = (1U << source_vcpu->vcpu_id); + break; + case 0x3: /* reserved */ + return; + } + + kvm_for_each_vcpu(c, vcpu, source_vcpu->kvm) { + struct vgic_irq *irq; + + if (!(targets & (1U << c))) + continue; + + irq = vgic_get_irq(source_vcpu->kvm, vcpu, intid); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + irq->source |= 1U << source_vcpu->vcpu_id; + + vgic_queue_irq_unlock(source_vcpu->kvm, irq, flags); + vgic_put_irq(source_vcpu->kvm, irq); + } +} + +static unsigned long vgic_mmio_read_target(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->targets << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +static void vgic_mmio_write_target(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + u8 cpu_mask = GENMASK(atomic_read(&vcpu->kvm->online_vcpus) - 1, 0); + int i; + unsigned long flags; + + /* GICD_ITARGETSR[0-7] are read-only */ + if (intid < VGIC_NR_PRIVATE_IRQS) + return; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid + i); + int target; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->targets = (val >> (i * 8)) & cpu_mask; + target = irq->targets ? __ffs(irq->targets) : 0; + irq->target_vcpu = kvm_get_vcpu(vcpu->kvm, target); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static unsigned long vgic_mmio_read_sgipend(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = addr & 0x0f; + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->source << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + return val; +} + +static void vgic_mmio_write_sgipendc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = addr & 0x0f; + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->source &= ~((val >> (i * 8)) & 0xff); + if (!irq->source) + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static void vgic_mmio_write_sgipends(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = addr & 0x0f; + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + irq->source |= (val >> (i * 8)) & 0xff; + + if (irq->source) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + vgic_put_irq(vcpu->kvm, irq); + } +} + +#define GICC_ARCH_VERSION_V2 0x2 + +/* These are for userland accesses only, there is no guest-facing emulation. */ +static unsigned long vgic_mmio_read_vcpuif(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_vmcr vmcr; + u32 val; + + vgic_get_vmcr(vcpu, &vmcr); + + switch (addr & 0xff) { + case GIC_CPU_CTRL: + val = vmcr.grpen0 << GIC_CPU_CTRL_EnableGrp0_SHIFT; + val |= vmcr.grpen1 << GIC_CPU_CTRL_EnableGrp1_SHIFT; + val |= vmcr.ackctl << GIC_CPU_CTRL_AckCtl_SHIFT; + val |= vmcr.fiqen << GIC_CPU_CTRL_FIQEn_SHIFT; + val |= vmcr.cbpr << GIC_CPU_CTRL_CBPR_SHIFT; + val |= vmcr.eoim << GIC_CPU_CTRL_EOImodeNS_SHIFT; + + break; + case GIC_CPU_PRIMASK: + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * the PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + val = (vmcr.pmr & GICV_PMR_PRIORITY_MASK) >> + GICV_PMR_PRIORITY_SHIFT; + break; + case GIC_CPU_BINPOINT: + val = vmcr.bpr; + break; + case GIC_CPU_ALIAS_BINPOINT: + val = vmcr.abpr; + break; + case GIC_CPU_IDENT: + val = ((PRODUCT_ID_KVM << 20) | + (GICC_ARCH_VERSION_V2 << 16) | + IMPLEMENTER_ARM); + break; + default: + return 0; + } + + return val; +} + +static void vgic_mmio_write_vcpuif(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_vmcr vmcr; + + vgic_get_vmcr(vcpu, &vmcr); + + switch (addr & 0xff) { + case GIC_CPU_CTRL: + vmcr.grpen0 = !!(val & GIC_CPU_CTRL_EnableGrp0); + vmcr.grpen1 = !!(val & GIC_CPU_CTRL_EnableGrp1); + vmcr.ackctl = !!(val & GIC_CPU_CTRL_AckCtl); + vmcr.fiqen = !!(val & GIC_CPU_CTRL_FIQEn); + vmcr.cbpr = !!(val & GIC_CPU_CTRL_CBPR); + vmcr.eoim = !!(val & GIC_CPU_CTRL_EOImodeNS); + + break; + case GIC_CPU_PRIMASK: + /* + * Our KVM_DEV_TYPE_ARM_VGIC_V2 device ABI exports the + * the PMR field as GICH_VMCR.VMPriMask rather than + * GICC_PMR.Priority, so we expose the upper five bits of + * priority mask to userspace using the lower bits in the + * unsigned long. + */ + vmcr.pmr = (val << GICV_PMR_PRIORITY_SHIFT) & + GICV_PMR_PRIORITY_MASK; + break; + case GIC_CPU_BINPOINT: + vmcr.bpr = val; + break; + case GIC_CPU_ALIAS_BINPOINT: + vmcr.abpr = val; + break; + } + + vgic_set_vmcr(vcpu, &vmcr); +} + +static unsigned long vgic_mmio_read_apr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + int n; /* which APRn is this */ + + n = (addr >> 2) & 0x3; + + if (kvm_vgic_global_state.type == VGIC_V2) { + /* GICv2 hardware systems support max. 32 groups */ + if (n != 0) + return 0; + return vcpu->arch.vgic_cpu.vgic_v2.vgic_apr; + } else { + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (n > vgic_v3_max_apr_idx(vcpu)) + return 0; + + n = array_index_nospec(n, 4); + + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ + return vgicv3->vgic_ap1r[n]; + } +} + +static void vgic_mmio_write_apr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int n; /* which APRn is this */ + + n = (addr >> 2) & 0x3; + + if (kvm_vgic_global_state.type == VGIC_V2) { + /* GICv2 hardware systems support max. 32 groups */ + if (n != 0) + return; + vcpu->arch.vgic_cpu.vgic_v2.vgic_apr = val; + } else { + struct vgic_v3_cpu_if *vgicv3 = &vcpu->arch.vgic_cpu.vgic_v3; + + if (n > vgic_v3_max_apr_idx(vcpu)) + return; + + n = array_index_nospec(n, 4); + + /* GICv3 only uses ICH_AP1Rn for memory mapped (GICv2) guests */ + vgicv3->vgic_ap1r[n] = val; + } +} + +static const struct vgic_register_region vgic_v2_dist_registers[] = { + REGISTER_DESC_WITH_LENGTH_UACCESS(GIC_DIST_CTRL, + vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, + NULL, vgic_mmio_uaccess_write_v2_misc, + 12, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, + vgic_mmio_read_group, vgic_mmio_write_group, + NULL, vgic_mmio_uaccess_write_v2_group, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, + vgic_mmio_read_pending, vgic_mmio_write_spending, + NULL, vgic_uaccess_write_spending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + NULL, vgic_uaccess_write_cpending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, + vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, + vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_CLEAR, + vgic_mmio_read_sgipend, vgic_mmio_write_sgipendc, 16, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_LENGTH(GIC_DIST_SGI_PENDING_SET, + vgic_mmio_read_sgipend, vgic_mmio_write_sgipends, 16, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), +}; + +static const struct vgic_register_region vgic_v2_cpu_registers[] = { + REGISTER_DESC_WITH_LENGTH(GIC_CPU_CTRL, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_PRIMASK, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_BINPOINT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_ALIAS_BINPOINT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_ACTIVEPRIO, + vgic_mmio_read_apr, vgic_mmio_write_apr, 16, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GIC_CPU_IDENT, + vgic_mmio_read_vcpuif, vgic_mmio_write_vcpuif, 4, + VGIC_ACCESS_32bit), +}; + +unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev) +{ + dev->regions = vgic_v2_dist_registers; + dev->nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); + + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); + + return SZ_4K; +} + +int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + struct vgic_io_device iodev; + struct vgic_reg_attr reg_attr; + struct kvm_vcpu *vcpu; + gpa_t addr; + int ret; + + ret = vgic_v2_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + iodev.regions = vgic_v2_dist_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v2_dist_registers); + iodev.base_addr = 0; + break; + case KVM_DEV_ARM_VGIC_GRP_CPU_REGS: + iodev.regions = vgic_v2_cpu_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers); + iodev.base_addr = 0; + break; + default: + return -ENXIO; + } + + /* We only support aligned 32-bit accesses. */ + if (addr & 3) + return -ENXIO; + + region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); + if (!region) + return -ENXIO; + + return 0; +} + +int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v2_cpu_registers, + .nr_regions = ARRAY_SIZE(vgic_v2_cpu_registers), + .iodev_type = IODEV_CPUIF, + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} + +int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v2_dist_registers, + .nr_regions = ARRAY_SIZE(vgic_v2_dist_registers), + .iodev_type = IODEV_DIST, + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio-v3.c b/arch/arm64/kvm/vgic/vgic-mmio-v3.c new file mode 100644 index 000000000000..d2339a2b9fb9 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio-v3.c @@ -0,0 +1,1063 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGICv3 MMIO handling functions + */ + +#include <linux/bitfield.h> +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/interrupt.h> +#include <kvm/iodev.h> +#include <kvm/arm_vgic.h> + +#include <asm/kvm_emulate.h> +#include <asm/kvm_arm.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +/* extract @num bytes at @offset bytes offset in data */ +unsigned long extract_bytes(u64 data, unsigned int offset, + unsigned int num) +{ + return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); +} + +/* allows updates of any half of a 64-bit register (or the whole thing) */ +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val) +{ + int lower = (offset & 4) * 8; + int upper = lower + 8 * len - 1; + + reg &= ~GENMASK_ULL(upper, lower); + val &= GENMASK_ULL(len * 8 - 1, 0); + + return reg | ((u64)val << lower); +} + +bool vgic_has_its(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (dist->vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + return dist->has_its; +} + +bool vgic_supports_direct_msis(struct kvm *kvm) +{ + return (kvm_vgic_global_state.has_gicv4_1 || + (kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm))); +} + +/* + * The Revision field in the IIDR have the following meanings: + * + * Revision 2: Interrupt groups are guest-configurable and signaled using + * their configured groups. + */ + +static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *vgic = &vcpu->kvm->arch.vgic; + u32 value = 0; + + switch (addr & 0x0c) { + case GICD_CTLR: + if (vgic->enabled) + value |= GICD_CTLR_ENABLE_SS_G1; + value |= GICD_CTLR_ARE_NS | GICD_CTLR_DS; + if (vgic->nassgireq) + value |= GICD_CTLR_nASSGIreq; + break; + case GICD_TYPER: + value = vgic->nr_spis + VGIC_NR_PRIVATE_IRQS; + value = (value >> 5) - 1; + if (vgic_has_its(vcpu->kvm)) { + value |= (INTERRUPT_ID_BITS_ITS - 1) << 19; + value |= GICD_TYPER_LPIS; + } else { + value |= (INTERRUPT_ID_BITS_SPIS - 1) << 19; + } + break; + case GICD_TYPER2: + if (kvm_vgic_global_state.has_gicv4_1) + value = GICD_TYPER2_nASSGIcap; + break; + case GICD_IIDR: + value = (PRODUCT_ID_KVM << GICD_IIDR_PRODUCT_ID_SHIFT) | + (vgic->implementation_rev << GICD_IIDR_REVISION_SHIFT) | + (IMPLEMENTER_ARM << GICD_IIDR_IMPLEMENTER_SHIFT); + break; + default: + return 0; + } + + return value; +} + +static void vgic_mmio_write_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + switch (addr & 0x0c) { + case GICD_CTLR: { + bool was_enabled, is_hwsgi; + + mutex_lock(&vcpu->kvm->lock); + + was_enabled = dist->enabled; + is_hwsgi = dist->nassgireq; + + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; + + /* Not a GICv4.1? No HW SGIs */ + if (!kvm_vgic_global_state.has_gicv4_1) + val &= ~GICD_CTLR_nASSGIreq; + + /* Dist stays enabled? nASSGIreq is RO */ + if (was_enabled && dist->enabled) { + val &= ~GICD_CTLR_nASSGIreq; + val |= FIELD_PREP(GICD_CTLR_nASSGIreq, is_hwsgi); + } + + /* Switching HW SGIs? */ + dist->nassgireq = val & GICD_CTLR_nASSGIreq; + if (is_hwsgi != dist->nassgireq) + vgic_v4_configure_vsgis(vcpu->kvm); + + if (kvm_vgic_global_state.has_gicv4_1 && + was_enabled != dist->enabled) + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_RELOAD_GICv4); + else if (!was_enabled && dist->enabled) + vgic_kick_vcpus(vcpu->kvm); + + mutex_unlock(&vcpu->kvm->lock); + break; + } + case GICD_TYPER: + case GICD_TYPER2: + case GICD_IIDR: + /* This is at best for documentation purposes... */ + return; + } +} + +static int vgic_mmio_uaccess_write_v3_misc(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + switch (addr & 0x0c) { + case GICD_TYPER2: + case GICD_IIDR: + if (val != vgic_mmio_read_v3_misc(vcpu, addr, len)) + return -EINVAL; + return 0; + case GICD_CTLR: + /* Not a GICv4.1? No HW SGIs */ + if (!kvm_vgic_global_state.has_gicv4_1) + val &= ~GICD_CTLR_nASSGIreq; + + dist->enabled = val & GICD_CTLR_ENABLE_SS_G1; + dist->nassgireq = val & GICD_CTLR_nASSGIreq; + return 0; + } + + vgic_mmio_write_v3_misc(vcpu, addr, len, val); + return 0; +} + +static unsigned long vgic_mmio_read_irouter(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + int intid = VGIC_ADDR_TO_INTID(addr, 64); + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, NULL, intid); + unsigned long ret = 0; + + if (!irq) + return 0; + + /* The upper word is RAZ for us. */ + if (!(addr & 4)) + ret = extract_bytes(READ_ONCE(irq->mpidr), addr & 7, len); + + vgic_put_irq(vcpu->kvm, irq); + return ret; +} + +static void vgic_mmio_write_irouter(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + int intid = VGIC_ADDR_TO_INTID(addr, 64); + struct vgic_irq *irq; + unsigned long flags; + + /* The upper word is WI for us since we don't implement Aff3. */ + if (addr & 4) + return; + + irq = vgic_get_irq(vcpu->kvm, NULL, intid); + + if (!irq) + return; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* We only care about and preserve Aff0, Aff1 and Aff2. */ + irq->mpidr = val & GENMASK(23, 0); + irq->target_vcpu = kvm_mpidr_to_vcpu(vcpu->kvm, irq->mpidr); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); +} + +static unsigned long vgic_mmio_read_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + return vgic_cpu->lpis_enabled ? GICR_CTLR_ENABLE_LPIS : 0; +} + + +static void vgic_mmio_write_v3r_ctlr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + bool was_enabled = vgic_cpu->lpis_enabled; + + if (!vgic_has_its(vcpu->kvm)) + return; + + vgic_cpu->lpis_enabled = val & GICR_CTLR_ENABLE_LPIS; + + if (was_enabled && !vgic_cpu->lpis_enabled) { + vgic_flush_pending_lpis(vcpu); + vgic_its_invalidate_cache(vcpu->kvm); + } + + if (!was_enabled && vgic_cpu->lpis_enabled) + vgic_enable_lpis(vcpu); +} + +static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + unsigned long mpidr = kvm_vcpu_get_mpidr_aff(vcpu); + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_redist_region *rdreg = vgic_cpu->rdreg; + int target_vcpu_id = vcpu->vcpu_id; + gpa_t last_rdist_typer = rdreg->base + GICR_TYPER + + (rdreg->free_index - 1) * KVM_VGIC_V3_REDIST_SIZE; + u64 value; + + value = (u64)(mpidr & GENMASK(23, 0)) << 32; + value |= ((target_vcpu_id & 0xffff) << 8); + + if (addr == last_rdist_typer) + value |= GICR_TYPER_LAST; + if (vgic_has_its(vcpu->kvm)) + value |= GICR_TYPER_PLPIS; + + return extract_bytes(value, addr & 7, len); +} + +static unsigned long vgic_mmio_read_v3r_iidr(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return (PRODUCT_ID_KVM << 24) | (IMPLEMENTER_ARM << 0); +} + +static unsigned long vgic_mmio_read_v3_idregs(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + switch (addr & 0xffff) { + case GICD_PIDR2: + /* report a GICv3 compliant implementation */ + return 0x3b; + } + + return 0; +} + +static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* + * pending state of interrupt is latched in pending_latch variable. + * Userspace will save and restore pending state and line_level + * separately. + * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst + * for handling of ISPENDR and ICPENDR. + */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + bool state = irq->pending_latch; + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &state); + WARN_ON(err); + } + + if (state) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (test_bit(i, &val)) { + /* + * pending_latch is set irrespective of irq type + * (level or edge) to avoid dependency that VM should + * restore irq config before pending info. + */ + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + irq->pending_latch = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* We want to avoid outer shareable. */ +u64 vgic_sanitise_shareability(u64 field) +{ + switch (field) { + case GIC_BASER_OuterShareable: + return GIC_BASER_InnerShareable; + default: + return field; + } +} + +/* Avoid any inner non-cacheable mapping. */ +u64 vgic_sanitise_inner_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_nCnB: + case GIC_BASER_CACHE_nC: + return GIC_BASER_CACHE_RaWb; + default: + return field; + } +} + +/* Non-cacheable or same-as-inner are OK. */ +u64 vgic_sanitise_outer_cacheability(u64 field) +{ + switch (field) { + case GIC_BASER_CACHE_SameAsInner: + case GIC_BASER_CACHE_nC: + return field; + default: + return GIC_BASER_CACHE_nC; + } +} + +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)) +{ + u64 field = (reg & field_mask) >> field_shift; + + field = sanitise_fn(field) << field_shift; + return (reg & ~field_mask) | field; +} + +#define PROPBASER_RES0_MASK \ + (GENMASK_ULL(63, 59) | GENMASK_ULL(55, 52) | GENMASK_ULL(6, 5)) +#define PENDBASER_RES0_MASK \ + (BIT_ULL(63) | GENMASK_ULL(61, 59) | GENMASK_ULL(55, 52) | \ + GENMASK_ULL(15, 12) | GENMASK_ULL(6, 0)) + +static u64 vgic_sanitise_pendbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PENDBASER_SHAREABILITY_MASK, + GICR_PENDBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_INNER_CACHEABILITY_MASK, + GICR_PENDBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PENDBASER_OUTER_CACHEABILITY_MASK, + GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PENDBASER_RES0_MASK; + + return reg; +} + +static u64 vgic_sanitise_propbaser(u64 reg) +{ + reg = vgic_sanitise_field(reg, GICR_PROPBASER_SHAREABILITY_MASK, + GICR_PROPBASER_SHAREABILITY_SHIFT, + vgic_sanitise_shareability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_INNER_CACHEABILITY_MASK, + GICR_PROPBASER_INNER_CACHEABILITY_SHIFT, + vgic_sanitise_inner_cacheability); + reg = vgic_sanitise_field(reg, GICR_PROPBASER_OUTER_CACHEABILITY_MASK, + GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT, + vgic_sanitise_outer_cacheability); + + reg &= ~PROPBASER_RES0_MASK; + return reg; +} + +static unsigned long vgic_mmio_read_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + + return extract_bytes(dist->propbaser, addr & 7, len); +} + +static void vgic_mmio_write_propbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 old_propbaser, propbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_cpu->lpis_enabled) + return; + + do { + old_propbaser = READ_ONCE(dist->propbaser); + propbaser = old_propbaser; + propbaser = update_64bit_reg(propbaser, addr & 4, len, val); + propbaser = vgic_sanitise_propbaser(propbaser); + } while (cmpxchg64(&dist->propbaser, old_propbaser, + propbaser) != old_propbaser); +} + +static unsigned long vgic_mmio_read_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 value = vgic_cpu->pendbaser; + + value &= ~GICR_PENDBASER_PTZ; + + return extract_bytes(value, addr & 7, len); +} + +static void vgic_mmio_write_pendbase(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 old_pendbaser, pendbaser; + + /* Storing a value with LPIs already enabled is undefined */ + if (vgic_cpu->lpis_enabled) + return; + + do { + old_pendbaser = READ_ONCE(vgic_cpu->pendbaser); + pendbaser = old_pendbaser; + pendbaser = update_64bit_reg(pendbaser, addr & 4, len, val); + pendbaser = vgic_sanitise_pendbaser(pendbaser); + } while (cmpxchg64(&vgic_cpu->pendbaser, old_pendbaser, + pendbaser) != old_pendbaser); +} + +/* + * The GICv3 per-IRQ registers are split to control PPIs and SGIs in the + * redistributors, while SPIs are covered by registers in the distributor + * block. Trying to set private IRQs in this block gets ignored. + * We take some special care here to fix the calculation of the register + * offset. + */ +#define REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(off, rd, wr, ur, uw, bpi, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = bpi, \ + .len = (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ + .access_flags = acc, \ + .read = vgic_mmio_read_raz, \ + .write = vgic_mmio_write_wi, \ + }, { \ + .reg_offset = off + (bpi * VGIC_NR_PRIVATE_IRQS) / 8, \ + .bits_per_irq = bpi, \ + .len = (bpi * (1024 - VGIC_NR_PRIVATE_IRQS)) / 8, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ + } + +static const struct vgic_register_region vgic_v3_dist_registers[] = { + REGISTER_DESC_WITH_LENGTH_UACCESS(GICD_CTLR, + vgic_mmio_read_v3_misc, vgic_mmio_write_v3_misc, + NULL, vgic_mmio_uaccess_write_v3_misc, + 16, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICD_STATUSR, + vgic_mmio_read_rao, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGROUPR, + vgic_mmio_read_group, vgic_mmio_write_group, NULL, NULL, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISENABLER, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICENABLER, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, + 1, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ITARGETSR, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 8, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICFGR, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IGRPMODR, + vgic_mmio_read_raz, vgic_mmio_write_wi, NULL, NULL, 1, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IROUTER, + vgic_mmio_read_irouter, vgic_mmio_write_irouter, NULL, NULL, 64, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICD_IDREGS, + vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, + VGIC_ACCESS_32bit), +}; + +static const struct vgic_register_region vgic_v3_rd_registers[] = { + /* RD_base registers */ + REGISTER_DESC_WITH_LENGTH(GICR_CTLR, + vgic_mmio_read_v3r_ctlr, vgic_mmio_write_v3r_ctlr, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_STATUSR, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_IIDR, + vgic_mmio_read_v3r_iidr, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_TYPER, + vgic_mmio_read_v3r_typer, vgic_mmio_write_wi, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_WAKER, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_PROPBASER, + vgic_mmio_read_propbase, vgic_mmio_write_propbase, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_PENDBASER, + vgic_mmio_read_pendbase, vgic_mmio_write_pendbase, 8, + VGIC_ACCESS_64bit | VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(GICR_IDREGS, + vgic_mmio_read_v3_idregs, vgic_mmio_write_wi, 48, + VGIC_ACCESS_32bit), + /* SGI_base registers */ + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGROUPR0, + vgic_mmio_read_group, vgic_mmio_write_group, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISENABLER0, + vgic_mmio_read_enable, vgic_mmio_write_senable, + NULL, vgic_uaccess_write_senable, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICENABLER0, + vgic_mmio_read_enable, vgic_mmio_write_cenable, + NULL, vgic_uaccess_write_cenable, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0, + vgic_mmio_read_pending, vgic_mmio_write_spending, + vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0, + vgic_mmio_read_pending, vgic_mmio_write_cpending, + vgic_mmio_read_raz, vgic_mmio_uaccess_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_sactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_sactive, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_cactive, + vgic_uaccess_read_active, vgic_mmio_uaccess_write_cactive, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IPRIORITYR0, + vgic_mmio_read_priority, vgic_mmio_write_priority, 32, + VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_ICFGR0, + vgic_mmio_read_config, vgic_mmio_write_config, 8, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_IGRPMODR0, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH(SZ_64K + GICR_NSACR, + vgic_mmio_read_raz, vgic_mmio_write_wi, 4, + VGIC_ACCESS_32bit), +}; + +unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev) +{ + dev->regions = vgic_v3_dist_registers; + dev->nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); + + kvm_iodevice_init(&dev->dev, &kvm_io_gic_ops); + + return SZ_64K; +} + +/** + * vgic_register_redist_iodev - register a single redist iodev + * @vcpu: The VCPU to which the redistributor belongs + * + * Register a KVM iodev for this VCPU's redistributor using the address + * provided. + * + * Return 0 on success, -ERRNO otherwise. + */ +int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct vgic_dist *vgic = &kvm->arch.vgic; + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + struct vgic_redist_region *rdreg; + gpa_t rd_base; + int ret; + + if (!IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) + return 0; + + /* + * We may be creating VCPUs before having set the base address for the + * redistributor region, in which case we will come back to this + * function for all VCPUs when the base address is set. Just return + * without doing any work for now. + */ + rdreg = vgic_v3_rdist_free_slot(&vgic->rd_regions); + if (!rdreg) + return 0; + + if (!vgic_v3_check_base(kvm)) + return -EINVAL; + + vgic_cpu->rdreg = rdreg; + + rd_base = rdreg->base + rdreg->free_index * KVM_VGIC_V3_REDIST_SIZE; + + kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); + rd_dev->base_addr = rd_base; + rd_dev->iodev_type = IODEV_REDIST; + rd_dev->regions = vgic_v3_rd_registers; + rd_dev->nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); + rd_dev->redist_vcpu = vcpu; + + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, rd_base, + 2 * SZ_64K, &rd_dev->dev); + mutex_unlock(&kvm->slots_lock); + + if (ret) + return ret; + + rdreg->free_index++; + return 0; +} + +static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) +{ + struct vgic_io_device *rd_dev = &vcpu->arch.vgic_cpu.rd_iodev; + + kvm_io_bus_unregister_dev(vcpu->kvm, KVM_MMIO_BUS, &rd_dev->dev); +} + +static int vgic_register_all_redist_iodevs(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int c, ret = 0; + + kvm_for_each_vcpu(c, vcpu, kvm) { + ret = vgic_register_redist_iodev(vcpu); + if (ret) + break; + } + + if (ret) { + /* The current c failed, so we start with the previous one. */ + mutex_lock(&kvm->slots_lock); + for (c--; c >= 0; c--) { + vcpu = kvm_get_vcpu(kvm, c); + vgic_unregister_redist_iodev(vcpu); + } + mutex_unlock(&kvm->slots_lock); + } + + return ret; +} + +/** + * vgic_v3_insert_redist_region - Insert a new redistributor region + * + * Performs various checks before inserting the rdist region in the list. + * Those tests depend on whether the size of the rdist region is known + * (ie. count != 0). The list is sorted by rdist region index. + * + * @kvm: kvm handle + * @index: redist region index + * @base: base of the new rdist region + * @count: number of redistributors the region is made of (0 in the old style + * single region, whose size is induced from the number of vcpus) + * + * Return 0 on success, < 0 otherwise + */ +static int vgic_v3_insert_redist_region(struct kvm *kvm, uint32_t index, + gpa_t base, uint32_t count) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + struct list_head *rd_regions = &d->rd_regions; + size_t size = count * KVM_VGIC_V3_REDIST_SIZE; + int ret; + + /* single rdist region already set ?*/ + if (!count && !list_empty(rd_regions)) + return -EINVAL; + + /* cross the end of memory ? */ + if (base + size < base) + return -EINVAL; + + if (list_empty(rd_regions)) { + if (index != 0) + return -EINVAL; + } else { + rdreg = list_last_entry(rd_regions, + struct vgic_redist_region, list); + if (index != rdreg->index + 1) + return -EINVAL; + + /* Cannot add an explicitly sized regions after legacy region */ + if (!rdreg->count) + return -EINVAL; + } + + /* + * For legacy single-region redistributor regions (!count), + * check that the redistributor region does not overlap with the + * distributor's address space. + */ + if (!count && !IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + vgic_dist_overlap(kvm, base, size)) + return -EINVAL; + + /* collision with any other rdist region? */ + if (vgic_v3_rdist_overlap(kvm, base, size)) + return -EINVAL; + + rdreg = kzalloc(sizeof(*rdreg), GFP_KERNEL); + if (!rdreg) + return -ENOMEM; + + rdreg->base = VGIC_ADDR_UNDEF; + + ret = vgic_check_ioaddr(kvm, &rdreg->base, base, SZ_64K); + if (ret) + goto free; + + rdreg->base = base; + rdreg->count = count; + rdreg->free_index = 0; + rdreg->index = index; + + list_add_tail(&rdreg->list, rd_regions); + return 0; +free: + kfree(rdreg); + return ret; +} + +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count) +{ + int ret; + + ret = vgic_v3_insert_redist_region(kvm, index, addr, count); + if (ret) + return ret; + + /* + * Register iodevs for each existing VCPU. Adding more VCPUs + * afterwards will register the iodevs when needed. + */ + ret = vgic_register_all_redist_iodevs(kvm); + if (ret) + return ret; + + return 0; +} + +int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr) +{ + const struct vgic_register_region *region; + struct vgic_io_device iodev; + struct vgic_reg_attr reg_attr; + struct kvm_vcpu *vcpu; + gpa_t addr; + int ret; + + ret = vgic_v3_parse_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + switch (attr->group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + iodev.regions = vgic_v3_dist_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_dist_registers); + iodev.base_addr = 0; + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:{ + iodev.regions = vgic_v3_rd_registers; + iodev.nr_regions = ARRAY_SIZE(vgic_v3_rd_registers); + iodev.base_addr = 0; + break; + } + case KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS: { + u64 reg, id; + + id = (attr->attr & KVM_DEV_ARM_VGIC_SYSREG_INSTR_MASK); + return vgic_v3_has_cpu_sysregs_attr(vcpu, 0, id, ®); + } + default: + return -ENXIO; + } + + /* We only support aligned 32-bit accesses. */ + if (addr & 3) + return -ENXIO; + + region = vgic_get_mmio_region(vcpu, &iodev, addr, sizeof(u32)); + if (!region) + return -ENXIO; + + return 0; +} +/* + * Compare a given affinity (level 1-3 and a level 0 mask, from the SGI + * generation register ICC_SGI1R_EL1) with a given VCPU. + * If the VCPU's MPIDR matches, return the level0 affinity, otherwise + * return -1. + */ +static int match_mpidr(u64 sgi_aff, u16 sgi_cpu_mask, struct kvm_vcpu *vcpu) +{ + unsigned long affinity; + int level0; + + /* + * Split the current VCPU's MPIDR into affinity level 0 and the + * rest as this is what we have to compare against. + */ + affinity = kvm_vcpu_get_mpidr_aff(vcpu); + level0 = MPIDR_AFFINITY_LEVEL(affinity, 0); + affinity &= ~MPIDR_LEVEL_MASK; + + /* bail out if the upper three levels don't match */ + if (sgi_aff != affinity) + return -1; + + /* Is this VCPU's bit set in the mask ? */ + if (!(sgi_cpu_mask & BIT(level0))) + return -1; + + return level0; +} + +/* + * The ICC_SGI* registers encode the affinity differently from the MPIDR, + * so provide a wrapper to use the existing defines to isolate a certain + * affinity level. + */ +#define SGI_AFFINITY_LEVEL(reg, level) \ + ((((reg) & ICC_SGI1R_AFFINITY_## level ##_MASK) \ + >> ICC_SGI1R_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) + +/** + * vgic_v3_dispatch_sgi - handle SGI requests from VCPUs + * @vcpu: The VCPU requesting a SGI + * @reg: The value written into ICC_{ASGI1,SGI0,SGI1}R by that VCPU + * @allow_group1: Does the sysreg access allow generation of G1 SGIs + * + * With GICv3 (and ARE=1) CPUs trigger SGIs by writing to a system register. + * This will trap in sys_regs.c and call this function. + * This ICC_SGI1R_EL1 register contains the upper three affinity levels of the + * target processors as well as a bitmask of 16 Aff0 CPUs. + * If the interrupt routing mode bit is not set, we iterate over all VCPUs to + * check for matching ones. If this bit is set, we signal all, but not the + * calling VCPU. + */ +void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg, bool allow_group1) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_vcpu *c_vcpu; + u16 target_cpus; + u64 mpidr; + int sgi, c; + int vcpu_id = vcpu->vcpu_id; + bool broadcast; + unsigned long flags; + + sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; + broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); + target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; + mpidr = SGI_AFFINITY_LEVEL(reg, 3); + mpidr |= SGI_AFFINITY_LEVEL(reg, 2); + mpidr |= SGI_AFFINITY_LEVEL(reg, 1); + + /* + * We iterate over all VCPUs to find the MPIDRs matching the request. + * If we have handled one CPU, we clear its bit to detect early + * if we are already finished. This avoids iterating through all + * VCPUs when most of the times we just signal a single VCPU. + */ + kvm_for_each_vcpu(c, c_vcpu, kvm) { + struct vgic_irq *irq; + + /* Exit early if we have dealt with all requested CPUs */ + if (!broadcast && target_cpus == 0) + break; + + /* Don't signal the calling VCPU */ + if (broadcast && c == vcpu_id) + continue; + + if (!broadcast) { + int level0; + + level0 = match_mpidr(mpidr, target_cpus, c_vcpu); + if (level0 == -1) + continue; + + /* remove this matching VCPU from the mask */ + target_cpus &= ~BIT(level0); + } + + irq = vgic_get_irq(vcpu->kvm, c_vcpu, sgi); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + /* + * An access targetting Group0 SGIs can only generate + * those, while an access targetting Group1 SGIs can + * generate interrupts of either group. + */ + if (!irq->group || allow_group1) { + if (!irq->hw) { + irq->pending_latch = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } else { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + } else { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } + + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device dev = { + .regions = vgic_v3_dist_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_dist_registers), + }; + + return vgic_uaccess(vcpu, &dev, is_write, offset, val); +} + +int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val) +{ + struct vgic_io_device rd_dev = { + .regions = vgic_v3_rd_registers, + .nr_regions = ARRAY_SIZE(vgic_v3_rd_registers), + }; + + return vgic_uaccess(vcpu, &rd_dev, is_write, offset, val); +} + +int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u32 intid, u64 *val) +{ + if (intid % 32) + return -EINVAL; + + if (is_write) + vgic_write_irq_line_level_info(vcpu, intid, *val); + else + *val = vgic_read_irq_line_level_info(vcpu, intid); + + return 0; +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio.c b/arch/arm64/kvm/vgic/vgic-mmio.c new file mode 100644 index 000000000000..b2d73fc0d1ef --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio.c @@ -0,0 +1,1088 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * VGIC MMIO handling functions + */ + +#include <linux/bitops.h> +#include <linux/bsearch.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/iodev.h> +#include <kvm/arm_arch_timer.h> +#include <kvm/arm_vgic.h> + +#include "vgic.h" +#include "vgic-mmio.h" + +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return 0; +} + +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return -1UL; +} + +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + /* Ignore */ +} + +int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + /* Ignore */ + return 0; +} + +unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->group) + value |= BIT(i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +static void vgic_update_vsgi(struct vgic_irq *irq) +{ + WARN_ON(its_prop_update_vsgi(irq->host_irq, irq->priority, irq->group)); +} + +void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->group = !!(val & BIT(i)); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + vgic_update_vsgi(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + } else { + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + } + + vgic_put_irq(vcpu->kvm, irq); + } +} + +/* + * Read accesses to both GICD_ICENABLER and GICD_ISENABLER return the value + * of the enabled bit, so there is only one function for both here. + */ +unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->enabled) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + if (!irq->enabled) { + struct irq_data *data; + + irq->enabled = true; + data = &irq_to_desc(irq->host_irq)->irq_data; + while (irqd_irq_disabled(data)) + enable_irq(irq->host_irq); + } + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } else if (vgic_irq_is_mapped_level(irq)) { + bool was_high = irq->line_level; + + /* + * We need to update the state of the interrupt because + * the guest might have changed the state of the device + * while the interrupt was disabled at the VGIC level. + */ + irq->line_level = vgic_get_phys_line_level(irq); + /* + * Deactivate the physical interrupt so the GIC will let + * us know when it is asserted again. + */ + if (!irq->active && was_high && !irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + irq->enabled = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid) && irq->enabled) + disable_irq_nosync(irq->host_irq); + + irq->enabled = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->enabled = true; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->enabled = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + unsigned long flags; + bool val; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + int err; + + val = false; + err = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &val); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + } else { + val = irq_is_pending(irq); + } + + value |= ((u32)val << i); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + return (vgic_irq_is_sgi(irq->intid) && + vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V2); +} + +void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* GICD_ISPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* HW SGI? Ask the GIC to inject it */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + true); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } + + irq->pending_latch = true; + if (irq->hw) + vgic_irq_set_phys_active(irq, true); + + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->pending_latch = true; + + /* + * GICv2 SGIs are terribly broken. We can't restore + * the source of the interrupt, so just pick the vcpu + * itself as the source... + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source |= BIT(vcpu->vcpu_id); + + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_cpending(struct kvm_vcpu *vcpu, struct vgic_irq *irq) +{ + irq->pending_latch = false; + + /* + * We don't want the guest to effectively mask the physical + * interrupt by doing a write to SPENDR followed by a write to + * CPENDR for HW interrupts, so we clear the active state on + * the physical side if the virtual interrupt is not active. + * This may lead to taking an additional interrupt on the + * host, but that should not be a problem as the worst that + * can happen is an additional vgic injection. We also clear + * the pending state to maintain proper semantics for edge HW + * interrupts. + */ + vgic_irq_set_phys_pending(irq, false); + if (!irq->active) + vgic_irq_set_phys_active(irq, false); +} + +void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* GICD_ICPENDR0 SGI bits are WI */ + if (is_vgic_v2_sgi(vcpu, irq)) { + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* HW SGI? Ask the GIC to clear its pending bit */ + int err; + err = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + false); + WARN_RATELIMIT(err, "IRQ %d", irq->host_irq); + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + continue; + } + + if (irq->hw) + vgic_hw_irq_cpending(vcpu, irq); + else + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + unsigned long flags; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* + * More fun with GICv2 SGIs! If we're clearing one of them + * from userspace, which source vcpu to clear? Let's not + * even think of it, and blow the whole set. + */ + if (is_vgic_v2_sgi(vcpu, irq)) + irq->source = 0; + + irq->pending_latch = false; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } + + return 0; +} + +/* + * If we are fiddling with an IRQ's active state, we have to make sure the IRQ + * is not queued on some running VCPU's LRs, because then the change to the + * active state can be overwritten when the VCPU's state is synced coming back + * from the guest. + * + * For shared interrupts as well as GICv3 private interrupts, we have to + * stop all the VCPUs because interrupts can be migrated while we don't hold + * the IRQ locks and we don't want to be chasing moving targets. + * + * For GICv2 private interrupts we don't have to do anything because + * userspace accesses to the VGIC state already require all VCPUs to be + * stopped, and only the VCPU itself can modify its private interrupts + * active state, which guarantees that the VCPU is not running. + */ +static void vgic_access_active_prepare(struct kvm_vcpu *vcpu, u32 intid) +{ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + intid >= VGIC_NR_PRIVATE_IRQS) + kvm_arm_halt_guest(vcpu->kvm); +} + +/* See vgic_access_active_prepare */ +static void vgic_access_active_finish(struct kvm_vcpu *vcpu, u32 intid) +{ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3 || + intid >= VGIC_NR_PRIVATE_IRQS) + kvm_arm_resume_guest(vcpu->kvm); +} + +static unsigned long __vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 value = 0; + int i; + + /* Loop over all IRQs affected by this read */ + for (i = 0; i < len * 8; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* + * Even for HW interrupts, don't evaluate the HW state as + * all the guest is interested in is the virtual state. + */ + if (irq->active) + value |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + u32 val; + + mutex_lock(&vcpu->kvm->lock); + vgic_access_active_prepare(vcpu, intid); + + val = __vgic_mmio_read_active(vcpu, addr, len); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); + + return val; +} + +unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + return __vgic_mmio_read_active(vcpu, addr, len); +} + +/* Must be called with irq->irq_lock held */ +static void vgic_hw_irq_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool active, bool is_uaccess) +{ + if (is_uaccess) + return; + + irq->active = active; + vgic_irq_set_phys_active(irq, active); +} + +static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + bool active) +{ + unsigned long flags; + struct kvm_vcpu *requester_vcpu = kvm_get_running_vcpu(); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw && !vgic_irq_is_sgi(irq->intid)) { + vgic_hw_irq_change_active(vcpu, irq, active, !requester_vcpu); + } else if (irq->hw && vgic_irq_is_sgi(irq->intid)) { + /* + * GICv4.1 VSGI feature doesn't track an active state, + * so let's not kid ourselves, there is nothing we can + * do here. + */ + irq->active = false; + } else { + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u8 active_source; + + irq->active = active; + + /* + * The GICv2 architecture indicates that the source CPUID for + * an SGI should be provided during an EOI which implies that + * the active state is stored somewhere, but at the same time + * this state is not architecturally exposed anywhere and we + * have no way of knowing the right source. + * + * This may lead to a VCPU not being able to receive + * additional instances of a particular SGI after migration + * for a GICv2 VM on some GIC implementations. Oh well. + */ + active_source = (requester_vcpu) ? requester_vcpu->vcpu_id : 0; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2 && + active && vgic_irq_is_sgi(irq->intid)) + irq->active_source = active_source; + } + + if (irq->active) + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + else + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); +} + +static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + vgic_mmio_change_active(vcpu, irq, false); + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + mutex_lock(&vcpu->kvm->lock); + vgic_access_active_prepare(vcpu, intid); + + __vgic_mmio_write_cactive(vcpu, addr, len, val); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); +} + +int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_cactive(vcpu, addr, len, val); + return 0; +} + +static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + + for_each_set_bit(i, &val, len * 8) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + vgic_mmio_change_active(vcpu, irq, true); + vgic_put_irq(vcpu->kvm, irq); + } +} + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + mutex_lock(&vcpu->kvm->lock); + vgic_access_active_prepare(vcpu, intid); + + __vgic_mmio_write_sactive(vcpu, addr, len, val); + + vgic_access_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); +} + +int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_sactive(vcpu, addr, len, val); + return 0; +} + +unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + u64 val = 0; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + val |= (u64)irq->priority << (i * 8); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +/* + * We currently don't handle changing the priority of an interrupt that + * is already pending on a VCPU. If there is a need for this, we would + * need to make this VCPU exit and re-evaluate the priorities, potentially + * leading to this interrupt getting presented now to the guest (if it has + * been masked by the priority mask before). + */ +void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 8); + int i; + unsigned long flags; + + for (i = 0; i < len; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + /* Narrow the priority range to what we actually support */ + irq->priority = (val >> (i * 8)) & GENMASK(7, 8 - VGIC_PRI_BITS); + if (irq->hw && vgic_irq_is_sgi(irq->intid)) + vgic_update_vsgi(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 2); + u32 value = 0; + int i; + + for (i = 0; i < len * 4; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + if (irq->config == VGIC_CONFIG_EDGE) + value |= (2U << (i * 2)); + + vgic_put_irq(vcpu->kvm, irq); + } + + return value; +} + +void vgic_mmio_write_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 2); + int i; + unsigned long flags; + + for (i = 0; i < len * 4; i++) { + struct vgic_irq *irq; + + /* + * The configuration cannot be changed for SGIs in general, + * for PPIs this is IMPLEMENTATION DEFINED. The arch timer + * code relies on PPIs being level triggered, so we also + * make them read-only here. + */ + if (intid + i < VGIC_NR_PRIVATE_IRQS) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (test_bit(i * 2 + 1, &val)) + irq->config = VGIC_CONFIG_EDGE; + else + irq->config = VGIC_CONFIG_LEVEL; + + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid) +{ + int i; + u64 val = 0; + int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + for (i = 0; i < 32; i++) { + struct vgic_irq *irq; + + if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + if (irq->config == VGIC_CONFIG_LEVEL && irq->line_level) + val |= (1U << i); + + vgic_put_irq(vcpu->kvm, irq); + } + + return val; +} + +void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, + const u64 val) +{ + int i; + int nr_irqs = vcpu->kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + unsigned long flags; + + for (i = 0; i < 32; i++) { + struct vgic_irq *irq; + bool new_level; + + if ((intid + i) < VGIC_NR_SGIS || (intid + i) >= nr_irqs) + continue; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); + + /* + * Line level is set irrespective of irq type + * (level or edge) to avoid dependency that VM should + * restore irq config before line level. + */ + new_level = !!(val & (1U << i)); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->line_level = new_level; + if (new_level) + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + else + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + vgic_put_irq(vcpu->kvm, irq); + } +} + +static int match_region(const void *key, const void *elt) +{ + const unsigned int offset = (unsigned long)key; + const struct vgic_register_region *region = elt; + + if (offset < region->reg_offset) + return -1; + + if (offset >= region->reg_offset + region->len) + return 1; + + return 0; +} + +const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *regions, + int nr_regions, unsigned int offset) +{ + return bsearch((void *)(uintptr_t)offset, regions, nr_regions, + sizeof(regions[0]), match_region); +} + +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_vmcr(vcpu, vmcr); + else + vgic_v3_set_vmcr(vcpu, vmcr); +} + +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_get_vmcr(vcpu, vmcr); + else + vgic_v3_get_vmcr(vcpu, vmcr); +} + +/* + * kvm_mmio_read_buf() returns a value in a format where it can be converted + * to a byte array and be directly observed as the guest wanted it to appear + * in memory if it had done the store itself, which is LE for the GIC, as the + * guest knows the GIC is always LE. + * + * We convert this value to the CPUs native format to deal with it as a data + * value. + */ +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len) +{ + unsigned long data = kvm_mmio_read_buf(val, len); + + switch (len) { + case 1: + return data; + case 2: + return le16_to_cpu(data); + case 4: + return le32_to_cpu(data); + default: + return le64_to_cpu(data); + } +} + +/* + * kvm_mmio_write_buf() expects a value in a format such that if converted to + * a byte array it is observed as the guest would see it if it could perform + * the load directly. Since the GIC is LE, and the guest knows this, the + * guest expects a value in little endian format. + * + * We convert the data value from the CPUs native format to LE so that the + * value is returned in the proper format. + */ +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, + unsigned long data) +{ + switch (len) { + case 1: + break; + case 2: + data = cpu_to_le16(data); + break; + case 4: + data = cpu_to_le32(data); + break; + default: + data = cpu_to_le64(data); + } + + kvm_mmio_write_buf(buf, len, data); +} + +static +struct vgic_io_device *kvm_to_vgic_iodev(const struct kvm_io_device *dev) +{ + return container_of(dev, struct vgic_io_device, dev); +} + +static bool check_region(const struct kvm *kvm, + const struct vgic_register_region *region, + gpa_t addr, int len) +{ + int flags, nr_irqs = kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS; + + switch (len) { + case sizeof(u8): + flags = VGIC_ACCESS_8bit; + break; + case sizeof(u32): + flags = VGIC_ACCESS_32bit; + break; + case sizeof(u64): + flags = VGIC_ACCESS_64bit; + break; + default: + return false; + } + + if ((region->access_flags & flags) && IS_ALIGNED(addr, len)) { + if (!region->bits_per_irq) + return true; + + /* Do we access a non-allocated IRQ? */ + return VGIC_ADDR_TO_INTID(addr, region->bits_per_irq) < nr_irqs; + } + + return false; +} + +const struct vgic_register_region * +vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, int len) +{ + const struct vgic_register_region *region; + + region = vgic_find_mmio_region(iodev->regions, iodev->nr_regions, + addr - iodev->base_addr); + if (!region || !check_region(vcpu->kvm, region, addr, len)) + return NULL; + + return region; +} + +static int vgic_uaccess_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, u32 *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + + region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); + if (!region) { + *val = 0; + return 0; + } + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + if (region->uaccess_read) + *val = region->uaccess_read(r_vcpu, addr, sizeof(u32)); + else + *val = region->read(r_vcpu, addr, sizeof(u32)); + + return 0; +} + +static int vgic_uaccess_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, const u32 *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + struct kvm_vcpu *r_vcpu; + + region = vgic_get_mmio_region(vcpu, iodev, addr, sizeof(u32)); + if (!region) + return 0; + + r_vcpu = iodev->redist_vcpu ? iodev->redist_vcpu : vcpu; + if (region->uaccess_write) + return region->uaccess_write(r_vcpu, addr, sizeof(u32), *val); + + region->write(r_vcpu, addr, sizeof(u32), *val); + return 0; +} + +/* + * Userland access to VGIC registers. + */ +int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, + bool is_write, int offset, u32 *val) +{ + if (is_write) + return vgic_uaccess_write(vcpu, &dev->dev, offset, val); + else + return vgic_uaccess_read(vcpu, &dev->dev, offset, val); +} + +static int dispatch_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, void *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + unsigned long data = 0; + + region = vgic_get_mmio_region(vcpu, iodev, addr, len); + if (!region) { + memset(val, 0, len); + return 0; + } + + switch (iodev->iodev_type) { + case IODEV_CPUIF: + data = region->read(vcpu, addr, len); + break; + case IODEV_DIST: + data = region->read(vcpu, addr, len); + break; + case IODEV_REDIST: + data = region->read(iodev->redist_vcpu, addr, len); + break; + case IODEV_ITS: + data = region->its_read(vcpu->kvm, iodev->its, addr, len); + break; + } + + vgic_data_host_to_mmio_bus(val, len, data); + return 0; +} + +static int dispatch_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *dev, + gpa_t addr, int len, const void *val) +{ + struct vgic_io_device *iodev = kvm_to_vgic_iodev(dev); + const struct vgic_register_region *region; + unsigned long data = vgic_data_mmio_bus_to_host(val, len); + + region = vgic_get_mmio_region(vcpu, iodev, addr, len); + if (!region) + return 0; + + switch (iodev->iodev_type) { + case IODEV_CPUIF: + region->write(vcpu, addr, len, data); + break; + case IODEV_DIST: + region->write(vcpu, addr, len, data); + break; + case IODEV_REDIST: + region->write(iodev->redist_vcpu, addr, len, data); + break; + case IODEV_ITS: + region->its_write(vcpu->kvm, iodev->its, addr, len, data); + break; + } + + return 0; +} + +struct kvm_io_device_ops kvm_io_gic_ops = { + .read = dispatch_mmio_read, + .write = dispatch_mmio_write, +}; + +int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, + enum vgic_type type) +{ + struct vgic_io_device *io_device = &kvm->arch.vgic.dist_iodev; + int ret = 0; + unsigned int len; + + switch (type) { + case VGIC_V2: + len = vgic_v2_init_dist_iodev(io_device); + break; + case VGIC_V3: + len = vgic_v3_init_dist_iodev(io_device); + break; + default: + BUG_ON(1); + } + + io_device->base_addr = dist_base_address; + io_device->iodev_type = IODEV_DIST; + io_device->redist_vcpu = NULL; + + mutex_lock(&kvm->slots_lock); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, dist_base_address, + len, &io_device->dev); + mutex_unlock(&kvm->slots_lock); + + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic-mmio.h b/arch/arm64/kvm/vgic/vgic-mmio.h new file mode 100644 index 000000000000..fefcca2b14dc --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-mmio.h @@ -0,0 +1,227 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ +#ifndef __KVM_ARM_VGIC_MMIO_H__ +#define __KVM_ARM_VGIC_MMIO_H__ + +struct vgic_register_region { + unsigned int reg_offset; + unsigned int len; + unsigned int bits_per_irq; + unsigned int access_flags; + union { + unsigned long (*read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + unsigned long (*its_read)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len); + }; + union { + void (*write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + void (*its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; + unsigned long (*uaccess_read)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + union { + int (*uaccess_write)(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + int (*uaccess_its_write)(struct kvm *kvm, struct vgic_its *its, + gpa_t addr, unsigned int len, + unsigned long val); + }; +}; + +extern struct kvm_io_device_ops kvm_io_gic_ops; + +#define VGIC_ACCESS_8bit 1 +#define VGIC_ACCESS_32bit 2 +#define VGIC_ACCESS_64bit 4 + +/* + * Generate a mask that covers the number of bytes required to address + * up to 1024 interrupts, each represented by <bits> bits. This assumes + * that <bits> is a power of two. + */ +#define VGIC_ADDR_IRQ_MASK(bits) (((bits) * 1024 / 8) - 1) + +/* + * (addr & mask) gives us the _byte_ offset for the INT ID. + * We multiply this by 8 the get the _bit_ offset, then divide this by + * the number of bits to learn the actual INT ID. + * But instead of a division (which requires a "long long div" implementation), + * we shift by the binary logarithm of <bits>. + * This assumes that <bits> is a power of two. + */ +#define VGIC_ADDR_TO_INTID(addr, bits) (((addr) & VGIC_ADDR_IRQ_MASK(bits)) * \ + 8 >> ilog2(bits)) + +/* + * Some VGIC registers store per-IRQ information, with a different number + * of bits per IRQ. For those registers this macro is used. + * The _WITH_LENGTH version instantiates registers with a fixed length + * and is mutually exclusive with the _PER_IRQ version. + */ +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = bpi, \ + .len = bpi * 1024 / 8, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ + } + +#define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = 0, \ + .len = length, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + } + +#define REGISTER_DESC_WITH_LENGTH_UACCESS(off, rd, wr, urd, uwr, length, acc) \ + { \ + .reg_offset = off, \ + .bits_per_irq = 0, \ + .len = length, \ + .access_flags = acc, \ + .read = rd, \ + .write = wr, \ + .uaccess_read = urd, \ + .uaccess_write = uwr, \ + } + +unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); + +void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, + unsigned long data); + +unsigned long extract_bytes(u64 data, unsigned int offset, + unsigned int num); + +u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_raz(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +unsigned long vgic_mmio_read_rao(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +int vgic_mmio_uaccess_write_wi(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +unsigned long vgic_mmio_read_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len); + +void vgic_mmio_write_group(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned int len, unsigned long val); + +unsigned long vgic_mmio_read_enable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_senable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_cenable(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_spending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess_write_cpending(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +unsigned long vgic_uaccess_read_active(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_priority(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +unsigned long vgic_mmio_read_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len); + +void vgic_mmio_write_config(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +int vgic_uaccess(struct kvm_vcpu *vcpu, struct vgic_io_device *dev, + bool is_write, int offset, u32 *val); + +u64 vgic_read_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid); + +void vgic_write_irq_line_level_info(struct kvm_vcpu *vcpu, u32 intid, + const u64 val); + +unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); + +unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); + +u64 vgic_sanitise_outer_cacheability(u64 reg); +u64 vgic_sanitise_inner_cacheability(u64 reg); +u64 vgic_sanitise_shareability(u64 reg); +u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, + u64 (*sanitise_fn)(u64)); + +/* Find the proper register handler entry given a certain address offset */ +const struct vgic_register_region * +vgic_find_mmio_region(const struct vgic_register_region *regions, + int nr_regions, unsigned int offset); + +#endif diff --git a/arch/arm64/kvm/vgic/vgic-v2.c b/arch/arm64/kvm/vgic/vgic-v2.c new file mode 100644 index 000000000000..ebf53a4e1296 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v2.c @@ -0,0 +1,504 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/irqchip/arm-gic.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_mmu.h> + +#include "vgic.h" + +static inline void vgic_v2_write_lr(int lr, u32 val) +{ + void __iomem *base = kvm_vgic_global_state.vctrl_base; + + writel_relaxed(val, base + GICH_LR0 + (lr * 4)); +} + +void vgic_v2_init_lrs(void) +{ + int i; + + for (i = 0; i < kvm_vgic_global_state.nr_lr; i++) + vgic_v2_write_lr(i, 0); +} + +void vgic_v2_set_underflow(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v2; + + cpuif->vgic_hcr |= GICH_HCR_UIE; +} + +static bool lr_signals_eoi_mi(u32 lr_val) +{ + return !(lr_val & GICH_LR_STATE) && (lr_val & GICH_LR_EOI) && + !(lr_val & GICH_LR_HW); +} + +/* + * transfer the content of the LRs back into the corresponding ap_list: + * - active bit is transferred as is + * - pending bit is + * - transferred as is in case of edge sensitive IRQs + * - set to the line-level (resample time) for level sensitive IRQs + */ +void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v2_cpu_if *cpuif = &vgic_cpu->vgic_v2; + int lr; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + cpuif->vgic_hcr &= ~GICH_HCR_UIE; + + for (lr = 0; lr < vgic_cpu->vgic_v2.used_lrs; lr++) { + u32 val = cpuif->vgic_lr[lr]; + u32 cpuid, intid = val & GICH_LR_VIRTUALID; + struct vgic_irq *irq; + + /* Extract the source vCPU id from the LR */ + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + cpuid &= 7; + + /* Notify fds when the guest EOI'ed a level-triggered SPI */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + + raw_spin_lock(&irq->irq_lock); + + /* Always preserve the active bit */ + irq->active = !!(val & GICH_LR_ACTIVE_BIT); + + if (irq->active && vgic_irq_is_sgi(intid)) + irq->active_source = cpuid; + + /* Edge is the only case where we preserve the pending bit */ + if (irq->config == VGIC_CONFIG_EDGE && + (val & GICH_LR_PENDING_BIT)) { + irq->pending_latch = true; + + if (vgic_irq_is_sgi(intid)) + irq->source |= (1 << cpuid); + } + + /* + * Clear soft pending state when level irqs have been acked. + */ + if (irq->config == VGIC_CONFIG_LEVEL && !(val & GICH_LR_STATE)) + irq->pending_latch = false; + + /* + * Level-triggered mapped IRQs are special because we only + * observe rising edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample + * the physical line and set the line level, because the + * device state could have changed or we simply need to + * process the still pending interrupt later. + * + * If this causes us to lower the level, we have to also clear + * the physical active state, since we will otherwise never be + * told when the interrupt becomes asserted again. + */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) { + irq->line_level = vgic_get_phys_line_level(irq); + + if (!irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + + cpuif->used_lrs = 0; +} + +/* + * Populates the particular LR with the state of a given IRQ: + * - for an edge sensitive IRQ the pending state is cleared in struct vgic_irq + * - for a level sensitive IRQ the pending state value is unchanged; + * it is dictated directly by the input level + * + * If @irq describes an SGI with multiple sources, we choose the + * lowest-numbered source VCPU and clear that bit in the source bitmap. + * + * The irq_lock must be held by the caller. + */ +void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) +{ + u32 val = irq->intid; + bool allow_pending = true; + + if (irq->active) { + val |= GICH_LR_ACTIVE_BIT; + if (vgic_irq_is_sgi(irq->intid)) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= GICH_LR_EOI; + } + } + + if (irq->group) + val |= GICH_LR_GROUP1; + + if (irq->hw) { + val |= GICH_LR_HW; + val |= irq->hwintid << GICH_LR_PHYSID_CPUID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= GICH_LR_EOI; + + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { + val |= GICH_LR_PENDING_BIT; + + if (irq->config == VGIC_CONFIG_EDGE) + irq->pending_latch = false; + + if (vgic_irq_is_sgi(irq->intid)) { + u32 src = ffs(irq->source); + + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; + irq->source &= ~(1 << (src - 1)); + if (irq->source) { + irq->pending_latch = true; + val |= GICH_LR_EOI; + } + } + } + + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v2_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & GICH_LR_PENDING_BIT)) + irq->line_level = false; + + /* The GICv2 LR only holds five bits of priority. */ + val |= (irq->priority >> 3) << GICH_LR_PRIORITY_SHIFT; + + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = val; +} + +void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = 0; +} + +void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u32 vmcr; + + vmcr = (vmcrp->grpen0 << GICH_VMCR_ENABLE_GRP0_SHIFT) & + GICH_VMCR_ENABLE_GRP0_MASK; + vmcr |= (vmcrp->grpen1 << GICH_VMCR_ENABLE_GRP1_SHIFT) & + GICH_VMCR_ENABLE_GRP1_MASK; + vmcr |= (vmcrp->ackctl << GICH_VMCR_ACK_CTL_SHIFT) & + GICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << GICH_VMCR_FIQ_EN_SHIFT) & + GICH_VMCR_FIQ_EN_MASK; + vmcr |= (vmcrp->cbpr << GICH_VMCR_CBPR_SHIFT) & + GICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << GICH_VMCR_EOI_MODE_SHIFT) & + GICH_VMCR_EOI_MODE_MASK; + vmcr |= (vmcrp->abpr << GICH_VMCR_ALIAS_BINPOINT_SHIFT) & + GICH_VMCR_ALIAS_BINPOINT_MASK; + vmcr |= (vmcrp->bpr << GICH_VMCR_BINPOINT_SHIFT) & + GICH_VMCR_BINPOINT_MASK; + vmcr |= ((vmcrp->pmr >> GICV_PMR_PRIORITY_SHIFT) << + GICH_VMCR_PRIMASK_SHIFT) & GICH_VMCR_PRIMASK_MASK; + + cpu_if->vgic_vmcr = vmcr; +} + +void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; + + vmcrp->grpen0 = (vmcr & GICH_VMCR_ENABLE_GRP0_MASK) >> + GICH_VMCR_ENABLE_GRP0_SHIFT; + vmcrp->grpen1 = (vmcr & GICH_VMCR_ENABLE_GRP1_MASK) >> + GICH_VMCR_ENABLE_GRP1_SHIFT; + vmcrp->ackctl = (vmcr & GICH_VMCR_ACK_CTL_MASK) >> + GICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & GICH_VMCR_FIQ_EN_MASK) >> + GICH_VMCR_FIQ_EN_SHIFT; + vmcrp->cbpr = (vmcr & GICH_VMCR_CBPR_MASK) >> + GICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & GICH_VMCR_EOI_MODE_MASK) >> + GICH_VMCR_EOI_MODE_SHIFT; + + vmcrp->abpr = (vmcr & GICH_VMCR_ALIAS_BINPOINT_MASK) >> + GICH_VMCR_ALIAS_BINPOINT_SHIFT; + vmcrp->bpr = (vmcr & GICH_VMCR_BINPOINT_MASK) >> + GICH_VMCR_BINPOINT_SHIFT; + vmcrp->pmr = ((vmcr & GICH_VMCR_PRIMASK_MASK) >> + GICH_VMCR_PRIMASK_SHIFT) << GICV_PMR_PRIORITY_SHIFT; +} + +void vgic_v2_enable(struct kvm_vcpu *vcpu) +{ + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + + /* Get the show on the road... */ + vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; +} + +/* check for overlapping regions and for regions crossing the end of memory */ +static bool vgic_v2_check_base(gpa_t dist_base, gpa_t cpu_base) +{ + if (dist_base + KVM_VGIC_V2_DIST_SIZE < dist_base) + return false; + if (cpu_base + KVM_VGIC_V2_CPU_SIZE < cpu_base) + return false; + + if (dist_base + KVM_VGIC_V2_DIST_SIZE <= cpu_base) + return true; + if (cpu_base + KVM_VGIC_V2_CPU_SIZE <= dist_base) + return true; + + return false; +} + +int vgic_v2_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + int ret = 0; + + if (vgic_ready(kvm)) + goto out; + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base) || + IS_VGIC_ADDR_UNDEF(dist->vgic_cpu_base)) { + kvm_err("Need to set vgic cpu and dist addresses first\n"); + ret = -ENXIO; + goto out; + } + + if (!vgic_v2_check_base(dist->vgic_dist_base, dist->vgic_cpu_base)) { + kvm_err("VGIC CPU and dist frames overlap\n"); + ret = -EINVAL; + goto out; + } + + /* + * Initialize the vgic if this hasn't already been done on demand by + * accessing the vgic state from userspace. + */ + ret = vgic_init(kvm); + if (ret) { + kvm_err("Unable to initialize VGIC dynamic data structures\n"); + goto out; + } + + ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V2); + if (ret) { + kvm_err("Unable to register VGIC MMIO regions\n"); + goto out; + } + + if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, + kvm_vgic_global_state.vcpu_base, + KVM_VGIC_V2_CPU_SIZE, true); + if (ret) { + kvm_err("Unable to remap VGIC CPU to VCPU\n"); + goto out; + } + } + + dist->ready = true; + +out: + return ret; +} + +DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); + +/** + * vgic_v2_probe - probe for a VGICv2 compatible interrupt controller + * @info: pointer to the GIC description + * + * Returns 0 if the VGICv2 has been probed successfully, returns an error code + * otherwise + */ +int vgic_v2_probe(const struct gic_kvm_info *info) +{ + int ret; + u32 vtr; + + if (!info->vctrl.start) { + kvm_err("GICH not present in the firmware table\n"); + return -ENXIO; + } + + if (!PAGE_ALIGNED(info->vcpu.start) || + !PAGE_ALIGNED(resource_size(&info->vcpu))) { + kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); + + ret = create_hyp_io_mappings(info->vcpu.start, + resource_size(&info->vcpu), + &kvm_vgic_global_state.vcpu_base_va, + &kvm_vgic_global_state.vcpu_hyp_va); + if (ret) { + kvm_err("Cannot map GICV into hyp\n"); + goto out; + } + + static_branch_enable(&vgic_v2_cpuif_trap); + } + + ret = create_hyp_io_mappings(info->vctrl.start, + resource_size(&info->vctrl), + &kvm_vgic_global_state.vctrl_base, + &kvm_vgic_global_state.vctrl_hyp); + if (ret) { + kvm_err("Cannot map VCTRL into hyp\n"); + goto out; + } + + vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); + kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; + + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device\n"); + goto out; + } + + kvm_vgic_global_state.can_emulate_gicv2 = true; + kvm_vgic_global_state.vcpu_base = info->vcpu.start; + kvm_vgic_global_state.type = VGIC_V2; + kvm_vgic_global_state.max_gic_vcpus = VGIC_V2_MAX_CPUS; + + kvm_debug("vgic-v2@%llx\n", info->vctrl.start); + + return 0; +out: + if (kvm_vgic_global_state.vctrl_base) + iounmap(kvm_vgic_global_state.vctrl_base); + if (kvm_vgic_global_state.vcpu_base_va) + iounmap(kvm_vgic_global_state.vcpu_base_va); + + return ret; +} + +static void save_lrs(struct kvm_vcpu *vcpu, void __iomem *base) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + u64 used_lrs = cpu_if->used_lrs; + u64 elrsr; + int i; + + elrsr = readl_relaxed(base + GICH_ELRSR0); + if (unlikely(used_lrs > 32)) + elrsr |= ((u64)readl_relaxed(base + GICH_ELRSR1)) << 32; + + for (i = 0; i < used_lrs; i++) { + if (elrsr & (1UL << i)) + cpu_if->vgic_lr[i] &= ~GICH_LR_STATE; + else + cpu_if->vgic_lr[i] = readl_relaxed(base + GICH_LR0 + (i * 4)); + + writel_relaxed(0, base + GICH_LR0 + (i * 4)); + } +} + +void vgic_v2_save_state(struct kvm_vcpu *vcpu) +{ + void __iomem *base = kvm_vgic_global_state.vctrl_base; + u64 used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; + + if (!base) + return; + + if (used_lrs) { + save_lrs(vcpu, base); + writel_relaxed(0, base + GICH_HCR); + } +} + +void vgic_v2_restore_state(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + void __iomem *base = kvm_vgic_global_state.vctrl_base; + u64 used_lrs = cpu_if->used_lrs; + int i; + + if (!base) + return; + + if (used_lrs) { + writel_relaxed(cpu_if->vgic_hcr, base + GICH_HCR); + for (i = 0; i < used_lrs; i++) { + writel_relaxed(cpu_if->vgic_lr[i], + base + GICH_LR0 + (i * 4)); + } + } +} + +void vgic_v2_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + writel_relaxed(cpu_if->vgic_vmcr, + kvm_vgic_global_state.vctrl_base + GICH_VMCR); + writel_relaxed(cpu_if->vgic_apr, + kvm_vgic_global_state.vctrl_base + GICH_APR); +} + +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + cpu_if->vgic_vmcr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VMCR); +} + +void vgic_v2_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v2_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v2; + + vgic_v2_vmcr_sync(vcpu); + cpu_if->vgic_apr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_APR); +} diff --git a/arch/arm64/kvm/vgic/vgic-v3.c b/arch/arm64/kvm/vgic/vgic-v3.c new file mode 100644 index 000000000000..76e2d85789ed --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v3.c @@ -0,0 +1,693 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/irqchip/arm-gic-v3.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <kvm/arm_vgic.h> +#include <asm/kvm_hyp.h> +#include <asm/kvm_mmu.h> +#include <asm/kvm_asm.h> + +#include "vgic.h" + +static bool group0_trap; +static bool group1_trap; +static bool common_trap; +static bool gicv4_enable; + +void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; + + cpuif->vgic_hcr |= ICH_HCR_UIE; +} + +static bool lr_signals_eoi_mi(u64 lr_val) +{ + return !(lr_val & ICH_LR_STATE) && (lr_val & ICH_LR_EOI) && + !(lr_val & ICH_LR_HW); +} + +void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_v3_cpu_if *cpuif = &vgic_cpu->vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + int lr; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + cpuif->vgic_hcr &= ~ICH_HCR_UIE; + + for (lr = 0; lr < cpuif->used_lrs; lr++) { + u64 val = cpuif->vgic_lr[lr]; + u32 intid, cpuid; + struct vgic_irq *irq; + bool is_v2_sgi = false; + + cpuid = val & GICH_LR_PHYSID_CPUID; + cpuid >>= GICH_LR_PHYSID_CPUID_SHIFT; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V3) { + intid = val & ICH_LR_VIRTUAL_ID_MASK; + } else { + intid = val & GICH_LR_VIRTUALID; + is_v2_sgi = vgic_irq_is_sgi(intid); + } + + /* Notify fds when the guest EOI'ed a level-triggered IRQ */ + if (lr_signals_eoi_mi(val) && vgic_valid_spi(vcpu->kvm, intid)) + kvm_notify_acked_irq(vcpu->kvm, 0, + intid - VGIC_NR_PRIVATE_IRQS); + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + if (!irq) /* An LPI could have been unmapped. */ + continue; + + raw_spin_lock(&irq->irq_lock); + + /* Always preserve the active bit */ + irq->active = !!(val & ICH_LR_ACTIVE_BIT); + + if (irq->active && is_v2_sgi) + irq->active_source = cpuid; + + /* Edge is the only case where we preserve the pending bit */ + if (irq->config == VGIC_CONFIG_EDGE && + (val & ICH_LR_PENDING_BIT)) { + irq->pending_latch = true; + + if (is_v2_sgi) + irq->source |= (1 << cpuid); + } + + /* + * Clear soft pending state when level irqs have been acked. + */ + if (irq->config == VGIC_CONFIG_LEVEL && !(val & ICH_LR_STATE)) + irq->pending_latch = false; + + /* + * Level-triggered mapped IRQs are special because we only + * observe rising edges as input to the VGIC. + * + * If the guest never acked the interrupt we have to sample + * the physical line and set the line level, because the + * device state could have changed or we simply need to + * process the still pending interrupt later. + * + * If this causes us to lower the level, we have to also clear + * the physical active state, since we will otherwise never be + * told when the interrupt becomes asserted again. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) { + irq->line_level = vgic_get_phys_line_level(irq); + + if (!irq->line_level) + vgic_irq_set_phys_active(irq, false); + } + + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + + cpuif->used_lrs = 0; +} + +/* Requires the irq to be locked already */ +void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) +{ + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u64 val = irq->intid; + bool allow_pending = true, is_v2_sgi; + + is_v2_sgi = (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2); + + if (irq->active) { + val |= ICH_LR_ACTIVE_BIT; + if (is_v2_sgi) + val |= irq->active_source << GICH_LR_PHYSID_CPUID_SHIFT; + if (vgic_irq_is_multi_sgi(irq)) { + allow_pending = false; + val |= ICH_LR_EOI; + } + } + + if (irq->hw) { + val |= ICH_LR_HW; + val |= ((u64)irq->hwintid) << ICH_LR_PHYS_ID_SHIFT; + /* + * Never set pending+active on a HW interrupt, as the + * pending state is kept at the physical distributor + * level. + */ + if (irq->active) + allow_pending = false; + } else { + if (irq->config == VGIC_CONFIG_LEVEL) { + val |= ICH_LR_EOI; + + /* + * Software resampling doesn't work very well + * if we allow P+A, so let's not do that. + */ + if (irq->active) + allow_pending = false; + } + } + + if (allow_pending && irq_is_pending(irq)) { + val |= ICH_LR_PENDING_BIT; + + if (irq->config == VGIC_CONFIG_EDGE) + irq->pending_latch = false; + + if (vgic_irq_is_sgi(irq->intid) && + model == KVM_DEV_TYPE_ARM_VGIC_V2) { + u32 src = ffs(irq->source); + + if (WARN_RATELIMIT(!src, "No SGI source for INTID %d\n", + irq->intid)) + return; + + val |= (src - 1) << GICH_LR_PHYSID_CPUID_SHIFT; + irq->source &= ~(1 << (src - 1)); + if (irq->source) { + irq->pending_latch = true; + val |= ICH_LR_EOI; + } + } + } + + /* + * Level-triggered mapped IRQs are special because we only observe + * rising edges as input to the VGIC. We therefore lower the line + * level here, so that we can take new virtual IRQs. See + * vgic_v3_fold_lr_state for more info. + */ + if (vgic_irq_is_mapped_level(irq) && (val & ICH_LR_PENDING_BIT)) + irq->line_level = false; + + if (irq->group) + val |= ICH_LR_GROUP; + + val |= (u64)irq->priority << ICH_LR_PRIORITY_SHIFT; + + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = val; +} + +void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = 0; +} + +void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcr = (vmcrp->ackctl << ICH_VMCR_ACK_CTL_SHIFT) & + ICH_VMCR_ACK_CTL_MASK; + vmcr |= (vmcrp->fiqen << ICH_VMCR_FIQ_EN_SHIFT) & + ICH_VMCR_FIQ_EN_MASK; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcr = ICH_VMCR_FIQ_EN_MASK; + } + + vmcr |= (vmcrp->cbpr << ICH_VMCR_CBPR_SHIFT) & ICH_VMCR_CBPR_MASK; + vmcr |= (vmcrp->eoim << ICH_VMCR_EOIM_SHIFT) & ICH_VMCR_EOIM_MASK; + vmcr |= (vmcrp->abpr << ICH_VMCR_BPR1_SHIFT) & ICH_VMCR_BPR1_MASK; + vmcr |= (vmcrp->bpr << ICH_VMCR_BPR0_SHIFT) & ICH_VMCR_BPR0_MASK; + vmcr |= (vmcrp->pmr << ICH_VMCR_PMR_SHIFT) & ICH_VMCR_PMR_MASK; + vmcr |= (vmcrp->grpen0 << ICH_VMCR_ENG0_SHIFT) & ICH_VMCR_ENG0_MASK; + vmcr |= (vmcrp->grpen1 << ICH_VMCR_ENG1_SHIFT) & ICH_VMCR_ENG1_MASK; + + cpu_if->vgic_vmcr = vmcr; +} + +void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcrp) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + u32 model = vcpu->kvm->arch.vgic.vgic_model; + u32 vmcr; + + vmcr = cpu_if->vgic_vmcr; + + if (model == KVM_DEV_TYPE_ARM_VGIC_V2) { + vmcrp->ackctl = (vmcr & ICH_VMCR_ACK_CTL_MASK) >> + ICH_VMCR_ACK_CTL_SHIFT; + vmcrp->fiqen = (vmcr & ICH_VMCR_FIQ_EN_MASK) >> + ICH_VMCR_FIQ_EN_SHIFT; + } else { + /* + * When emulating GICv3 on GICv3 with SRE=1 on the + * VFIQEn bit is RES1 and the VAckCtl bit is RES0. + */ + vmcrp->fiqen = 1; + vmcrp->ackctl = 0; + } + + vmcrp->cbpr = (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + vmcrp->eoim = (vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT; + vmcrp->abpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + vmcrp->bpr = (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; + vmcrp->pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + vmcrp->grpen0 = (vmcr & ICH_VMCR_ENG0_MASK) >> ICH_VMCR_ENG0_SHIFT; + vmcrp->grpen1 = (vmcr & ICH_VMCR_ENG1_MASK) >> ICH_VMCR_ENG1_SHIFT; +} + +#define INITIAL_PENDBASER_VALUE \ + (GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb) | \ + GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, SameAsInner) | \ + GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)) + +void vgic_v3_enable(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *vgic_v3 = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * By forcing VMCR to zero, the GIC will restore the binary + * points to their reset values. Anything else resets to zero + * anyway. + */ + vgic_v3->vgic_vmcr = 0; + + /* + * If we are emulating a GICv3, we do it in an non-GICv2-compatible + * way, so we force SRE to 1 to demonstrate this to the guest. + * Also, we don't support any form of IRQ/FIQ bypass. + * This goes with the spec allowing the value to be RAO/WI. + */ + if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + vgic_v3->vgic_sre = (ICC_SRE_EL1_DIB | + ICC_SRE_EL1_DFB | + ICC_SRE_EL1_SRE); + vcpu->arch.vgic_cpu.pendbaser = INITIAL_PENDBASER_VALUE; + } else { + vgic_v3->vgic_sre = 0; + } + + vcpu->arch.vgic_cpu.num_id_bits = (kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_ID_BITS_MASK) >> + ICH_VTR_ID_BITS_SHIFT; + vcpu->arch.vgic_cpu.num_pri_bits = ((kvm_vgic_global_state.ich_vtr_el2 & + ICH_VTR_PRI_BITS_MASK) >> + ICH_VTR_PRI_BITS_SHIFT) + 1; + + /* Get the show on the road... */ + vgic_v3->vgic_hcr = ICH_HCR_EN; + if (group0_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL0; + if (group1_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + if (common_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TC; +} + +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) +{ + struct kvm_vcpu *vcpu; + int byte_offset, bit_nr; + gpa_t pendbase, ptr; + bool status; + u8 val; + int ret; + unsigned long flags; + +retry: + vcpu = irq->target_vcpu; + if (!vcpu) + return 0; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + + status = val & (1 << bit_nr); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->target_vcpu != vcpu) { + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + goto retry; + } + irq->pending_latch = status; + vgic_queue_irq_unlock(vcpu->kvm, irq, flags); + + if (status) { + /* clear consumed data */ + val &= ~(1 << bit_nr); + ret = kvm_write_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + } + return 0; +} + +/** + * vgic_v3_save_pending_tables - Save the pending tables into guest RAM + * kvm lock and all vcpu lock must be held + */ +int vgic_v3_save_pending_tables(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq; + gpa_t last_ptr = ~(gpa_t)0; + int ret; + u8 val; + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + int byte_offset, bit_nr; + struct kvm_vcpu *vcpu; + gpa_t pendbase, ptr; + bool stored; + + vcpu = irq->target_vcpu; + if (!vcpu) + continue; + + pendbase = GICR_PENDBASER_ADDRESS(vcpu->arch.vgic_cpu.pendbaser); + + byte_offset = irq->intid / BITS_PER_BYTE; + bit_nr = irq->intid % BITS_PER_BYTE; + ptr = pendbase + byte_offset; + + if (ptr != last_ptr) { + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + last_ptr = ptr; + } + + stored = val & (1U << bit_nr); + if (stored == irq->pending_latch) + continue; + + if (irq->pending_latch) + val |= 1 << bit_nr; + else + val &= ~(1 << bit_nr); + + ret = kvm_write_guest_lock(kvm, ptr, &val, 1); + if (ret) + return ret; + } + return 0; +} + +/** + * vgic_v3_rdist_overlap - check if a region overlaps with any + * existing redistributor region + * + * @kvm: kvm handle + * @base: base of the region + * @size: size of region + * + * Return: true if there is an overlap + */ +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + if ((base + size > rdreg->base) && + (base < rdreg->base + vgic_v3_rd_region_size(kvm, rdreg))) + return true; + } + return false; +} + +/* + * Check for overlapping regions and for regions crossing the end of memory + * for base addresses which have already been set. + */ +bool vgic_v3_check_base(struct kvm *kvm) +{ + struct vgic_dist *d = &kvm->arch.vgic; + struct vgic_redist_region *rdreg; + + if (!IS_VGIC_ADDR_UNDEF(d->vgic_dist_base) && + d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE < d->vgic_dist_base) + return false; + + list_for_each_entry(rdreg, &d->rd_regions, list) { + if (rdreg->base + vgic_v3_rd_region_size(kvm, rdreg) < + rdreg->base) + return false; + } + + if (IS_VGIC_ADDR_UNDEF(d->vgic_dist_base)) + return true; + + return !vgic_v3_rdist_overlap(kvm, d->vgic_dist_base, + KVM_VGIC_V3_DIST_SIZE); +} + +/** + * vgic_v3_rdist_free_slot - Look up registered rdist regions and identify one + * which has free space to put a new rdist region. + * + * @rd_regions: redistributor region list head + * + * A redistributor regions maps n redistributors, n = region size / (2 x 64kB). + * Stride between redistributors is 0 and regions are filled in the index order. + * + * Return: the redist region handle, if any, that has space to map a new rdist + * region. + */ +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rd_regions) +{ + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (!vgic_v3_redist_region_full(rdreg)) + return rdreg; + } + return NULL; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index) +{ + struct list_head *rd_regions = &kvm->arch.vgic.rd_regions; + struct vgic_redist_region *rdreg; + + list_for_each_entry(rdreg, rd_regions, list) { + if (rdreg->index == index) + return rdreg; + } + return NULL; +} + + +int vgic_v3_map_resources(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int ret = 0; + int c; + + if (vgic_ready(kvm)) + goto out; + + kvm_for_each_vcpu(c, vcpu, kvm) { + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + if (IS_VGIC_ADDR_UNDEF(vgic_cpu->rd_iodev.base_addr)) { + kvm_debug("vcpu %d redistributor base not set\n", c); + ret = -ENXIO; + goto out; + } + } + + if (IS_VGIC_ADDR_UNDEF(dist->vgic_dist_base)) { + kvm_err("Need to set vgic distributor addresses first\n"); + ret = -ENXIO; + goto out; + } + + if (!vgic_v3_check_base(kvm)) { + kvm_err("VGIC redist and dist frames overlap\n"); + ret = -EINVAL; + goto out; + } + + /* + * For a VGICv3 we require the userland to explicitly initialize + * the VGIC before we need to use it. + */ + if (!vgic_initialized(kvm)) { + ret = -EBUSY; + goto out; + } + + ret = vgic_register_dist_iodev(kvm, dist->vgic_dist_base, VGIC_V3); + if (ret) { + kvm_err("Unable to register VGICv3 dist MMIO regions\n"); + goto out; + } + + if (kvm_vgic_global_state.has_gicv4_1) + vgic_v4_configure_vsgis(kvm); + dist->ready = true; + +out: + return ret; +} + +DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); + +static int __init early_group0_trap_cfg(char *buf) +{ + return strtobool(buf, &group0_trap); +} +early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); + +static int __init early_group1_trap_cfg(char *buf) +{ + return strtobool(buf, &group1_trap); +} +early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); + +static int __init early_common_trap_cfg(char *buf) +{ + return strtobool(buf, &common_trap); +} +early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); + +static int __init early_gicv4_enable(char *buf) +{ + return strtobool(buf, &gicv4_enable); +} +early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable); + +/** + * vgic_v3_probe - probe for a VGICv3 compatible interrupt controller + * @info: pointer to the GIC description + * + * Returns 0 if the VGICv3 has been probed successfully, returns an error code + * otherwise + */ +int vgic_v3_probe(const struct gic_kvm_info *info) +{ + u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2); + int ret; + + /* + * The ListRegs field is 5 bits, but there is an architectural + * maximum of 16 list registers. Just ignore bit 4... + */ + kvm_vgic_global_state.nr_lr = (ich_vtr_el2 & 0xf) + 1; + kvm_vgic_global_state.can_emulate_gicv2 = false; + kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2; + + /* GICv4 support? */ + if (info->has_v4) { + kvm_vgic_global_state.has_gicv4 = gicv4_enable; + kvm_vgic_global_state.has_gicv4_1 = info->has_v4_1 && gicv4_enable; + kvm_info("GICv4%s support %sabled\n", + kvm_vgic_global_state.has_gicv4_1 ? ".1" : "", + gicv4_enable ? "en" : "dis"); + } + + if (!info->vcpu.start) { + kvm_info("GICv3: no GICV resource entry\n"); + kvm_vgic_global_state.vcpu_base = 0; + } else if (!PAGE_ALIGNED(info->vcpu.start)) { + pr_warn("GICV physical address 0x%llx not page aligned\n", + (unsigned long long)info->vcpu.start); + kvm_vgic_global_state.vcpu_base = 0; + } else { + kvm_vgic_global_state.vcpu_base = info->vcpu.start; + kvm_vgic_global_state.can_emulate_gicv2 = true; + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device.\n"); + return ret; + } + kvm_info("vgic-v2@%llx\n", info->vcpu.start); + } + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V3); + if (ret) { + kvm_err("Cannot register GICv3 KVM device.\n"); + kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); + return ret; + } + + if (kvm_vgic_global_state.vcpu_base == 0) + kvm_info("disabling GICv2 emulation\n"); + + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { + group0_trap = true; + group1_trap = true; + } + + if (group0_trap || group1_trap || common_trap) { + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", + group0_trap ? "G0" : "", + group1_trap ? "G1" : "", + common_trap ? "C" : ""); + static_branch_enable(&vgic_v3_cpuif_trap); + } + + kvm_vgic_global_state.vctrl_base = NULL; + kvm_vgic_global_state.type = VGIC_V3; + kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; + + return 0; +} + +void vgic_v3_load(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + /* + * If dealing with a GICv2 emulation on GICv3, VMCR_EL2.VFIQen + * is dependent on ICC_SRE_EL1.SRE, and we have to perform the + * VMCR_EL2 save/restore in the world switch. + */ + if (likely(cpu_if->vgic_sre)) + kvm_call_hyp(__vgic_v3_write_vmcr, cpu_if->vgic_vmcr); + + kvm_call_hyp(__vgic_v3_restore_aprs, kern_hyp_va(cpu_if)); + + if (has_vhe()) + __vgic_v3_activate_traps(cpu_if); + + WARN_ON(vgic_v4_load(vcpu)); +} + +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + if (likely(cpu_if->vgic_sre)) + cpu_if->vgic_vmcr = kvm_call_hyp_ret(__vgic_v3_read_vmcr); +} + +void vgic_v3_put(struct kvm_vcpu *vcpu) +{ + struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; + + WARN_ON(vgic_v4_put(vcpu, false)); + + vgic_v3_vmcr_sync(vcpu); + + kvm_call_hyp(__vgic_v3_save_aprs, kern_hyp_va(cpu_if)); + + if (has_vhe()) + __vgic_v3_deactivate_traps(cpu_if); +} diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c new file mode 100644 index 000000000000..27ac833e5ec7 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -0,0 +1,453 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2017 ARM Ltd. + * Author: Marc Zyngier <marc.zyngier@arm.com> + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/irqdomain.h> +#include <linux/kvm_host.h> +#include <linux/irqchip/arm-gic-v3.h> + +#include "vgic.h" + +/* + * How KVM uses GICv4 (insert rude comments here): + * + * The vgic-v4 layer acts as a bridge between several entities: + * - The GICv4 ITS representation offered by the ITS driver + * - VFIO, which is in charge of the PCI endpoint + * - The virtual ITS, which is the only thing the guest sees + * + * The configuration of VLPIs is triggered by a callback from VFIO, + * instructing KVM that a PCI device has been configured to deliver + * MSIs to a vITS. + * + * kvm_vgic_v4_set_forwarding() is thus called with the routing entry, + * and this is used to find the corresponding vITS data structures + * (ITS instance, device, event and irq) using a process that is + * extremely similar to the injection of an MSI. + * + * At this stage, we can link the guest's view of an LPI (uniquely + * identified by the routing entry) and the host irq, using the GICv4 + * driver mapping operation. Should the mapping succeed, we've then + * successfully upgraded the guest's LPI to a VLPI. We can then start + * with updating GICv4's view of the property table and generating an + * INValidation in order to kickstart the delivery of this VLPI to the + * guest directly, without software intervention. Well, almost. + * + * When the PCI endpoint is deconfigured, this operation is reversed + * with VFIO calling kvm_vgic_v4_unset_forwarding(). + * + * Once the VLPI has been mapped, it needs to follow any change the + * guest performs on its LPI through the vITS. For that, a number of + * command handlers have hooks to communicate these changes to the HW: + * - Any invalidation triggers a call to its_prop_update_vlpi() + * - The INT command results in a irq_set_irqchip_state(), which + * generates an INT on the corresponding VLPI. + * - The CLEAR command results in a irq_set_irqchip_state(), which + * generates an CLEAR on the corresponding VLPI. + * - DISCARD translates into an unmap, similar to a call to + * kvm_vgic_v4_unset_forwarding(). + * - MOVI is translated by an update of the existing mapping, changing + * the target vcpu, resulting in a VMOVI being generated. + * - MOVALL is translated by a string of mapping updates (similar to + * the handling of MOVI). MOVALL is horrible. + * + * Note that a DISCARD/MAPTI sequence emitted from the guest without + * reprogramming the PCI endpoint after MAPTI does not result in a + * VLPI being mapped, as there is no callback from VFIO (the guest + * will get the interrupt via the normal SW injection). Fixing this is + * not trivial, and requires some horrible messing with the VFIO + * internals. Not fun. Don't do that. + * + * Then there is the scheduling. Each time a vcpu is about to run on a + * physical CPU, KVM must tell the corresponding redistributor about + * it. And if we've migrated our vcpu from one CPU to another, we must + * tell the ITS (so that the messages reach the right redistributor). + * This is done in two steps: first issue a irq_set_affinity() on the + * irq corresponding to the vcpu, then call its_make_vpe_resident(). + * You must be in a non-preemptible context. On exit, a call to + * its_make_vpe_non_resident() tells the redistributor that we're done + * with the vcpu. + * + * Finally, the doorbell handling: Each vcpu is allocated an interrupt + * which will fire each time a VLPI is made pending whilst the vcpu is + * not running. Each time the vcpu gets blocked, the doorbell + * interrupt gets enabled. When the vcpu is unblocked (for whatever + * reason), the doorbell interrupt is disabled. + */ + +#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING) + +static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info) +{ + struct kvm_vcpu *vcpu = info; + + /* We got the message, no need to fire again */ + if (!kvm_vgic_global_state.has_gicv4_1 && + !irqd_irq_disabled(&irq_to_desc(irq)->irq_data)) + disable_irq_nosync(irq); + + vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true; + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return IRQ_HANDLED; +} + +static void vgic_v4_sync_sgi_config(struct its_vpe *vpe, struct vgic_irq *irq) +{ + vpe->sgi_config[irq->intid].enabled = irq->enabled; + vpe->sgi_config[irq->intid].group = irq->group; + vpe->sgi_config[irq->intid].priority = irq->priority; +} + +static void vgic_v4_enable_vsgis(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + int i; + + /* + * With GICv4.1, every virtual SGI can be directly injected. So + * let's pretend that they are HW interrupts, tied to a host + * IRQ. The SGI code will do its magic. + */ + for (i = 0; i < VGIC_NR_SGIS; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct irq_desc *desc; + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (irq->hw) + goto unlock; + + irq->hw = true; + irq->host_irq = irq_find_mapping(vpe->sgi_domain, i); + + /* Transfer the full irq state to the vPE */ + vgic_v4_sync_sgi_config(vpe, irq); + desc = irq_to_desc(irq->host_irq); + ret = irq_domain_activate_irq(irq_desc_get_irq_data(desc), + false); + if (!WARN_ON(ret)) { + /* Transfer pending state */ + ret = irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + irq->pending_latch); + WARN_ON(ret); + irq->pending_latch = false; + } + unlock: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +static void vgic_v4_disable_vsgis(struct kvm_vcpu *vcpu) +{ + int i; + + for (i = 0; i < VGIC_NR_SGIS; i++) { + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, i); + struct irq_desc *desc; + unsigned long flags; + int ret; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!irq->hw) + goto unlock; + + irq->hw = false; + ret = irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &irq->pending_latch); + WARN_ON(ret); + + desc = irq_to_desc(irq->host_irq); + irq_domain_deactivate_irq(irq_desc_get_irq_data(desc)); + unlock: + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + } +} + +/* Must be called with the kvm lock held */ +void vgic_v4_configure_vsgis(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i; + + kvm_arm_halt_guest(kvm); + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (dist->nassgireq) + vgic_v4_enable_vsgis(vcpu); + else + vgic_v4_disable_vsgis(vcpu); + } + + kvm_arm_resume_guest(kvm); +} + +/** + * vgic_v4_init - Initialize the GICv4 data structures + * @kvm: Pointer to the VM being initialized + * + * We may be called each time a vITS is created, or when the + * vgic is initialized. This relies on kvm->lock to be + * held. In both cases, the number of vcpus should now be + * fixed. + */ +int vgic_v4_init(struct kvm *kvm) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct kvm_vcpu *vcpu; + int i, nr_vcpus, ret; + + if (!kvm_vgic_global_state.has_gicv4) + return 0; /* Nothing to see here... move along. */ + + if (dist->its_vm.vpes) + return 0; + + nr_vcpus = atomic_read(&kvm->online_vcpus); + + dist->its_vm.vpes = kcalloc(nr_vcpus, sizeof(*dist->its_vm.vpes), + GFP_KERNEL); + if (!dist->its_vm.vpes) + return -ENOMEM; + + dist->its_vm.nr_vpes = nr_vcpus; + + kvm_for_each_vcpu(i, vcpu, kvm) + dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + ret = its_alloc_vcpu_irqs(&dist->its_vm); + if (ret < 0) { + kvm_err("VPE IRQ allocation failure\n"); + kfree(dist->its_vm.vpes); + dist->its_vm.nr_vpes = 0; + dist->its_vm.vpes = NULL; + return ret; + } + + kvm_for_each_vcpu(i, vcpu, kvm) { + int irq = dist->its_vm.vpes[i]->irq; + unsigned long irq_flags = DB_IRQ_FLAGS; + + /* + * Don't automatically enable the doorbell, as we're + * flipping it back and forth when the vcpu gets + * blocked. Also disable the lazy disabling, as the + * doorbell could kick us out of the guest too + * early... + * + * On GICv4.1, the doorbell is managed in HW and must + * be left enabled. + */ + if (kvm_vgic_global_state.has_gicv4_1) + irq_flags &= ~IRQ_NOAUTOEN; + irq_set_status_flags(irq, irq_flags); + + ret = request_irq(irq, vgic_v4_doorbell_handler, + 0, "vcpu", vcpu); + if (ret) { + kvm_err("failed to allocate vcpu IRQ%d\n", irq); + /* + * Trick: adjust the number of vpes so we know + * how many to nuke on teardown... + */ + dist->its_vm.nr_vpes = i; + break; + } + } + + if (ret) + vgic_v4_teardown(kvm); + + return ret; +} + +/** + * vgic_v4_teardown - Free the GICv4 data structures + * @kvm: Pointer to the VM being destroyed + * + * Relies on kvm->lock to be held. + */ +void vgic_v4_teardown(struct kvm *kvm) +{ + struct its_vm *its_vm = &kvm->arch.vgic.its_vm; + int i; + + if (!its_vm->vpes) + return; + + for (i = 0; i < its_vm->nr_vpes; i++) { + struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i); + int irq = its_vm->vpes[i]->irq; + + irq_clear_status_flags(irq, DB_IRQ_FLAGS); + free_irq(irq, vcpu); + } + + its_free_vcpu_irqs(its_vm); + kfree(its_vm->vpes); + its_vm->nr_vpes = 0; + its_vm->vpes = NULL; +} + +int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + + if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident) + return 0; + + return its_make_vpe_non_resident(vpe, need_db); +} + +int vgic_v4_load(struct kvm_vcpu *vcpu) +{ + struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe; + int err; + + if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident) + return 0; + + /* + * Before making the VPE resident, make sure the redistributor + * corresponding to our current CPU expects us here. See the + * doc in drivers/irqchip/irq-gic-v4.c to understand how this + * turns into a VMOVP command at the ITS level. + */ + err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id())); + if (err) + return err; + + err = its_make_vpe_resident(vpe, false, vcpu->kvm->arch.vgic.enabled); + if (err) + return err; + + /* + * Now that the VPE is resident, let's get rid of a potential + * doorbell interrupt that would still be pending. This is a + * GICv4.0 only "feature"... + */ + if (!kvm_vgic_global_state.has_gicv4_1) + err = irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false); + + return err; +} + +static struct vgic_its *vgic_get_its(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct kvm_msi msi = (struct kvm_msi) { + .address_lo = irq_entry->msi.address_lo, + .address_hi = irq_entry->msi.address_hi, + .data = irq_entry->msi.data, + .flags = irq_entry->msi.flags, + .devid = irq_entry->msi.devid, + }; + + return vgic_msi_to_its(kvm, &msi); +} + +int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + struct its_vlpi_map map; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* + * Get the ITS, and escape early on error (not a valid + * doorbell for any of our vITSs). + */ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(&its->its_lock); + + /* Perform the actual DevID/EventID -> LPI translation. */ + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq); + if (ret) + goto out; + + /* + * Emit the mapping request. If it fails, the ITS probably + * isn't v4 compatible, so let's silently bail out. Holding + * the ITS lock should ensure that nothing can modify the + * target vcpu. + */ + map = (struct its_vlpi_map) { + .vm = &kvm->arch.vgic.its_vm, + .vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe, + .vintid = irq->intid, + .properties = ((irq->priority & 0xfc) | + (irq->enabled ? LPI_PROP_ENABLED : 0) | + LPI_PROP_GROUP1), + .db_enabled = true, + }; + + ret = its_map_vlpi(virq, &map); + if (ret) + goto out; + + irq->hw = true; + irq->host_irq = virq; + atomic_inc(&map.vpe->vlpi_count); + +out: + mutex_unlock(&its->its_lock); + return ret; +} + +int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq, + struct kvm_kernel_irq_routing_entry *irq_entry) +{ + struct vgic_its *its; + struct vgic_irq *irq; + int ret; + + if (!vgic_supports_direct_msis(kvm)) + return 0; + + /* + * Get the ITS, and escape early on error (not a valid + * doorbell for any of our vITSs). + */ + its = vgic_get_its(kvm, irq_entry); + if (IS_ERR(its)) + return 0; + + mutex_lock(&its->its_lock); + + ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid, + irq_entry->msi.data, &irq); + if (ret) + goto out; + + WARN_ON(!(irq->hw && irq->host_irq == virq)); + if (irq->hw) { + atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count); + irq->hw = false; + ret = its_unmap_vlpi(virq); + } + +out: + mutex_unlock(&its->its_lock); + return ret; +} diff --git a/arch/arm64/kvm/vgic/vgic.c b/arch/arm64/kvm/vgic/vgic.c new file mode 100644 index 000000000000..c3643b7f101b --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic.c @@ -0,0 +1,1020 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ + +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kvm.h> +#include <linux/kvm_host.h> +#include <linux/list_sort.h> +#include <linux/nospec.h> + +#include <asm/kvm_hyp.h> + +#include "vgic.h" + +#define CREATE_TRACE_POINTS +#include "trace.h" + +struct vgic_global kvm_vgic_global_state __ro_after_init = { + .gicv3_cpuif = STATIC_KEY_FALSE_INIT, +}; + +/* + * Locking order is always: + * kvm->lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled + * + * As the ap_list_lock might be taken from the timer interrupt handler, + * we have to disable IRQs before taking this lock and everything lower + * than it. + * + * If you need to take multiple locks, always take the upper lock first, + * then the lower ones, e.g. first take the its_lock, then the irq_lock. + * If you are already holding a lock and need to take a higher one, you + * have to drop the lower ranking lock first and re-aquire it after having + * taken the upper one. + * + * When taking more than one ap_list_lock at the same time, always take the + * lowest numbered VCPU's ap_list_lock first, so: + * vcpuX->vcpu_id < vcpuY->vcpu_id: + * raw_spin_lock(vcpuX->arch.vgic_cpu.ap_list_lock); + * raw_spin_lock(vcpuY->arch.vgic_cpu.ap_list_lock); + * + * Since the VGIC must support injecting virtual interrupts from ISRs, we have + * to use the raw_spin_lock_irqsave/raw_spin_unlock_irqrestore versions of outer + * spinlocks for any lock that may be taken while injecting an interrupt. + */ + +/* + * Iterate over the VM's list of mapped LPIs to find the one with a + * matching interrupt ID and return a reference to the IRQ structure. + */ +static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + struct vgic_irq *irq = NULL; + unsigned long flags; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + + list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { + if (irq->intid != intid) + continue; + + /* + * This increases the refcount, the caller is expected to + * call vgic_put_irq() later once it's finished with the IRQ. + */ + vgic_get_irq_kref(irq); + goto out_unlock; + } + irq = NULL; + +out_unlock: + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); + + return irq; +} + +/* + * This looks up the virtual interrupt ID to get the corresponding + * struct vgic_irq. It also increases the refcount, so any caller is expected + * to call vgic_put_irq() once it's finished with this IRQ. + */ +struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + u32 intid) +{ + /* SGIs and PPIs */ + if (intid <= VGIC_MAX_PRIVATE) { + intid = array_index_nospec(intid, VGIC_MAX_PRIVATE + 1); + return &vcpu->arch.vgic_cpu.private_irqs[intid]; + } + + /* SPIs */ + if (intid < (kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS)) { + intid = array_index_nospec(intid, kvm->arch.vgic.nr_spis + VGIC_NR_PRIVATE_IRQS); + return &kvm->arch.vgic.spis[intid - VGIC_NR_PRIVATE_IRQS]; + } + + /* LPIs */ + if (intid >= VGIC_MIN_LPI) + return vgic_get_lpi(kvm, intid); + + WARN(1, "Looking up struct vgic_irq for reserved INTID"); + return NULL; +} + +/* + * We can't do anything in here, because we lack the kvm pointer to + * lock and remove the item from the lpi_list. So we keep this function + * empty and use the return value of kref_put() to trigger the freeing. + */ +static void vgic_irq_release(struct kref *ref) +{ +} + +/* + * Drop the refcount on the LPI. Must be called with lpi_list_lock held. + */ +void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + + if (!kref_put(&irq->refcount, vgic_irq_release)) + return; + + list_del(&irq->lpi_list); + dist->lpi_list_count--; + + kfree(irq); +} + +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) +{ + struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; + + if (irq->intid < VGIC_MIN_LPI) + return; + + raw_spin_lock_irqsave(&dist->lpi_list_lock, flags); + __vgic_put_lpi_locked(kvm, irq); + raw_spin_unlock_irqrestore(&dist->lpi_list_lock, flags); +} + +void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq, *tmp; + unsigned long flags; + + raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + + list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { + if (irq->intid >= VGIC_MIN_LPI) { + raw_spin_lock(&irq->irq_lock); + list_del(&irq->ap_list); + irq->vcpu = NULL; + raw_spin_unlock(&irq->irq_lock); + vgic_put_irq(vcpu->kvm, irq); + } + } + + raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); +} + +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending) +{ + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + pending)); +} + +bool vgic_get_phys_line_level(struct vgic_irq *irq) +{ + bool line_level; + + BUG_ON(!irq->hw); + + if (irq->get_input_level) + return irq->get_input_level(irq->intid); + + WARN_ON(irq_get_irqchip_state(irq->host_irq, + IRQCHIP_STATE_PENDING, + &line_level)); + return line_level; +} + +/* Set/Clear the physical active state */ +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) +{ + + BUG_ON(!irq->hw); + WARN_ON(irq_set_irqchip_state(irq->host_irq, + IRQCHIP_STATE_ACTIVE, + active)); +} + +/** + * kvm_vgic_target_oracle - compute the target vcpu for an irq + * + * @irq: The irq to route. Must be already locked. + * + * Based on the current state of the interrupt (enabled, pending, + * active, vcpu and target_vcpu), compute the next vcpu this should be + * given to. Return NULL if this shouldn't be injected at all. + * + * Requires the IRQ lock to be held. + */ +static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) +{ + lockdep_assert_held(&irq->irq_lock); + + /* If the interrupt is active, it must stay on the current vcpu */ + if (irq->active) + return irq->vcpu ? : irq->target_vcpu; + + /* + * If the IRQ is not active but enabled and pending, we should direct + * it to its configured target VCPU. + * If the distributor is disabled, pending interrupts shouldn't be + * forwarded. + */ + if (irq->enabled && irq_is_pending(irq)) { + if (unlikely(irq->target_vcpu && + !irq->target_vcpu->kvm->arch.vgic.enabled)) + return NULL; + + return irq->target_vcpu; + } + + /* If neither active nor pending and enabled, then this IRQ should not + * be queued to any VCPU. + */ + return NULL; +} + +/* + * The order of items in the ap_lists defines how we'll pack things in LRs as + * well, the first items in the list being the first things populated in the + * LRs. + * + * A hard rule is that active interrupts can never be pushed out of the LRs + * (and therefore take priority) since we cannot reliably trap on deactivation + * of IRQs and therefore they have to be present in the LRs. + * + * Otherwise things should be sorted by the priority field and the GIC + * hardware support will take care of preemption of priority groups etc. + * + * Return negative if "a" sorts before "b", 0 to preserve order, and positive + * to sort "b" before "a". + */ +static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b) +{ + struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list); + struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list); + bool penda, pendb; + int ret; + + /* + * list_sort may call this function with the same element when + * the list is fairly long. + */ + if (unlikely(irqa == irqb)) + return 0; + + raw_spin_lock(&irqa->irq_lock); + raw_spin_lock_nested(&irqb->irq_lock, SINGLE_DEPTH_NESTING); + + if (irqa->active || irqb->active) { + ret = (int)irqb->active - (int)irqa->active; + goto out; + } + + penda = irqa->enabled && irq_is_pending(irqa); + pendb = irqb->enabled && irq_is_pending(irqb); + + if (!penda || !pendb) { + ret = (int)pendb - (int)penda; + goto out; + } + + /* Both pending and enabled, sort by priority */ + ret = irqa->priority - irqb->priority; +out: + raw_spin_unlock(&irqb->irq_lock); + raw_spin_unlock(&irqa->irq_lock); + return ret; +} + +/* Must be called with the ap_list_lock held */ +static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); +} + +/* + * Only valid injection if changing level for level-triggered IRQs or for a + * rising edge, and in-kernel connected IRQ lines can only be controlled by + * their owner. + */ +static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner) +{ + if (irq->owner != owner) + return false; + + switch (irq->config) { + case VGIC_CONFIG_LEVEL: + return irq->line_level != level; + case VGIC_CONFIG_EDGE: + return level; + } + + return false; +} + +/* + * Check whether an IRQ needs to (and can) be queued to a VCPU's ap list. + * Do the queuing if necessary, taking the right locks in the right order. + * Returns true when the IRQ was queued, false otherwise. + * + * Needs to be entered with the IRQ lock already held, but will return + * with all locks dropped. + */ +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags) +{ + struct kvm_vcpu *vcpu; + + lockdep_assert_held(&irq->irq_lock); + +retry: + vcpu = vgic_target_oracle(irq); + if (irq->vcpu || !vcpu) { + /* + * If this IRQ is already on a VCPU's ap_list, then it + * cannot be moved or modified and there is no more work for + * us to do. + * + * Otherwise, if the irq is not pending and enabled, it does + * not need to be inserted into an ap_list and there is also + * no more work for us to do. + */ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + /* + * We have to kick the VCPU here, because we could be + * queueing an edge-triggered interrupt for which we + * get no EOI maintenance interrupt. In that case, + * while the IRQ is already on the VCPU's AP list, the + * VCPU could have EOI'ed the original interrupt and + * won't see this one until it exits for some other + * reason. + */ + if (vcpu) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } + return false; + } + + /* + * We must unlock the irq lock to take the ap_list_lock where + * we are going to insert this new pending interrupt. + */ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + /* someone can do stuff here, which we re-check below */ + + raw_spin_lock_irqsave(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + raw_spin_lock(&irq->irq_lock); + + /* + * Did something change behind our backs? + * + * There are two cases: + * 1) The irq lost its pending state or was disabled behind our + * backs and/or it was queued to another VCPU's ap_list. + * 2) Someone changed the affinity on this irq behind our + * backs and we are now holding the wrong ap_list_lock. + * + * In both cases, drop the locks and retry. + */ + + if (unlikely(irq->vcpu || vcpu != vgic_target_oracle(irq))) { + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, + flags); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + goto retry; + } + + /* + * Grab a reference to the irq to reflect the fact that it is + * now in the ap_list. + */ + vgic_get_irq_kref(irq); + list_add_tail(&irq->ap_list, &vcpu->arch.vgic_cpu.ap_list_head); + irq->vcpu = vcpu; + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock_irqrestore(&vcpu->arch.vgic_cpu.ap_list_lock, flags); + + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + + return true; +} + +/** + * kvm_vgic_inject_irq - Inject an IRQ from a device to the vgic + * @kvm: The VM structure pointer + * @cpuid: The CPU for PPIs + * @intid: The INTID to inject a new state to. + * @level: Edge-triggered: true: to trigger the interrupt + * false: to ignore the call + * Level-sensitive true: raise the input signal + * false: lower the input signal + * @owner: The opaque pointer to the owner of the IRQ being raised to verify + * that the caller is allowed to inject this IRQ. Userspace + * injections will have owner == NULL. + * + * The VGIC is not concerned with devices being active-LOW or active-HIGH for + * level-sensitive interrupts. You can think of the level parameter as 1 + * being HIGH and 0 being LOW and all devices being active-HIGH. + */ +int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, + bool level, void *owner) +{ + struct kvm_vcpu *vcpu; + struct vgic_irq *irq; + unsigned long flags; + int ret; + + trace_vgic_update_irq_pending(cpuid, intid, level); + + ret = vgic_lazy_init(kvm); + if (ret) + return ret; + + vcpu = kvm_get_vcpu(kvm, cpuid); + if (!vcpu && intid < VGIC_NR_PRIVATE_IRQS) + return -EINVAL; + + irq = vgic_get_irq(kvm, vcpu, intid); + if (!irq) + return -EINVAL; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + + if (!vgic_validate_injection(irq, level, owner)) { + /* Nothing to see here, move along... */ + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(kvm, irq); + return 0; + } + + if (irq->config == VGIC_CONFIG_LEVEL) + irq->line_level = level; + else + irq->pending_latch = true; + + vgic_queue_irq_unlock(kvm, irq, flags); + vgic_put_irq(kvm, irq); + + return 0; +} + +/* @irq->irq_lock must be held */ +static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq, + unsigned int host_irq, + bool (*get_input_level)(int vindid)) +{ + struct irq_desc *desc; + struct irq_data *data; + + /* + * Find the physical IRQ number corresponding to @host_irq + */ + desc = irq_to_desc(host_irq); + if (!desc) { + kvm_err("%s: no interrupt descriptor\n", __func__); + return -EINVAL; + } + data = irq_desc_get_irq_data(desc); + while (data->parent_data) + data = data->parent_data; + + irq->hw = true; + irq->host_irq = host_irq; + irq->hwintid = data->hwirq; + irq->get_input_level = get_input_level; + return 0; +} + +/* @irq->irq_lock must be held */ +static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq) +{ + irq->hw = false; + irq->hwintid = 0; + irq->get_input_level = NULL; +} + +int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq, + u32 vintid, bool (*get_input_level)(int vindid)) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + int ret; + + BUG_ON(!irq); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + ret = kvm_vgic_map_irq(vcpu, irq, host_irq, get_input_level); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return ret; +} + +/** + * kvm_vgic_reset_mapped_irq - Reset a mapped IRQ + * @vcpu: The VCPU pointer + * @vintid: The INTID of the interrupt + * + * Reset the active and pending states of a mapped interrupt. Kernel + * subsystems injecting mapped interrupts should reset their interrupt lines + * when we are doing a reset of the VM. + */ +void kvm_vgic_reset_mapped_irq(struct kvm_vcpu *vcpu, u32 vintid) +{ + struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + unsigned long flags; + + if (!irq->hw) + goto out; + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + irq->active = false; + irq->pending_latch = false; + irq->line_level = false; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); +out: + vgic_put_irq(vcpu->kvm, irq); +} + +int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq; + unsigned long flags; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + BUG_ON(!irq); + + raw_spin_lock_irqsave(&irq->irq_lock, flags); + kvm_vgic_unmap_irq(irq); + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return 0; +} + +/** + * kvm_vgic_set_owner - Set the owner of an interrupt for a VM + * + * @vcpu: Pointer to the VCPU (used for PPIs) + * @intid: The virtual INTID identifying the interrupt (PPI or SPI) + * @owner: Opaque pointer to the owner + * + * Returns 0 if intid is not already used by another in-kernel device and the + * owner is set, otherwise returns an error code. + */ +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) +{ + struct vgic_irq *irq; + unsigned long flags; + int ret = 0; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + /* SGIs and LPIs cannot be wired up to any device */ + if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) + return -EINVAL; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + if (irq->owner && irq->owner != owner) + ret = -EEXIST; + else + irq->owner = owner; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + + return ret; +} + +/** + * vgic_prune_ap_list - Remove non-relevant interrupts from the list + * + * @vcpu: The VCPU pointer + * + * Go over the list of "interesting" interrupts, and prune those that we + * won't have to consider in the near future. + */ +static void vgic_prune_ap_list(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq, *tmp; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + +retry: + raw_spin_lock(&vgic_cpu->ap_list_lock); + + list_for_each_entry_safe(irq, tmp, &vgic_cpu->ap_list_head, ap_list) { + struct kvm_vcpu *target_vcpu, *vcpuA, *vcpuB; + bool target_vcpu_needs_kick = false; + + raw_spin_lock(&irq->irq_lock); + + BUG_ON(vcpu != irq->vcpu); + + target_vcpu = vgic_target_oracle(irq); + + if (!target_vcpu) { + /* + * We don't need to process this interrupt any + * further, move it off the list. + */ + list_del(&irq->ap_list); + irq->vcpu = NULL; + raw_spin_unlock(&irq->irq_lock); + + /* + * This vgic_put_irq call matches the + * vgic_get_irq_kref in vgic_queue_irq_unlock, + * where we added the LPI to the ap_list. As + * we remove the irq from the list, we drop + * also drop the refcount. + */ + vgic_put_irq(vcpu->kvm, irq); + continue; + } + + if (target_vcpu == vcpu) { + /* We're on the right CPU */ + raw_spin_unlock(&irq->irq_lock); + continue; + } + + /* This interrupt looks like it has to be migrated. */ + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock(&vgic_cpu->ap_list_lock); + + /* + * Ensure locking order by always locking the smallest + * ID first. + */ + if (vcpu->vcpu_id < target_vcpu->vcpu_id) { + vcpuA = vcpu; + vcpuB = target_vcpu; + } else { + vcpuA = target_vcpu; + vcpuB = vcpu; + } + + raw_spin_lock(&vcpuA->arch.vgic_cpu.ap_list_lock); + raw_spin_lock_nested(&vcpuB->arch.vgic_cpu.ap_list_lock, + SINGLE_DEPTH_NESTING); + raw_spin_lock(&irq->irq_lock); + + /* + * If the affinity has been preserved, move the + * interrupt around. Otherwise, it means things have + * changed while the interrupt was unlocked, and we + * need to replay this. + * + * In all cases, we cannot trust the list not to have + * changed, so we restart from the beginning. + */ + if (target_vcpu == vgic_target_oracle(irq)) { + struct vgic_cpu *new_cpu = &target_vcpu->arch.vgic_cpu; + + list_del(&irq->ap_list); + irq->vcpu = target_vcpu; + list_add_tail(&irq->ap_list, &new_cpu->ap_list_head); + target_vcpu_needs_kick = true; + } + + raw_spin_unlock(&irq->irq_lock); + raw_spin_unlock(&vcpuB->arch.vgic_cpu.ap_list_lock); + raw_spin_unlock(&vcpuA->arch.vgic_cpu.ap_list_lock); + + if (target_vcpu_needs_kick) { + kvm_make_request(KVM_REQ_IRQ_PENDING, target_vcpu); + kvm_vcpu_kick(target_vcpu); + } + + goto retry; + } + + raw_spin_unlock(&vgic_cpu->ap_list_lock); +} + +static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_fold_lr_state(vcpu); + else + vgic_v3_fold_lr_state(vcpu); +} + +/* Requires the irq_lock to be held. */ +static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, + struct vgic_irq *irq, int lr) +{ + lockdep_assert_held(&irq->irq_lock); + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_populate_lr(vcpu, irq, lr); + else + vgic_v3_populate_lr(vcpu, irq, lr); +} + +static inline void vgic_clear_lr(struct kvm_vcpu *vcpu, int lr) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_clear_lr(vcpu, lr); + else + vgic_v3_clear_lr(vcpu, lr); +} + +static inline void vgic_set_underflow(struct kvm_vcpu *vcpu) +{ + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_set_underflow(vcpu); + else + vgic_v3_set_underflow(vcpu); +} + +/* Requires the ap_list_lock to be held. */ +static int compute_ap_list_depth(struct kvm_vcpu *vcpu, + bool *multi_sgi) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + int count = 0; + + *multi_sgi = false; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + int w; + + raw_spin_lock(&irq->irq_lock); + /* GICv2 SGIs can count for more than one... */ + w = vgic_irq_get_lr_count(irq); + raw_spin_unlock(&irq->irq_lock); + + count += w; + *multi_sgi |= (w > 1); + } + return count; +} + +/* Requires the VCPU's ap_list_lock to be held. */ +static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + int count; + bool multi_sgi; + u8 prio = 0xff; + int i = 0; + + lockdep_assert_held(&vgic_cpu->ap_list_lock); + + count = compute_ap_list_depth(vcpu, &multi_sgi); + if (count > kvm_vgic_global_state.nr_lr || multi_sgi) + vgic_sort_ap_list(vcpu); + + count = 0; + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + raw_spin_lock(&irq->irq_lock); + + /* + * If we have multi-SGIs in the pipeline, we need to + * guarantee that they are all seen before any IRQ of + * lower priority. In that case, we need to filter out + * these interrupts by exiting early. This is easy as + * the AP list has been sorted already. + */ + if (multi_sgi && irq->priority > prio) { + _raw_spin_unlock(&irq->irq_lock); + break; + } + + if (likely(vgic_target_oracle(irq) == vcpu)) { + vgic_populate_lr(vcpu, irq, count++); + + if (irq->source) + prio = irq->priority; + } + + raw_spin_unlock(&irq->irq_lock); + + if (count == kvm_vgic_global_state.nr_lr) { + if (!list_is_last(&irq->ap_list, + &vgic_cpu->ap_list_head)) + vgic_set_underflow(vcpu); + break; + } + } + + /* Nuke remaining LRs */ + for (i = count ; i < kvm_vgic_global_state.nr_lr; i++) + vgic_clear_lr(vcpu, i); + + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vcpu->arch.vgic_cpu.vgic_v2.used_lrs = count; + else + vcpu->arch.vgic_cpu.vgic_v3.used_lrs = count; +} + +static inline bool can_access_vgic_from_kernel(void) +{ + /* + * GICv2 can always be accessed from the kernel because it is + * memory-mapped, and VHE systems can access GICv3 EL2 system + * registers. + */ + return !static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif) || has_vhe(); +} + +static inline void vgic_save_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_save_state(vcpu); + else + __vgic_v3_save_state(&vcpu->arch.vgic_cpu.vgic_v3); +} + +/* Sync back the hardware VGIC state into our emulation after a guest's run. */ +void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) +{ + int used_lrs; + + /* An empty ap_list_head implies used_lrs == 0 */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) + return; + + if (can_access_vgic_from_kernel()) + vgic_save_state(vcpu); + + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + used_lrs = vcpu->arch.vgic_cpu.vgic_v2.used_lrs; + else + used_lrs = vcpu->arch.vgic_cpu.vgic_v3.used_lrs; + + if (used_lrs) + vgic_fold_lr_state(vcpu); + vgic_prune_ap_list(vcpu); +} + +static inline void vgic_restore_state(struct kvm_vcpu *vcpu) +{ + if (!static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + vgic_v2_restore_state(vcpu); + else + __vgic_v3_restore_state(&vcpu->arch.vgic_cpu.vgic_v3); +} + +/* Flush our emulation state into the GIC hardware before entering the guest. */ +void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) +{ + /* + * If there are no virtual interrupts active or pending for this + * VCPU, then there is no work to do and we can bail out without + * taking any lock. There is a potential race with someone injecting + * interrupts to the VCPU, but it is a benign race as the VCPU will + * either observe the new interrupt before or after doing this check, + * and introducing additional synchronization mechanism doesn't change + * this. + * + * Note that we still need to go through the whole thing if anything + * can be directly injected (GICv4). + */ + if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head) && + !vgic_supports_direct_msis(vcpu->kvm)) + return; + + DEBUG_SPINLOCK_BUG_ON(!irqs_disabled()); + + if (!list_empty(&vcpu->arch.vgic_cpu.ap_list_head)) { + raw_spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); + vgic_flush_lr_state(vcpu); + raw_spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + } + + if (can_access_vgic_from_kernel()) + vgic_restore_state(vcpu); +} + +void kvm_vgic_load(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_load(vcpu); + else + vgic_v3_load(vcpu); +} + +void kvm_vgic_put(struct kvm_vcpu *vcpu) +{ + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_put(vcpu); + else + vgic_v3_put(vcpu); +} + +void kvm_vgic_vmcr_sync(struct kvm_vcpu *vcpu) +{ + if (unlikely(!irqchip_in_kernel(vcpu->kvm))) + return; + + if (kvm_vgic_global_state.type == VGIC_V2) + vgic_v2_vmcr_sync(vcpu); + else + vgic_v3_vmcr_sync(vcpu); +} + +int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + struct vgic_irq *irq; + bool pending = false; + unsigned long flags; + struct vgic_vmcr vmcr; + + if (!vcpu->kvm->arch.vgic.enabled) + return false; + + if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last) + return true; + + vgic_get_vmcr(vcpu, &vmcr); + + raw_spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags); + + list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { + raw_spin_lock(&irq->irq_lock); + pending = irq_is_pending(irq) && irq->enabled && + !irq->active && + irq->priority < vmcr.pmr; + raw_spin_unlock(&irq->irq_lock); + + if (pending) + break; + } + + raw_spin_unlock_irqrestore(&vgic_cpu->ap_list_lock, flags); + + return pending; +} + +void vgic_kick_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int c; + + /* + * We've injected an interrupt, time to find out who deserves + * a good kick... + */ + kvm_for_each_vcpu(c, vcpu, kvm) { + if (kvm_vgic_vcpu_pending_irq(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); + kvm_vcpu_kick(vcpu); + } + } +} + +bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid) +{ + struct vgic_irq *irq; + bool map_is_active; + unsigned long flags; + + if (!vgic_initialized(vcpu->kvm)) + return false; + + irq = vgic_get_irq(vcpu->kvm, vcpu, vintid); + raw_spin_lock_irqsave(&irq->irq_lock, flags); + map_is_active = irq->hw && irq->active; + raw_spin_unlock_irqrestore(&irq->irq_lock, flags); + vgic_put_irq(vcpu->kvm, irq); + + return map_is_active; +} diff --git a/arch/arm64/kvm/vgic/vgic.h b/arch/arm64/kvm/vgic/vgic.h new file mode 100644 index 000000000000..64fcd7511110 --- /dev/null +++ b/arch/arm64/kvm/vgic/vgic.h @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2015, 2016 ARM Ltd. + */ +#ifndef __KVM_ARM_VGIC_NEW_H__ +#define __KVM_ARM_VGIC_NEW_H__ + +#include <linux/irqchip/arm-gic-common.h> + +#define PRODUCT_ID_KVM 0x4b /* ASCII code K */ +#define IMPLEMENTER_ARM 0x43b + +#define VGIC_ADDR_UNDEF (-1) +#define IS_VGIC_ADDR_UNDEF(_x) ((_x) == VGIC_ADDR_UNDEF) + +#define INTERRUPT_ID_BITS_SPIS 10 +#define INTERRUPT_ID_BITS_ITS 16 +#define VGIC_PRI_BITS 5 + +#define vgic_irq_is_sgi(intid) ((intid) < VGIC_NR_SGIS) + +#define VGIC_AFFINITY_0_SHIFT 0 +#define VGIC_AFFINITY_0_MASK (0xffUL << VGIC_AFFINITY_0_SHIFT) +#define VGIC_AFFINITY_1_SHIFT 8 +#define VGIC_AFFINITY_1_MASK (0xffUL << VGIC_AFFINITY_1_SHIFT) +#define VGIC_AFFINITY_2_SHIFT 16 +#define VGIC_AFFINITY_2_MASK (0xffUL << VGIC_AFFINITY_2_SHIFT) +#define VGIC_AFFINITY_3_SHIFT 24 +#define VGIC_AFFINITY_3_MASK (0xffUL << VGIC_AFFINITY_3_SHIFT) + +#define VGIC_AFFINITY_LEVEL(reg, level) \ + ((((reg) & VGIC_AFFINITY_## level ##_MASK) \ + >> VGIC_AFFINITY_## level ##_SHIFT) << MPIDR_LEVEL_SHIFT(level)) + +/* + * The Userspace encodes the affinity differently from the MPIDR, + * Below macro converts vgic userspace format to MPIDR reg format. + */ +#define VGIC_TO_MPIDR(val) (VGIC_AFFINITY_LEVEL(val, 0) | \ + VGIC_AFFINITY_LEVEL(val, 1) | \ + VGIC_AFFINITY_LEVEL(val, 2) | \ + VGIC_AFFINITY_LEVEL(val, 3)) + +/* + * As per Documentation/virt/kvm/devices/arm-vgic-v3.rst, + * below macros are defined for CPUREG encoding. + */ +#define KVM_REG_ARM_VGIC_SYSREG_OP0_MASK 0x000000000000c000 +#define KVM_REG_ARM_VGIC_SYSREG_OP0_SHIFT 14 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_MASK 0x0000000000003800 +#define KVM_REG_ARM_VGIC_SYSREG_OP1_SHIFT 11 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_MASK 0x0000000000000780 +#define KVM_REG_ARM_VGIC_SYSREG_CRN_SHIFT 7 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_MASK 0x0000000000000078 +#define KVM_REG_ARM_VGIC_SYSREG_CRM_SHIFT 3 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_MASK 0x0000000000000007 +#define KVM_REG_ARM_VGIC_SYSREG_OP2_SHIFT 0 + +#define KVM_DEV_ARM_VGIC_SYSREG_MASK (KVM_REG_ARM_VGIC_SYSREG_OP0_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP1_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRN_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_CRM_MASK | \ + KVM_REG_ARM_VGIC_SYSREG_OP2_MASK) + +/* + * As per Documentation/virt/kvm/devices/arm-vgic-its.rst, + * below macros are defined for ITS table entry encoding. + */ +#define KVM_ITS_CTE_VALID_SHIFT 63 +#define KVM_ITS_CTE_VALID_MASK BIT_ULL(63) +#define KVM_ITS_CTE_RDBASE_SHIFT 16 +#define KVM_ITS_CTE_ICID_MASK GENMASK_ULL(15, 0) +#define KVM_ITS_ITE_NEXT_SHIFT 48 +#define KVM_ITS_ITE_PINTID_SHIFT 16 +#define KVM_ITS_ITE_PINTID_MASK GENMASK_ULL(47, 16) +#define KVM_ITS_ITE_ICID_MASK GENMASK_ULL(15, 0) +#define KVM_ITS_DTE_VALID_SHIFT 63 +#define KVM_ITS_DTE_VALID_MASK BIT_ULL(63) +#define KVM_ITS_DTE_NEXT_SHIFT 49 +#define KVM_ITS_DTE_NEXT_MASK GENMASK_ULL(62, 49) +#define KVM_ITS_DTE_ITTADDR_SHIFT 5 +#define KVM_ITS_DTE_ITTADDR_MASK GENMASK_ULL(48, 5) +#define KVM_ITS_DTE_SIZE_MASK GENMASK_ULL(4, 0) +#define KVM_ITS_L1E_VALID_MASK BIT_ULL(63) +/* we only support 64 kB translation table page size */ +#define KVM_ITS_L1E_ADDR_MASK GENMASK_ULL(51, 16) + +#define KVM_VGIC_V3_RDIST_INDEX_MASK GENMASK_ULL(11, 0) +#define KVM_VGIC_V3_RDIST_FLAGS_MASK GENMASK_ULL(15, 12) +#define KVM_VGIC_V3_RDIST_FLAGS_SHIFT 12 +#define KVM_VGIC_V3_RDIST_BASE_MASK GENMASK_ULL(51, 16) +#define KVM_VGIC_V3_RDIST_COUNT_MASK GENMASK_ULL(63, 52) +#define KVM_VGIC_V3_RDIST_COUNT_SHIFT 52 + +#ifdef CONFIG_DEBUG_SPINLOCK +#define DEBUG_SPINLOCK_BUG_ON(p) BUG_ON(p) +#else +#define DEBUG_SPINLOCK_BUG_ON(p) +#endif + +/* Requires the irq_lock to be held by the caller. */ +static inline bool irq_is_pending(struct vgic_irq *irq) +{ + if (irq->config == VGIC_CONFIG_EDGE) + return irq->pending_latch; + else + return irq->pending_latch || irq->line_level; +} + +static inline bool vgic_irq_is_mapped_level(struct vgic_irq *irq) +{ + return irq->config == VGIC_CONFIG_LEVEL && irq->hw; +} + +static inline int vgic_irq_get_lr_count(struct vgic_irq *irq) +{ + /* Account for the active state as an interrupt */ + if (vgic_irq_is_sgi(irq->intid) && irq->source) + return hweight8(irq->source) + irq->active; + + return irq_is_pending(irq) || irq->active; +} + +static inline bool vgic_irq_is_multi_sgi(struct vgic_irq *irq) +{ + return vgic_irq_get_lr_count(irq) > 1; +} + +/* + * This struct provides an intermediate representation of the fields contained + * in the GICH_VMCR and ICH_VMCR registers, such that code exporting the GIC + * state to userspace can generate either GICv2 or GICv3 CPU interface + * registers regardless of the hardware backed GIC used. + */ +struct vgic_vmcr { + u32 grpen0; + u32 grpen1; + + u32 ackctl; + u32 fiqen; + u32 cbpr; + u32 eoim; + + u32 abpr; + u32 bpr; + u32 pmr; /* Priority mask field in the GICC_PMR and + * ICC_PMR_EL1 priority field format */ +}; + +struct vgic_reg_attr { + struct kvm_vcpu *vcpu; + gpa_t addr; +}; + +int vgic_v3_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr); +int vgic_v2_parse_attr(struct kvm_device *dev, struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr); +const struct vgic_register_region * +vgic_get_mmio_region(struct kvm_vcpu *vcpu, struct vgic_io_device *iodev, + gpa_t addr, int len); +struct vgic_irq *vgic_get_irq(struct kvm *kvm, struct kvm_vcpu *vcpu, + u32 intid); +void __vgic_put_lpi_locked(struct kvm *kvm, struct vgic_irq *irq); +void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq); +bool vgic_get_phys_line_level(struct vgic_irq *irq); +void vgic_irq_set_phys_pending(struct vgic_irq *irq, bool pending); +void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active); +bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, + unsigned long flags); +void vgic_kick_vcpus(struct kvm *kvm); + +int vgic_check_ioaddr(struct kvm *kvm, phys_addr_t *ioaddr, + phys_addr_t addr, phys_addr_t alignment); + +void vgic_v2_fold_lr_state(struct kvm_vcpu *vcpu); +void vgic_v2_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); +void vgic_v2_clear_lr(struct kvm_vcpu *vcpu, int lr); +void vgic_v2_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v2_set_npie(struct kvm_vcpu *vcpu); +int vgic_v2_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); +int vgic_v2_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v2_cpuif_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +void vgic_v2_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v2_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v2_enable(struct kvm_vcpu *vcpu); +int vgic_v2_probe(const struct gic_kvm_info *info); +int vgic_v2_map_resources(struct kvm *kvm); +int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, + enum vgic_type); + +void vgic_v2_init_lrs(void); +void vgic_v2_load(struct kvm_vcpu *vcpu); +void vgic_v2_put(struct kvm_vcpu *vcpu); +void vgic_v2_vmcr_sync(struct kvm_vcpu *vcpu); + +void vgic_v2_save_state(struct kvm_vcpu *vcpu); +void vgic_v2_restore_state(struct kvm_vcpu *vcpu); + +static inline void vgic_get_irq_kref(struct vgic_irq *irq) +{ + if (irq->intid < VGIC_MIN_LPI) + return; + + kref_get(&irq->refcount); +} + +void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); +void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); +void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr); +void vgic_v3_set_underflow(struct kvm_vcpu *vcpu); +void vgic_v3_set_npie(struct kvm_vcpu *vcpu); +void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_v3_enable(struct kvm_vcpu *vcpu); +int vgic_v3_probe(const struct gic_kvm_info *info); +int vgic_v3_map_resources(struct kvm *kvm); +int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq); +int vgic_v3_save_pending_tables(struct kvm *kvm); +int vgic_v3_set_redist_base(struct kvm *kvm, u32 index, u64 addr, u32 count); +int vgic_register_redist_iodev(struct kvm_vcpu *vcpu); +bool vgic_v3_check_base(struct kvm *kvm); + +void vgic_v3_load(struct kvm_vcpu *vcpu); +void vgic_v3_put(struct kvm_vcpu *vcpu); +void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu); + +bool vgic_has_its(struct kvm *kvm); +int kvm_vgic_register_its_device(void); +void vgic_enable_lpis(struct kvm_vcpu *vcpu); +void vgic_flush_pending_lpis(struct kvm_vcpu *vcpu); +int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); +int vgic_v3_has_attr_regs(struct kvm_device *dev, struct kvm_device_attr *attr); +int vgic_v3_dist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v3_redist_uaccess(struct kvm_vcpu *vcpu, bool is_write, + int offset, u32 *val); +int vgic_v3_cpu_sysregs_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u64 id, u64 *val); +int vgic_v3_has_cpu_sysregs_attr(struct kvm_vcpu *vcpu, bool is_write, u64 id, + u64 *reg); +int vgic_v3_line_level_info_uaccess(struct kvm_vcpu *vcpu, bool is_write, + u32 intid, u64 *val); +int kvm_register_vgic_device(unsigned long type); +void vgic_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +void vgic_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr); +int vgic_lazy_init(struct kvm *kvm); +int vgic_init(struct kvm *kvm); + +void vgic_debug_init(struct kvm *kvm); +void vgic_debug_destroy(struct kvm *kvm); + +bool lock_all_vcpus(struct kvm *kvm); +void unlock_all_vcpus(struct kvm *kvm); + +static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu) +{ + struct vgic_cpu *cpu_if = &vcpu->arch.vgic_cpu; + + /* + * num_pri_bits are initialized with HW supported values. + * We can rely safely on num_pri_bits even if VM has not + * restored ICC_CTLR_EL1 before restoring APnR registers. + */ + switch (cpu_if->num_pri_bits) { + case 7: return 3; + case 6: return 1; + default: return 0; + } +} + +static inline bool +vgic_v3_redist_region_full(struct vgic_redist_region *region) +{ + if (!region->count) + return false; + + return (region->free_index >= region->count); +} + +struct vgic_redist_region *vgic_v3_rdist_free_slot(struct list_head *rdregs); + +static inline size_t +vgic_v3_rd_region_size(struct kvm *kvm, struct vgic_redist_region *rdreg) +{ + if (!rdreg->count) + return atomic_read(&kvm->online_vcpus) * KVM_VGIC_V3_REDIST_SIZE; + else + return rdreg->count * KVM_VGIC_V3_REDIST_SIZE; +} + +struct vgic_redist_region *vgic_v3_rdist_region_from_index(struct kvm *kvm, + u32 index); + +bool vgic_v3_rdist_overlap(struct kvm *kvm, gpa_t base, size_t size); + +static inline bool vgic_dist_overlap(struct kvm *kvm, gpa_t base, size_t size) +{ + struct vgic_dist *d = &kvm->arch.vgic; + + return (base + size > d->vgic_dist_base) && + (base < d->vgic_dist_base + KVM_VGIC_V3_DIST_SIZE); +} + +int vgic_copy_lpi_list(struct kvm *kvm, struct kvm_vcpu *vcpu, u32 **intid_ptr); +int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its, + u32 devid, u32 eventid, struct vgic_irq **irq); +struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi); +int vgic_its_inject_cached_translation(struct kvm *kvm, struct kvm_msi *msi); +void vgic_lpi_translation_cache_init(struct kvm *kvm); +void vgic_lpi_translation_cache_destroy(struct kvm *kvm); +void vgic_its_invalidate_cache(struct kvm *kvm); + +bool vgic_supports_direct_msis(struct kvm *kvm); +int vgic_v4_init(struct kvm *kvm); +void vgic_v4_teardown(struct kvm *kvm); +void vgic_v4_configure_vsgis(struct kvm *kvm); + +#endif diff --git a/arch/arm64/lib/copy_from_user.S b/arch/arm64/lib/copy_from_user.S index 8e25e89ad01f..0f8a3a9e3795 100644 --- a/arch/arm64/lib/copy_from_user.S +++ b/arch/arm64/lib/copy_from_user.S @@ -20,36 +20,36 @@ * x0 - bytes not copied */ - .macro ldrb1 ptr, regB, val - uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val + .macro ldrb1 reg, ptr, val + uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val .endm - .macro strb1 ptr, regB, val - strb \ptr, [\regB], \val + .macro strb1 reg, ptr, val + strb \reg, [\ptr], \val .endm - .macro ldrh1 ptr, regB, val - uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val + .macro ldrh1 reg, ptr, val + uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val .endm - .macro strh1 ptr, regB, val - strh \ptr, [\regB], \val + .macro strh1 reg, ptr, val + strh \reg, [\ptr], \val .endm - .macro ldr1 ptr, regB, val - uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val + .macro ldr1 reg, ptr, val + uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val .endm - .macro str1 ptr, regB, val - str \ptr, [\regB], \val + .macro str1 reg, ptr, val + str \reg, [\ptr], \val .endm - .macro ldp1 ptr, regB, regC, val - uao_ldp 9998f, \ptr, \regB, \regC, \val + .macro ldp1 reg1, reg2, ptr, val + uao_ldp 9998f, \reg1, \reg2, \ptr, \val .endm - .macro stp1 ptr, regB, regC, val - stp \ptr, \regB, [\regC], \val + .macro stp1 reg1, reg2, ptr, val + stp \reg1, \reg2, [\ptr], \val .endm end .req x5 diff --git a/arch/arm64/lib/copy_in_user.S b/arch/arm64/lib/copy_in_user.S index 667139013ed1..80e37ada0ee1 100644 --- a/arch/arm64/lib/copy_in_user.S +++ b/arch/arm64/lib/copy_in_user.S @@ -21,36 +21,36 @@ * Returns: * x0 - bytes not copied */ - .macro ldrb1 ptr, regB, val - uao_user_alternative 9998f, ldrb, ldtrb, \ptr, \regB, \val + .macro ldrb1 reg, ptr, val + uao_user_alternative 9998f, ldrb, ldtrb, \reg, \ptr, \val .endm - .macro strb1 ptr, regB, val - uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val + .macro strb1 reg, ptr, val + uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val .endm - .macro ldrh1 ptr, regB, val - uao_user_alternative 9998f, ldrh, ldtrh, \ptr, \regB, \val + .macro ldrh1 reg, ptr, val + uao_user_alternative 9998f, ldrh, ldtrh, \reg, \ptr, \val .endm - .macro strh1 ptr, regB, val - uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val + .macro strh1 reg, ptr, val + uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val .endm - .macro ldr1 ptr, regB, val - uao_user_alternative 9998f, ldr, ldtr, \ptr, \regB, \val + .macro ldr1 reg, ptr, val + uao_user_alternative 9998f, ldr, ldtr, \reg, \ptr, \val .endm - .macro str1 ptr, regB, val - uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val + .macro str1 reg, ptr, val + uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val .endm - .macro ldp1 ptr, regB, regC, val - uao_ldp 9998f, \ptr, \regB, \regC, \val + .macro ldp1 reg1, reg2, ptr, val + uao_ldp 9998f, \reg1, \reg2, \ptr, \val .endm - .macro stp1 ptr, regB, regC, val - uao_stp 9998f, \ptr, \regB, \regC, \val + .macro stp1 reg1, reg2, ptr, val + uao_stp 9998f, \reg1, \reg2, \ptr, \val .endm end .req x5 diff --git a/arch/arm64/lib/copy_to_user.S b/arch/arm64/lib/copy_to_user.S index 1a104d0089f3..4ec59704b8f2 100644 --- a/arch/arm64/lib/copy_to_user.S +++ b/arch/arm64/lib/copy_to_user.S @@ -19,36 +19,36 @@ * Returns: * x0 - bytes not copied */ - .macro ldrb1 ptr, regB, val - ldrb \ptr, [\regB], \val + .macro ldrb1 reg, ptr, val + ldrb \reg, [\ptr], \val .endm - .macro strb1 ptr, regB, val - uao_user_alternative 9998f, strb, sttrb, \ptr, \regB, \val + .macro strb1 reg, ptr, val + uao_user_alternative 9998f, strb, sttrb, \reg, \ptr, \val .endm - .macro ldrh1 ptr, regB, val - ldrh \ptr, [\regB], \val + .macro ldrh1 reg, ptr, val + ldrh \reg, [\ptr], \val .endm - .macro strh1 ptr, regB, val - uao_user_alternative 9998f, strh, sttrh, \ptr, \regB, \val + .macro strh1 reg, ptr, val + uao_user_alternative 9998f, strh, sttrh, \reg, \ptr, \val .endm - .macro ldr1 ptr, regB, val - ldr \ptr, [\regB], \val + .macro ldr1 reg, ptr, val + ldr \reg, [\ptr], \val .endm - .macro str1 ptr, regB, val - uao_user_alternative 9998f, str, sttr, \ptr, \regB, \val + .macro str1 reg, ptr, val + uao_user_alternative 9998f, str, sttr, \reg, \ptr, \val .endm - .macro ldp1 ptr, regB, regC, val - ldp \ptr, \regB, [\regC], \val + .macro ldp1 reg1, reg2, ptr, val + ldp \reg1, \reg2, [\ptr], \val .endm - .macro stp1 ptr, regB, regC, val - uao_stp 9998f, \ptr, \regB, \regC, \val + .macro stp1 reg1, reg2, ptr, val + uao_stp 9998f, \reg1, \reg2, \ptr, \val .endm end .req x5 diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index 243e107e9896..0f9e10ecda23 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -9,7 +9,7 @@ #include <asm/alternative.h> #include <asm/assembler.h> - .cpu generic+crc + .arch armv8-a+crc .macro __crc32, c cmp x2, #16 diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index 9f382adfa88a..e0bf83d556f2 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -24,36 +24,36 @@ * Returns: * x0 - dest */ - .macro ldrb1 ptr, regB, val - ldrb \ptr, [\regB], \val + .macro ldrb1 reg, ptr, val + ldrb \reg, [\ptr], \val .endm - .macro strb1 ptr, regB, val - strb \ptr, [\regB], \val + .macro strb1 reg, ptr, val + strb \reg, [\ptr], \val .endm - .macro ldrh1 ptr, regB, val - ldrh \ptr, [\regB], \val + .macro ldrh1 reg, ptr, val + ldrh \reg, [\ptr], \val .endm - .macro strh1 ptr, regB, val - strh \ptr, [\regB], \val + .macro strh1 reg, ptr, val + strh \reg, [\ptr], \val .endm - .macro ldr1 ptr, regB, val - ldr \ptr, [\regB], \val + .macro ldr1 reg, ptr, val + ldr \reg, [\ptr], \val .endm - .macro str1 ptr, regB, val - str \ptr, [\regB], \val + .macro str1 reg, ptr, val + str \reg, [\ptr], \val .endm - .macro ldp1 ptr, regB, regC, val - ldp \ptr, \regB, [\regC], \val + .macro ldp1 reg1, reg2, ptr, val + ldp \reg1, \reg2, [\ptr], \val .endm - .macro stp1 ptr, regB, regC, val - stp \ptr, \regB, [\regC], \val + .macro stp1 reg1, reg2, ptr, val + stp \reg1, \reg2, [\ptr], \val .endm .weak memcpy diff --git a/arch/arm64/mm/context.c b/arch/arm64/mm/context.c index 9b26f9a88724..d702d60e64da 100644 --- a/arch/arm64/mm/context.c +++ b/arch/arm64/mm/context.c @@ -92,6 +92,9 @@ static void set_reserved_asid_bits(void) bitmap_clear(asid_map, 0, NUM_USER_ASIDS); } +#define asid_gen_match(asid) \ + (!(((asid) ^ atomic64_read(&asid_generation)) >> asid_bits)) + static void flush_context(void) { int i; @@ -220,8 +223,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) * because atomic RmWs are totally ordered for a given location. */ old_active_asid = atomic64_read(&per_cpu(active_asids, cpu)); - if (old_active_asid && - !((asid ^ atomic64_read(&asid_generation)) >> asid_bits) && + if (old_active_asid && asid_gen_match(asid) && atomic64_cmpxchg_relaxed(&per_cpu(active_asids, cpu), old_active_asid, asid)) goto switch_mm_fastpath; @@ -229,7 +231,7 @@ void check_and_switch_context(struct mm_struct *mm, unsigned int cpu) raw_spin_lock_irqsave(&cpu_asid_lock, flags); /* Check that our ASID belongs to the current generation. */ asid = atomic64_read(&mm->context.id); - if ((asid ^ atomic64_read(&asid_generation)) >> asid_bits) { + if (!asid_gen_match(asid)) { asid = new_context(mm); atomic64_set(&mm->context.id, asid); } diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 860c00ec8bd3..0da020c563e6 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -146,6 +146,11 @@ static const struct prot_bits pte_bits[] = { .set = "UXN", .clear = " ", }, { + .mask = PTE_GP, + .val = PTE_GP, + .set = "GP", + .clear = " ", + }, { .mask = PTE_ATTRINDX_MASK, .val = PTE_ATTRINDX(MT_DEVICE_nGnRnE), .set = "DEVICE/nGnRnE", @@ -247,7 +252,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, - unsigned long val) + u64 val) { struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); static const char units[] = "KMGTPE"; diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index c9cedc0432d2..dff2d72b0883 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -635,11 +635,13 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) inf = esr_to_fault_info(esr); - /* - * Return value ignored as we rely on signal merging. - * Future patches will make this more robust. - */ - apei_claim_sea(regs); + if (user_mode(regs) && apei_claim_sea(regs) == 0) { + /* + * APEI claimed this as a firmware-first notification. + * Some processing deferred to task_work before ret_to_user(). + */ + return 0; + } if (esr & ESR_ELx_FnV) siaddr = NULL; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index e42727e3568e..d2df416b840e 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -272,7 +272,7 @@ int pfn_valid(unsigned long pfn) if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS) return 0; - if (!valid_section(__nr_to_section(pfn_to_section_nr(pfn)))) + if (!valid_section(__pfn_to_section(pfn))) return 0; #endif return memblock_is_map_memory(addr); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index a374e4f51a62..c299b73dd5e4 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -610,6 +610,22 @@ core_initcall(map_entry_trampoline); #endif /* + * Open coded check for BTI, only for use to determine configuration + * for early mappings for before the cpufeature code has run. + */ +static bool arm64_early_this_cpu_has_bti(void) +{ + u64 pfr1; + + if (!IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + return false; + + pfr1 = read_sysreg_s(SYS_ID_AA64PFR1_EL1); + return cpuid_feature_extract_unsigned_field(pfr1, + ID_AA64PFR1_BT_SHIFT); +} + +/* * Create fine-grained mappings for the kernel. */ static void __init map_kernel(pgd_t *pgdp) @@ -625,6 +641,14 @@ static void __init map_kernel(pgd_t *pgdp) pgprot_t text_prot = rodata_enabled ? PAGE_KERNEL_ROX : PAGE_KERNEL_EXEC; /* + * If we have a CPU that supports BTI and a kernel built for + * BTI then mark the kernel executable text as guarded pages + * now so we don't have to rewrite the page tables later. + */ + if (arm64_early_this_cpu_has_bti()) + text_prot = __pgprot_modify(text_prot, PTE_GP, PTE_GP); + + /* * Only rodata will be remapped with different permissions later on, * all other segments are allowed to use contiguous mappings. */ diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 250c49008d73..bde08090b838 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -126,13 +126,13 @@ int set_memory_nx(unsigned long addr, int numpages) { return change_memory_common(addr, numpages, __pgprot(PTE_PXN), - __pgprot(0)); + __pgprot(PTE_MAYBE_GP)); } int set_memory_x(unsigned long addr, int numpages) { return change_memory_common(addr, numpages, - __pgprot(0), + __pgprot(PTE_MAYBE_GP), __pgprot(PTE_PXN)); } diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 197a9ba2d5ea..b7bebb12a56d 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -58,6 +58,8 @@ * cpu_do_suspend - save CPU registers context * * x0: virtual address of context pointer + * + * This must be kept in sync with struct cpu_suspend_ctx in <asm/suspend.h>. */ SYM_FUNC_START(cpu_do_suspend) mrs x2, tpidr_el0 @@ -82,6 +84,11 @@ alternative_endif stp x8, x9, [x0, #48] stp x10, x11, [x0, #64] stp x12, x13, [x0, #80] + /* + * Save x18 as it may be used as a platform register, e.g. by shadow + * call stack. + */ + str x18, [x0, #96] ret SYM_FUNC_END(cpu_do_suspend) @@ -98,6 +105,13 @@ SYM_FUNC_START(cpu_do_resume) ldp x9, x10, [x0, #48] ldp x11, x12, [x0, #64] ldp x13, x14, [x0, #80] + /* + * Restore x18, as it may be used as a platform register, and clear + * the buffer to minimize the risk of exposure when used for shadow + * call stack. + */ + ldr x18, [x0, #96] + str xzr, [x0, #96] msr tpidr_el0, x2 msr tpidrro_el0, x3 msr contextidr_el1, x4 @@ -139,7 +153,7 @@ alternative_if ARM64_HAS_RAS_EXTN msr_s SYS_DISR_EL1, xzr alternative_else_nop_endif - ptrauth_keys_install_kernel x14, 0, x1, x2, x3 + ptrauth_keys_install_kernel_nosync x14, x1, x2, x3 isb ret SYM_FUNC_END(cpu_do_resume) @@ -386,8 +400,6 @@ SYM_FUNC_END(idmap_kpti_install_ng_mappings) * * Initialise the processor for turning the MMU on. * - * Input: - * x0 with a flag ARM64_CPU_BOOT_PRIMARY/ARM64_CPU_BOOT_SECONDARY/ARM64_CPU_RUNTIME. * Output: * Return in x0 the value of the SCTLR_EL1 register. */ @@ -446,51 +458,9 @@ SYM_FUNC_START(__cpu_setup) 1: #endif /* CONFIG_ARM64_HW_AFDBM */ msr tcr_el1, x10 - mov x1, x0 /* * Prepare SCTLR */ mov_q x0, SCTLR_EL1_SET - -#ifdef CONFIG_ARM64_PTR_AUTH - /* No ptrauth setup for run time cpus */ - cmp x1, #ARM64_CPU_RUNTIME - b.eq 3f - - /* Check if the CPU supports ptrauth */ - mrs x2, id_aa64isar1_el1 - ubfx x2, x2, #ID_AA64ISAR1_APA_SHIFT, #8 - cbz x2, 3f - - /* - * The primary cpu keys are reset here and can be - * re-initialised with some proper values later. - */ - msr_s SYS_APIAKEYLO_EL1, xzr - msr_s SYS_APIAKEYHI_EL1, xzr - - /* Just enable ptrauth for primary cpu */ - cmp x1, #ARM64_CPU_BOOT_PRIMARY - b.eq 2f - - /* if !system_supports_address_auth() then skip enable */ -alternative_if_not ARM64_HAS_ADDRESS_AUTH - b 3f -alternative_else_nop_endif - - /* Install ptrauth key for secondary cpus */ - adr_l x2, secondary_data - ldr x3, [x2, #CPU_BOOT_TASK] // get secondary_data.task - cbz x3, 2f // check for slow booting cpus - ldp x3, x4, [x2, #CPU_BOOT_PTRAUTH_KEY] - msr_s SYS_APIAKEYLO_EL1, x3 - msr_s SYS_APIAKEYHI_EL1, x4 - -2: /* Enable ptrauth instructions */ - ldr x2, =SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ - SCTLR_ELx_ENDA | SCTLR_ELx_ENDB - orr x0, x0, x2 -3: -#endif ret // return to head.S SYM_FUNC_END(__cpu_setup) diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index eb73f9f72c46..cc0cf0f5c7c3 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -100,6 +100,14 @@ /* Rd = Rn OP imm12 */ #define A64_ADD_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, ADD) #define A64_SUB_I(sf, Rd, Rn, imm12) A64_ADDSUB_IMM(sf, Rd, Rn, imm12, SUB) +#define A64_ADDS_I(sf, Rd, Rn, imm12) \ + A64_ADDSUB_IMM(sf, Rd, Rn, imm12, ADD_SETFLAGS) +#define A64_SUBS_I(sf, Rd, Rn, imm12) \ + A64_ADDSUB_IMM(sf, Rd, Rn, imm12, SUB_SETFLAGS) +/* Rn + imm12; set condition flags */ +#define A64_CMN_I(sf, Rn, imm12) A64_ADDS_I(sf, A64_ZR, Rn, imm12) +/* Rn - imm12; set condition flags */ +#define A64_CMP_I(sf, Rn, imm12) A64_SUBS_I(sf, A64_ZR, Rn, imm12) /* Rd = Rn */ #define A64_MOV(sf, Rd, Rn) A64_ADD_I(sf, Rd, Rn, 0) @@ -189,4 +197,26 @@ /* Rn & Rm; set condition flags */ #define A64_TST(sf, Rn, Rm) A64_ANDS(sf, A64_ZR, Rn, Rm) +/* Logical (immediate) */ +#define A64_LOGIC_IMM(sf, Rd, Rn, imm, type) ({ \ + u64 imm64 = (sf) ? (u64)imm : (u64)(u32)imm; \ + aarch64_insn_gen_logical_immediate(AARCH64_INSN_LOGIC_##type, \ + A64_VARIANT(sf), Rn, Rd, imm64); \ +}) +/* Rd = Rn OP imm */ +#define A64_AND_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, AND) +#define A64_ORR_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, ORR) +#define A64_EOR_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, EOR) +#define A64_ANDS_I(sf, Rd, Rn, imm) A64_LOGIC_IMM(sf, Rd, Rn, imm, AND_SETFLAGS) +/* Rn & imm; set condition flags */ +#define A64_TST_I(sf, Rn, imm) A64_ANDS_I(sf, A64_ZR, Rn, imm) + +/* HINTs */ +#define A64_HINT(x) aarch64_insn_gen_hint(x) + +/* BTI */ +#define A64_BTI_C A64_HINT(AARCH64_INSN_HINT_BTIC) +#define A64_BTI_J A64_HINT(AARCH64_INSN_HINT_BTIJ) +#define A64_BTI_JC A64_HINT(AARCH64_INSN_HINT_BTIJC) + #endif /* _BPF_JIT_H */ diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index cdc79de0c794..3cb25b43b368 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -167,11 +167,21 @@ static inline int epilogue_offset(const struct jit_ctx *ctx) return to - from; } +static bool is_addsub_imm(u32 imm) +{ + /* Either imm12 or shifted imm12. */ + return !(imm & ~0xfff) || !(imm & ~0xfff000); +} + /* Stack must be multiples of 16B */ #define STACK_ALIGN(sz) (((sz) + 15) & ~15) /* Tail call offset to jump into */ +#if IS_ENABLED(CONFIG_ARM64_BTI_KERNEL) +#define PROLOGUE_OFFSET 8 +#else #define PROLOGUE_OFFSET 7 +#endif static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) { @@ -208,6 +218,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) * */ + /* BTI landing pad */ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(A64_BTI_C, ctx); + /* Save FP and LR registers to stay align with ARM64 AAPCS */ emit(A64_PUSH(A64_FP, A64_LR, A64_SP), ctx); emit(A64_MOV(1, A64_FP, A64_SP), ctx); @@ -230,6 +244,10 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf) cur_offset, PROLOGUE_OFFSET); return -1; } + + /* BTI landing pad for the tail call, done with a BR */ + if (IS_ENABLED(CONFIG_ARM64_BTI_KERNEL)) + emit(A64_BTI_J, ctx); } ctx->stack_size = STACK_ALIGN(prog->aux->stack_depth); @@ -356,6 +374,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, const bool isdw = BPF_SIZE(code) == BPF_DW; u8 jmp_cond, reg; s32 jmp_offset; + u32 a64_insn; #define check_imm(bits, imm) do { \ if ((((imm) > 0) && ((imm) >> (bits))) || \ @@ -478,28 +497,55 @@ emit_bswap_uxt: /* dst = dst OP imm */ case BPF_ALU | BPF_ADD | BPF_K: case BPF_ALU64 | BPF_ADD | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_ADD(is64, dst, dst, tmp), ctx); + if (is_addsub_imm(imm)) { + emit(A64_ADD_I(is64, dst, dst, imm), ctx); + } else if (is_addsub_imm(-imm)) { + emit(A64_SUB_I(is64, dst, dst, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_ADD(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_SUB | BPF_K: case BPF_ALU64 | BPF_SUB | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_SUB(is64, dst, dst, tmp), ctx); + if (is_addsub_imm(imm)) { + emit(A64_SUB_I(is64, dst, dst, imm), ctx); + } else if (is_addsub_imm(-imm)) { + emit(A64_ADD_I(is64, dst, dst, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_SUB(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_AND | BPF_K: case BPF_ALU64 | BPF_AND | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_AND(is64, dst, dst, tmp), ctx); + a64_insn = A64_AND_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_AND(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_OR | BPF_K: case BPF_ALU64 | BPF_OR | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_ORR(is64, dst, dst, tmp), ctx); + a64_insn = A64_ORR_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_ORR(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_XOR | BPF_K: case BPF_ALU64 | BPF_XOR | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_EOR(is64, dst, dst, tmp), ctx); + a64_insn = A64_EOR_I(is64, dst, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_EOR(is64, dst, dst, tmp), ctx); + } break; case BPF_ALU | BPF_MUL | BPF_K: case BPF_ALU64 | BPF_MUL | BPF_K: @@ -623,13 +669,24 @@ emit_cond_jmp: case BPF_JMP32 | BPF_JSLT | BPF_K: case BPF_JMP32 | BPF_JSGE | BPF_K: case BPF_JMP32 | BPF_JSLE | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_CMP(is64, dst, tmp), ctx); + if (is_addsub_imm(imm)) { + emit(A64_CMP_I(is64, dst, imm), ctx); + } else if (is_addsub_imm(-imm)) { + emit(A64_CMN_I(is64, dst, -imm), ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_CMP(is64, dst, tmp), ctx); + } goto emit_cond_jmp; case BPF_JMP | BPF_JSET | BPF_K: case BPF_JMP32 | BPF_JSET | BPF_K: - emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_TST(is64, dst, tmp), ctx); + a64_insn = A64_TST_I(is64, dst, imm); + if (a64_insn != AARCH64_BREAK_FAULT) { + emit(a64_insn, ctx); + } else { + emit_a64_mov_i(is64, tmp, imm, ctx); + emit(A64_TST(is64, dst, tmp), ctx); + } goto emit_cond_jmp; /* function call */ case BPF_JMP | BPF_CALL: diff --git a/arch/c6x/lib/checksum.c b/arch/c6x/lib/checksum.c index 46940844c553..335ca4900808 100644 --- a/arch/c6x/lib/checksum.c +++ b/arch/c6x/lib/checksum.c @@ -4,28 +4,6 @@ #include <linux/module.h> #include <net/checksum.h> -#include <asm/byteorder.h> - -/* - * copy from fs while checksumming, otherwise like csum_partial - */ -__wsum -csum_partial_copy_from_user(const void __user *src, void *dst, int len, - __wsum sum, int *csum_err) -{ - int missing; - - missing = __copy_from_user(dst, src, len); - if (missing) { - memset(dst + len - missing, 0, missing); - *csum_err = -EFAULT; - } else - *csum_err = 0; - - return csum_partial(dst, len, sum); -} -EXPORT_SYMBOL(csum_partial_copy_from_user); - /* These are from csum_64plus.S */ EXPORT_SYMBOL(csum_partial); EXPORT_SYMBOL(csum_partial_copy); diff --git a/arch/ia64/include/asm/checksum.h b/arch/ia64/include/asm/checksum.h index 0ed18bc3f6cf..2a1c64629cdc 100644 --- a/arch/ia64/include/asm/checksum.h +++ b/arch/ia64/include/asm/checksum.h @@ -37,16 +37,6 @@ extern __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); -/* - * Same as csum_partial, but copies from src while it checksums. - * - * Here it is even more important to align src and dst on a 32-bit (or - * even better 64-bit) boundary. - */ -extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, - int *errp); - extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum); diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c index f69f3fe0532e..a54eacbc61a9 100644 --- a/arch/ia64/kernel/efi.c +++ b/arch/ia64/kernel/efi.c @@ -57,12 +57,12 @@ unsigned long hcdp_phys = EFI_INVALID_TABLE_ADDR; unsigned long sal_systab_phys = EFI_INVALID_TABLE_ADDR; static const efi_config_table_type_t arch_tables[] __initconst = { - {ESI_TABLE_GUID, "ESI", &esi_phys}, - {HCDP_TABLE_GUID, "HCDP", &hcdp_phys}, - {MPS_TABLE_GUID, "MPS", &mps_phys}, - {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, "PALO", &palo_phys}, - {SAL_SYSTEM_TABLE_GUID, "SALsystab", &sal_systab_phys}, - {NULL_GUID, NULL, 0}, + {ESI_TABLE_GUID, &esi_phys, "ESI" }, + {HCDP_TABLE_GUID, &hcdp_phys, "HCDP" }, + {MPS_TABLE_GUID, &mps_phys, "MPS" }, + {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, &palo_phys, "PALO" }, + {SAL_SYSTEM_TABLE_GUID, &sal_systab_phys, "SALsystab" }, + {}, }; extern efi_status_t efi_call_phys (void *, ...); diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 042911e670b8..49e325b604b3 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -358,3 +358,4 @@ # 435 reserved for clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c index bf9396b1ed32..6e82e0be8040 100644 --- a/arch/ia64/lib/csum_partial_copy.c +++ b/arch/ia64/lib/csum_partial_copy.c @@ -12,7 +12,7 @@ #include <linux/types.h> #include <linux/string.h> -#include <linux/uaccess.h> +#include <net/checksum.h> /* * XXX Fixme: those 2 inlines are meant for debugging and will go away @@ -103,39 +103,11 @@ out: * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS. * But it's very tricky to get right even in C. */ -extern unsigned long do_csum(const unsigned char *, long); - -__wsum -csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum psum, int *errp) -{ - unsigned long result; - - /* XXX Fixme - * for now we separate the copy from checksum for obvious - * alignment difficulties. Look at the Alpha code and you'll be - * scared. - */ - - if (__copy_from_user(dst, src, len) != 0 && errp) - *errp = -EFAULT; - - result = do_csum(dst, len); - - /* add in old sum, and carry.. */ - result += (__force u32)psum; - /* 32+c bits -> 32 bits */ - result = (result & 0xffffffff) + (result >> 32); - return (__force __wsum)result; -} - -EXPORT_SYMBOL(csum_partial_copy_from_user); - __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) { - return csum_partial_copy_from_user((__force const void __user *)src, - dst, len, sum, NULL); + memcpy(dst, src, len); + return csum_partial(dst, len, sum); } EXPORT_SYMBOL(csum_partial_copy_nocheck); diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index c32ab8041cf6..4eb911d64e8d 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -221,6 +221,7 @@ static void __init amiga_identify(void) case AMI_1200: AMIGAHW_SET(A1200_IDE); AMIGAHW_SET(PCMCIA); + fallthrough; case AMI_500: case AMI_500PLUS: case AMI_1000: @@ -233,7 +234,7 @@ static void __init amiga_identify(void) case AMI_3000T: AMIGAHW_SET(AMBER_FF); AMIGAHW_SET(MAGIC_REKICK); - /* fall through */ + fallthrough; case AMI_3000PLUS: AMIGAHW_SET(A3000_SCSI); AMIGAHW_SET(A3000_CLK); @@ -242,7 +243,7 @@ static void __init amiga_identify(void) case AMI_4000T: AMIGAHW_SET(A4000_SCSI); - /* fall through */ + fallthrough; case AMI_4000: AMIGAHW_SET(A4000_IDE); AMIGAHW_SET(A3000_CLK); @@ -628,7 +629,7 @@ struct savekmsg { unsigned long magic2; /* SAVEKMSG_MAGIC2 */ unsigned long magicptr; /* address of magic1 */ unsigned long size; - char data[0]; + char data[]; }; static struct savekmsg *savekmsg; diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c index b4103b6bfdeb..9ef4ec0aea00 100644 --- a/arch/m68k/coldfire/device.c +++ b/arch/m68k/coldfire/device.c @@ -22,6 +22,7 @@ #include <asm/mcfqspi.h> #include <linux/platform_data/edma.h> #include <linux/platform_data/dma-mcf-edma.h> +#include <linux/platform_data/mmc-esdhc-mcf.h> /* * All current ColdFire parts contain from 2, 3, 4 or 10 UARTS. @@ -551,9 +552,35 @@ static struct platform_device mcf_edma = { .platform_data = &mcf_edma_data, } }; - #endif /* IS_ENABLED(CONFIG_MCF_EDMA) */ +#if IS_ENABLED(CONFIG_MMC) +static struct mcf_esdhc_platform_data mcf_esdhc_data = { + .max_bus_width = 4, + .cd_type = ESDHC_CD_NONE, +}; + +static struct resource mcf_esdhc_resources[] = { + { + .start = MCFSDHC_BASE, + .end = MCFSDHC_BASE + MCFSDHC_SIZE - 1, + .flags = IORESOURCE_MEM, + }, { + .start = MCF_IRQ_SDHC, + .end = MCF_IRQ_SDHC, + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device mcf_esdhc = { + .name = "sdhci-esdhc-mcf", + .id = 0, + .num_resources = ARRAY_SIZE(mcf_esdhc_resources), + .resource = mcf_esdhc_resources, + .dev.platform_data = &mcf_esdhc_data, +}; +#endif /* IS_ENABLED(CONFIG_MMC) */ + static struct platform_device *mcf_devices[] __initdata = { &mcf_uart, #if IS_ENABLED(CONFIG_FEC) @@ -586,6 +613,9 @@ static struct platform_device *mcf_devices[] __initdata = { #if IS_ENABLED(CONFIG_MCF_EDMA) &mcf_edma, #endif +#if IS_ENABLED(CONFIG_MMC) + &mcf_esdhc, +#endif }; /* @@ -614,4 +644,3 @@ static int __init mcf_init_devices(void) } arch_initcall(mcf_init_devices); - diff --git a/arch/m68k/coldfire/m5441x.c b/arch/m68k/coldfire/m5441x.c index 5bd24c9b865d..1e5259a652d1 100644 --- a/arch/m68k/coldfire/m5441x.c +++ b/arch/m68k/coldfire/m5441x.c @@ -52,7 +52,7 @@ DEFINE_CLK(0, "mcfssi.0", 47, MCF_CLK); DEFINE_CLK(0, "pll.0", 48, MCF_CLK); DEFINE_CLK(0, "mcfrng.0", 49, MCF_CLK); DEFINE_CLK(0, "mcfssi.1", 50, MCF_CLK); -DEFINE_CLK(0, "mcfsdhc.0", 51, MCF_CLK); +DEFINE_CLK(0, "sdhci-esdhc-mcf.0", 51, MCF_CLK); DEFINE_CLK(0, "enet-fec.0", 53, MCF_CLK); DEFINE_CLK(0, "enet-fec.1", 54, MCF_CLK); DEFINE_CLK(0, "switch.0", 55, MCF_CLK); @@ -74,6 +74,10 @@ DEFINE_CLK(1, "mcfpwm.0", 34, MCF_BUSCLK); DEFINE_CLK(1, "sys.0", 36, MCF_BUSCLK); DEFINE_CLK(1, "gpio.0", 37, MCF_BUSCLK); +DEFINE_CLK(2, "ipg.0", 0, MCF_CLK); +DEFINE_CLK(2, "ahb.0", 1, MCF_CLK); +DEFINE_CLK(2, "per.0", 2, MCF_CLK); + struct clk *mcf_clks[] = { &__clk_0_2, &__clk_0_8, @@ -131,6 +135,11 @@ struct clk *mcf_clks[] = { &__clk_1_34, &__clk_1_36, &__clk_1_37, + + &__clk_2_0, + &__clk_2_1, + &__clk_2_2, + NULL, }; @@ -151,6 +160,7 @@ static struct clk * const enable_clks[] __initconst = { &__clk_0_33, /* pit.1 */ &__clk_0_37, /* eport */ &__clk_0_48, /* pll */ + &__clk_0_51, /* esdhc */ &__clk_1_36, /* CCM/reset module/Power management */ &__clk_1_37, /* gpio */ @@ -194,6 +204,21 @@ static struct clk * const disable_clks[] __initconst = { &__clk_1_29, /* uart 9 */ }; +static void __clk_enable2(struct clk *clk) +{ + __raw_writel(__raw_readl(MCFSDHC_CLK) | (1 << clk->slot), MCFSDHC_CLK); +} + +static void __clk_disable2(struct clk *clk) +{ + __raw_writel(__raw_readl(MCFSDHC_CLK) & ~(1 << clk->slot), MCFSDHC_CLK); +} + +struct clk_ops clk_ops2 = { + .enable = __clk_enable2, + .disable = __clk_disable2, +}; + static void __init m5441x_clk_init(void) { unsigned i; diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 5b3a273ae3da..888b75e7fd79 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -100,7 +100,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -381,6 +380,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -452,6 +452,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_MSM6242=m CONFIG_RTC_DRV_RP5C01=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -472,6 +473,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -619,9 +621,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 0bf0907a7c80..45303846b659 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -96,7 +96,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -360,6 +359,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -408,6 +408,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -428,6 +429,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -575,9 +577,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 876e69292294..de824c1bc3d3 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -103,7 +103,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -376,6 +375,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -430,6 +430,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -450,6 +451,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -597,9 +599,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index aa59c242e715..071839ca6a59 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -93,7 +93,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -358,6 +357,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -401,6 +401,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -421,6 +422,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -568,9 +570,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 308cd93929a9..37ac7b019ec1 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -95,7 +95,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -359,6 +358,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -410,6 +410,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -430,6 +431,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -577,9 +579,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 0bc210ace870..608779866260 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -94,7 +94,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -375,6 +374,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -432,6 +432,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -452,6 +453,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -599,9 +601,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 3b3b832dee80..0abb53c38c20 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -114,7 +114,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -419,6 +418,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -518,6 +518,7 @@ CONFIG_RTC_DRV_MSM6242=m CONFIG_RTC_DRV_RP5C01=m CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -538,6 +539,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -685,9 +687,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index e3633c66926f..cb14c234d3ad 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -92,7 +92,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -357,6 +356,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -400,6 +400,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -420,6 +421,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -567,9 +569,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 88b3f7f9f146..e8a1920aded7 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -93,7 +93,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -358,6 +357,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -401,6 +401,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -421,6 +422,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -568,9 +570,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 3dd5b536921e..2cbf416fc725 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -94,7 +94,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -365,6 +364,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -419,6 +419,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -439,6 +440,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -586,9 +588,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index 715e015ed270..fed3cc7abcc4 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -90,7 +90,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -355,6 +354,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -403,6 +403,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -423,6 +424,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -570,8 +572,10 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index f9ff129ac7c2..0954fde256e6 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -90,7 +90,6 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_SET=m CONFIG_NF_TABLES_INET=y CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m @@ -355,6 +354,7 @@ CONFIG_IPVLAN=m CONFIG_IPVTAP=m CONFIG_VXLAN=m CONFIG_GENEVE=m +CONFIG_BAREUDP=m CONFIG_GTP=m CONFIG_MACSEC=m CONFIG_NETCONSOLE=m @@ -402,6 +402,7 @@ CONFIG_RTC_CLASS=y # CONFIG_RTC_NVMEM is not set CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set +# CONFIG_VHOST_MENU is not set # CONFIG_IOMMU_SUPPORT is not set CONFIG_DAX=m CONFIG_EXT4_FS=y @@ -422,6 +423,7 @@ CONFIG_ZISOFS=y CONFIG_UDF_FS=m CONFIG_MSDOS_FS=m CONFIG_VFAT_FS=m +CONFIG_EXFAT_FS=m CONFIG_PROC_KCORE=y CONFIG_PROC_CHILDREN=y CONFIG_TMPFS=y @@ -569,9 +571,11 @@ CONFIG_XZ_DEC_TEST=m CONFIG_STRING_SELFTEST=m # CONFIG_SECTION_MISMATCH_WARN_ONLY is not set CONFIG_MAGIC_SYSRQ=y +CONFIG_TEST_LOCKUP=m CONFIG_WW_MUTEX_SELFTEST=m CONFIG_EARLY_PRINTK=y CONFIG_TEST_LIST_SORT=m +CONFIG_TEST_MIN_HEAP=m CONFIG_TEST_SORT=m CONFIG_REED_SOLOMON_TEST=m CONFIG_ATOMIC64_SELFTEST=m diff --git a/arch/m68k/include/asm/checksum.h b/arch/m68k/include/asm/checksum.h index f9b94e4b94f9..3f2c15d6f18c 100644 --- a/arch/m68k/include/asm/checksum.h +++ b/arch/m68k/include/asm/checksum.h @@ -30,7 +30,8 @@ __wsum csum_partial(const void *buff, int len, __wsum sum); * better 64-bit) boundary */ -extern __wsum csum_partial_copy_from_user(const void __user *src, +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *csum_err); diff --git a/arch/m68k/include/asm/floppy.h b/arch/m68k/include/asm/floppy.h index c3b9ad6732fc..a4d0fea47c6b 100644 --- a/arch/m68k/include/asm/floppy.h +++ b/arch/m68k/include/asm/floppy.h @@ -63,21 +63,21 @@ static __inline__ void release_dma_lock(unsigned long flags) } -static __inline__ unsigned char fd_inb(int port) +static __inline__ unsigned char fd_inb(int base, int reg) { if(MACH_IS_Q40) - return inb_p(port); + return inb_p(base + reg); else if(MACH_IS_SUN3X) - return sun3x_82072_fd_inb(port); + return sun3x_82072_fd_inb(base + reg); return 0; } -static __inline__ void fd_outb(unsigned char value, int port) +static __inline__ void fd_outb(unsigned char value, int base, int reg) { if(MACH_IS_Q40) - outb_p(value, port); + outb_p(value, base + reg); else if(MACH_IS_SUN3X) - sun3x_82072_fd_outb(value, port); + sun3x_82072_fd_outb(value, base + reg); } @@ -211,26 +211,27 @@ asmlinkage irqreturn_t floppy_hardint(int irq, void *dev_id) st=1; for(lcount=virtual_dma_count, lptr=virtual_dma_addr; lcount; lcount--, lptr++) { - st=inb(virtual_dma_port+4) & 0xa0 ; - if(st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if(virtual_dma_mode) - outb_p(*lptr, virtual_dma_port+5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port+5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port+4); + st = inb(virtual_dma_port + FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if(st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if(!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count=0; #ifdef TRACE_FLPY_INT diff --git a/arch/m68k/include/asm/m5441xsim.h b/arch/m68k/include/asm/m5441xsim.h index 4892f314ff38..e091e36d3464 100644 --- a/arch/m68k/include/asm/m5441xsim.h +++ b/arch/m68k/include/asm/m5441xsim.h @@ -279,6 +279,13 @@ #define MCFGPIO_PIN_MAX 87 /* + * Phase Locked Loop (PLL) + */ +#define MCF_PLL_CR 0xFC0C0000 +#define MCF_PLL_DR 0xFC0C0004 +#define MCF_PLL_SR 0xFC0C0008 + +/* * DSPI module. */ #define MCFDSPI_BASE0 0xfc05c000 @@ -298,5 +305,13 @@ #define MCFEDMA_IRQ_INTR16 (MCFINT1_VECBASE + MCFEDMA_EDMA_INTR16) #define MCFEDMA_IRQ_INTR56 (MCFINT2_VECBASE + MCFEDMA_EDMA_INTR56) #define MCFEDMA_IRQ_ERR (MCFINT0_VECBASE + MCFINT0_EDMA_ERR) +/* + * esdhc module. + */ +#define MCFSDHC_BASE 0xfc0cc000 +#define MCFSDHC_SIZE 256 +#define MCFINT2_SDHC 31 +#define MCF_IRQ_SDHC (MCFINT2_VECBASE + MCFINT2_SDHC) +#define MCFSDHC_CLK (MCFSDHC_BASE + 0x2c) #endif /* m5441xsim_h */ diff --git a/arch/m68k/include/asm/mac_via.h b/arch/m68k/include/asm/mac_via.h index de1470c4d829..1149251ea58d 100644 --- a/arch/m68k/include/asm/mac_via.h +++ b/arch/m68k/include/asm/mac_via.h @@ -257,6 +257,7 @@ extern int rbv_present,via_alt_mapping; struct irq_desc; +extern void via_l2_flush(int writeback); extern void via_register_interrupts(void); extern void via_irq_enable(int); extern void via_irq_disable(int); diff --git a/arch/m68k/include/asm/mcfclk.h b/arch/m68k/include/asm/mcfclk.h index 0aca504fae31..722627e06d66 100644 --- a/arch/m68k/include/asm/mcfclk.h +++ b/arch/m68k/include/asm/mcfclk.h @@ -30,6 +30,8 @@ extern struct clk_ops clk_ops0; extern struct clk_ops clk_ops1; #endif /* MCFPM_PPMCR1 */ +extern struct clk_ops clk_ops2; + #define DEFINE_CLK(clk_bank, clk_name, clk_slot, clk_rate) \ static struct clk __clk_##clk_bank##_##clk_slot = { \ .name = clk_name, \ diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h index 7e85de984df1..9ae9f8d05925 100644 --- a/arch/m68k/include/asm/uaccess_mm.h +++ b/arch/m68k/include/asm/uaccess_mm.h @@ -142,7 +142,7 @@ asm volatile ("\n" \ __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \ break; \ case 8: { \ - const void *__gu_ptr = (ptr); \ + const void __user *__gu_ptr = (ptr); \ union { \ u64 l; \ __typeof__(*(ptr)) t; \ diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index f4f49fcb76d0..f71b1bbcc198 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 435 common clone3 __sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/m68k/lib/checksum.c b/arch/m68k/lib/checksum.c index 5fa3d392e181..31797be9a3dc 100644 --- a/arch/m68k/lib/checksum.c +++ b/arch/m68k/lib/checksum.c @@ -129,7 +129,7 @@ EXPORT_SYMBOL(csum_partial); */ __wsum -csum_partial_copy_from_user(const void __user *src, void *dst, +csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *csum_err) { /* @@ -316,7 +316,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, return(sum); } -EXPORT_SYMBOL(csum_partial_copy_from_user); +EXPORT_SYMBOL(csum_and_copy_from_user); /* diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 611f73bfc87c..d0126ab01360 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -59,7 +59,6 @@ extern void iop_preinit(void); extern void iop_init(void); extern void via_init(void); extern void via_init_clock(irq_handler_t func); -extern void via_flush_cache(void); extern void oss_init(void); extern void psc_init(void); extern void baboon_init(void); @@ -130,21 +129,6 @@ int __init mac_parse_bootinfo(const struct bi_record *record) return unknown; } -/* - * Flip into 24bit mode for an instant - flushes the L2 cache card. We - * have to disable interrupts for this. Our IRQ handlers will crap - * themselves if they take an IRQ in 24bit mode! - */ - -static void mac_cache_card_flush(int writeback) -{ - unsigned long flags; - - local_irq_save(flags); - via_flush_cache(); - local_irq_restore(flags); -} - void __init config_mac(void) { if (!MACH_IS_MAC) @@ -175,9 +159,8 @@ void __init config_mac(void) * not. */ - if (macintosh_config->ident == MAC_MODEL_IICI - || macintosh_config->ident == MAC_MODEL_IIFX) - mach_l2_flush = mac_cache_card_flush; + if (macintosh_config->ident == MAC_MODEL_IICI) + mach_l2_flush = via_l2_flush; } diff --git a/arch/m68k/mac/iop.c b/arch/m68k/mac/iop.c index 9bfa17015768..d3775afb0f07 100644 --- a/arch/m68k/mac/iop.c +++ b/arch/m68k/mac/iop.c @@ -299,7 +299,6 @@ void __init iop_init(void) /* * Register the interrupt handler for the IOPs. - * TODO: might be wrong for non-OSS machines. Anyone? */ void __init iop_register_interrupts(void) @@ -566,36 +565,42 @@ irqreturn_t iop_ism_irq(int irq, void *dev_id) uint iop_num = (uint) dev_id; volatile struct mac_iop *iop = iop_base[iop_num]; int i,state; + u8 events = iop->status_ctrl & (IOP_INT0 | IOP_INT1); iop_pr_debug("status %02X\n", iop->status_ctrl); - /* INT0 indicates a state change on an outgoing message channel */ - - if (iop->status_ctrl & IOP_INT0) { - iop->status_ctrl = IOP_INT0 | IOP_RUN | IOP_AUTOINC; - iop_pr_debug("new status %02X, send states", iop->status_ctrl); - for (i = 0 ; i < NUM_IOP_CHAN ; i++) { - state = iop_readb(iop, IOP_ADDR_SEND_STATE + i); - iop_pr_cont(" %02X", state); - if (state == IOP_MSG_COMPLETE) { - iop_handle_send(iop_num, i); + do { + /* INT0 indicates state change on an outgoing message channel */ + if (events & IOP_INT0) { + iop->status_ctrl = IOP_INT0 | IOP_RUN | IOP_AUTOINC; + iop_pr_debug("new status %02X, send states", + iop->status_ctrl); + for (i = 0; i < NUM_IOP_CHAN; i++) { + state = iop_readb(iop, IOP_ADDR_SEND_STATE + i); + iop_pr_cont(" %02X", state); + if (state == IOP_MSG_COMPLETE) + iop_handle_send(iop_num, i); } + iop_pr_cont("\n"); } - iop_pr_cont("\n"); - } - if (iop->status_ctrl & IOP_INT1) { /* INT1 for incoming msgs */ - iop->status_ctrl = IOP_INT1 | IOP_RUN | IOP_AUTOINC; - iop_pr_debug("new status %02X, recv states", iop->status_ctrl); - for (i = 0 ; i < NUM_IOP_CHAN ; i++) { - state = iop_readb(iop, IOP_ADDR_RECV_STATE + i); - iop_pr_cont(" %02X", state); - if (state == IOP_MSG_NEW) { - iop_handle_recv(iop_num, i); + /* INT1 for incoming messages */ + if (events & IOP_INT1) { + iop->status_ctrl = IOP_INT1 | IOP_RUN | IOP_AUTOINC; + iop_pr_debug("new status %02X, recv states", + iop->status_ctrl); + for (i = 0; i < NUM_IOP_CHAN; i++) { + state = iop_readb(iop, IOP_ADDR_RECV_STATE + i); + iop_pr_cont(" %02X", state); + if (state == IOP_MSG_NEW) + iop_handle_recv(iop_num, i); } + iop_pr_cont("\n"); } - iop_pr_cont("\n"); - } + + events = iop->status_ctrl & (IOP_INT0 | IOP_INT1); + } while (events); + return IRQ_HANDLED; } diff --git a/arch/m68k/mac/via.c b/arch/m68k/mac/via.c index 3c2cfcb74982..1f0fad2a98a0 100644 --- a/arch/m68k/mac/via.c +++ b/arch/m68k/mac/via.c @@ -294,10 +294,14 @@ void via_debug_dump(void) * the system into 24-bit mode for an instant. */ -void via_flush_cache(void) +void via_l2_flush(int writeback) { + unsigned long flags; + + local_irq_save(flags); via2[gBufB] &= ~VIA2B_vMode32; via2[gBufB] |= VIA2B_vMode32; + local_irq_restore(flags); } /* diff --git a/arch/m68k/tools/amiga/dmesg.c b/arch/m68k/tools/amiga/dmesg.c index 7340f5b6cf6d..f8005a7efb0b 100644 --- a/arch/m68k/tools/amiga/dmesg.c +++ b/arch/m68k/tools/amiga/dmesg.c @@ -34,7 +34,7 @@ struct savekmsg { u_long magic2; /* SAVEKMSG_MAGIC2 */ u_long magicptr; /* address of magic1 */ u_long size; - char data[0]; + char data[]; }; diff --git a/arch/microblaze/kernel/microblaze_ksyms.c b/arch/microblaze/kernel/microblaze_ksyms.c index 92e12c2c2ec1..51c43ee5e380 100644 --- a/arch/microblaze/kernel/microblaze_ksyms.c +++ b/arch/microblaze/kernel/microblaze_ksyms.c @@ -6,7 +6,6 @@ #include <linux/export.h> #include <linux/string.h> -#include <linux/cryptohash.h> #include <linux/delay.h> #include <linux/in6.h> #include <linux/syscalls.h> diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 4c67b11f9c9e..edacc4561f2b 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -443,3 +443,4 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/mips/Kbuild b/arch/mips/Kbuild index a8d5e4fcbe53..d5d6ef9bb986 100644 --- a/arch/mips/Kbuild +++ b/arch/mips/Kbuild @@ -12,7 +12,7 @@ obj-y := $(platform-y) # make clean traverses $(obj-) without having included .config, so # everything ends up here -obj- := $(platform-) +obj- := $(platform-y) # mips object files # The object files are linked as core-y files would be linked diff --git a/arch/mips/Kbuild.platforms b/arch/mips/Kbuild.platforms index a69b272a3ab0..c7368a81fd1e 100644 --- a/arch/mips/Kbuild.platforms +++ b/arch/mips/Kbuild.platforms @@ -1,42 +1,44 @@ # SPDX-License-Identifier: GPL-2.0 # All platforms listed in alphabetic order -platforms += alchemy -platforms += ar7 -platforms += ath25 -platforms += ath79 -platforms += bcm47xx -platforms += bcm63xx -platforms += bmips -platforms += cavium-octeon -platforms += cobalt -platforms += dec -platforms += emma -platforms += generic -platforms += jazz -platforms += jz4740 -platforms += lantiq -platforms += lasat -platforms += loongson2ef -platforms += loongson32 -platforms += loongson64 -platforms += mti-malta -platforms += netlogic -platforms += paravirt -platforms += pic32 -platforms += pistachio -platforms += pmcs-msp71xx -platforms += pnx833x -platforms += ralink -platforms += rb532 -platforms += sgi-ip22 -platforms += sgi-ip27 -platforms += sgi-ip30 -platforms += sgi-ip32 -platforms += sibyte -platforms += sni -platforms += txx9 -platforms += vr41xx +platform-$(CONFIG_MIPS_ALCHEMY) += alchemy/ +platform-$(CONFIG_AR7) += ar7/ +platform-$(CONFIG_ATH25) += ath25/ +platform-$(CONFIG_ATH79) += ath79/ +platform-$(CONFIG_BCM47XX) += bcm47xx/ +platform-$(CONFIG_BCM63XX) += bcm63xx/ +platform-$(CONFIG_BMIPS_GENERIC) += bmips/ +platform-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon/ +platform-$(CONFIG_MIPS_COBALT) += cobalt/ +platform-$(CONFIG_MACH_DECSTATION) += dec/ +platform-$(CONFIG_MIPS_GENERIC) += generic/ +platform-$(CONFIG_MACH_JAZZ) += jazz/ +platform-$(CONFIG_MACH_INGENIC) += jz4740/ +platform-$(CONFIG_LANTIQ) += lantiq/ +platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/ +platform-$(CONFIG_MACH_LOONGSON32) += loongson32/ +platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ +platform-$(CONFIG_MIPS_MALTA) += mti-malta/ +platform-$(CONFIG_NLM_COMMON) += netlogic/ +platform-$(CONFIG_MIPS_PARAVIRT) += paravirt/ +platform-$(CONFIG_PIC32MZDA) += pic32/ +platform-$(CONFIG_MACH_PISTACHIO) += pistachio/ +platform-$(CONFIG_SOC_PNX833X) += pnx833x/ +platform-$(CONFIG_RALINK) += ralink/ +platform-$(CONFIG_MIKROTIK_RB532) += rb532/ +platform-$(CONFIG_SGI_IP22) += sgi-ip22/ +platform-$(CONFIG_SGI_IP27) += sgi-ip27/ +platform-$(CONFIG_SGI_IP28) += sgi-ip22/ +platform-$(CONFIG_SGI_IP30) += sgi-ip30/ +platform-$(CONFIG_SGI_IP32) += sgi-ip32/ +platform-$(CONFIG_SIBYTE_BCM112X) += sibyte/ +platform-$(CONFIG_SIBYTE_SB1250) += sibyte/ +platform-$(CONFIG_SIBYTE_BCM1x55) += sibyte/ +platform-$(CONFIG_SIBYTE_BCM1x80) += sibyte/ +platform-$(CONFIG_SNI_RM) += sni/ +platform-$(CONFIG_MACH_TX39XX) += txx9/ +platform-$(CONFIG_MACH_TX49XX) += txx9/ +platform-$(CONFIG_MACH_VR41XX) += vr41xx/ # include the platform specific files -include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platforms)) +include $(patsubst %, $(srctree)/arch/mips/%/Platform, $(platform-y)) diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 690718b3701a..b6338e806a4b 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -92,6 +92,9 @@ config MIPS select SYSCTL_EXCEPTION_TRACE select VIRT_TO_BUS +config MIPS_FIXUP_BIGPHYS_ADDR + bool + menu "Machine selection" choice @@ -157,6 +160,7 @@ config MIPS_ALCHEMY select CSRC_R4K select IRQ_MIPS_CPU select DMA_MAYBE_COHERENT # Au1000,1500,1100 aren't, rest is + select MIPS_FIXUP_BIGPHYS_ADDR if PCI select SYS_HAS_CPU_MIPS32_R1 select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_APM_EMULATION @@ -427,23 +431,6 @@ config LANTIQ select ARCH_HAS_RESET_CONTROLLER select RESET_CONTROLLER -config LASAT - bool "LASAT Networks platforms" - select CEVT_R4K - select CRC32 - select CSRC_R4K - select DMA_NONCOHERENT - select SYS_HAS_EARLY_PRINTK - select HAVE_PCI - select IRQ_MIPS_CPU - select PCI_GT64XXX_PCI0 - select MIPS_NILE4 - select R5000_CPU_SCACHE - select SYS_HAS_CPU_R5000 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_64BIT_KERNEL if BROKEN - select SYS_SUPPORTS_LITTLE_ENDIAN - config MACH_LOONGSON32 bool "Loongson 32-bit family of machines" select SYS_SUPPORTS_ZBOOT @@ -475,8 +462,10 @@ config MACH_LOONGSON64 select ISA select I8259 select IRQ_MIPS_CPU - select NR_CPUS_DEFAULT_4 + select NO_EXCEPT_FILL + select NR_CPUS_DEFAULT_64 select USE_GENERIC_EARLY_PRINTK_8250 + select PCI_DRIVERS_GENERIC select SYS_HAS_CPU_LOONGSON64 select SYS_HAS_EARLY_PRINTK select SYS_SUPPORTS_SMP @@ -593,13 +582,6 @@ config MACH_PIC32 Microchip PIC32 is a family of general-purpose 32 bit MIPS core microcontrollers. -config NEC_MARKEINS - bool "NEC EMMA2RH Mark-eins board" - select SOC_EMMA2RH - select HAVE_PCI - help - This enables support for the NEC Electronics Mark-eins boards. - config MACH_VR41XX bool "NEC VR4100 series based machines" select CEVT_R4K @@ -621,30 +603,6 @@ config NXP_STB225 help Support for NXP Semiconductors STB225 Development Board. -config PMC_MSP - bool "PMC-Sierra MSP chipsets" - select CEVT_R4K - select CSRC_R4K - select DMA_NONCOHERENT - select SWAP_IO_SPACE - select NO_EXCEPT_FILL - select BOOT_RAW - select SYS_HAS_CPU_MIPS32_R1 - select SYS_HAS_CPU_MIPS32_R2 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_BIG_ENDIAN - select SYS_SUPPORTS_MIPS16 - select IRQ_MIPS_CPU - select SERIAL_8250 - select SERIAL_8250_CONSOLE - select USB_EHCI_BIG_ENDIAN_MMIO - select USB_EHCI_BIG_ENDIAN_DESC - help - This adds support for the PMC-Sierra family of Multi-Service - Processor System-On-A-Chips. These parts include a number - of integrated peripherals, interfaces and DSPs in addition to - a variety of MIPS cores. - config RALINK bool "Ralink based machines" select CEVT_R4K @@ -1087,10 +1045,8 @@ source "arch/mips/generic/Kconfig" source "arch/mips/jazz/Kconfig" source "arch/mips/jz4740/Kconfig" source "arch/mips/lantiq/Kconfig" -source "arch/mips/lasat/Kconfig" source "arch/mips/pic32/Kconfig" source "arch/mips/pistachio/Kconfig" -source "arch/mips/pmcs-msp71xx/Kconfig" source "arch/mips/ralink/Kconfig" source "arch/mips/sgi-ip27/Kconfig" source "arch/mips/sibyte/Kconfig" @@ -1154,6 +1110,7 @@ config CSRC_IOASIC bool config CSRC_R4K + select CLOCKSOURCE_WATCHDOG if CPU_FREQ bool config CSRC_SB1250 @@ -1211,9 +1168,6 @@ config MIPS_BONITO64 config MIPS_MSC bool -config MIPS_NILE4 - bool - config SYNC_R4K bool @@ -1334,18 +1288,6 @@ config PCI_XTALK_BRIDGE config NO_EXCEPT_FILL bool -config SOC_EMMA2RH - bool - select CEVT_R4K - select CSRC_R4K - select DMA_NONCOHERENT - select IRQ_MIPS_CPU - select SWAP_IO_SPACE - select SYS_HAS_CPU_R5500 - select SYS_SUPPORTS_32BIT_KERNEL - select SYS_SUPPORTS_64BIT_KERNEL - select SYS_SUPPORTS_BIG_ENDIAN - config SOC_PNX833X bool select CEVT_R4K @@ -1419,9 +1361,6 @@ config MIPS_L1_CACHE_SHIFT default "4" if MIPS_L1_CACHE_SHIFT_4 default "5" -config HAVE_STD_PC_SERIAL_PORT - bool - config ARC_CMDLINE_ONLY bool @@ -1504,6 +1443,18 @@ config CPU_LOONGSON3_WORKAROUNDS If unsure, please say Y. +config CPU_LOONGSON3_CPUCFG_EMULATION + bool "Emulate the CPUCFG instruction on older Loongson cores" + default y + depends on CPU_LOONGSON64 + help + Loongson-3A R4 and newer have the CPUCFG instruction available for + userland to query CPU capabilities, much like CPUID on x86. This + option provides emulation of the instruction on older Loongson + cores, back to Loongson-3A1000. + + If unsure, please say Y. + config CPU_LOONGSON2E bool "Loongson 2E" depends on SYS_HAS_CPU_LOONGSON2E @@ -1580,6 +1531,21 @@ config CPU_MIPS32_R2 specific type of processor in your system, choose those that one otherwise CPU_MIPS32_R1 is a safe bet for any MIPS32 system. +config CPU_MIPS32_R5 + bool "MIPS32 Release 5" + depends on SYS_HAS_CPU_MIPS32_R5 + select CPU_HAS_PREFETCH + select CPU_SUPPORTS_32BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM + select CPU_SUPPORTS_MSA + select HAVE_KVM + select MIPS_O32_FP64_SUPPORT + help + Choose this option to build a kernel for release 5 or later of the + MIPS32 architecture. New MIPS processors, starting with the Warrior + family, are based on a MIPS32r5 processor. If you own an older + processor, you probably need to select MIPS32r1 or MIPS32r2 instead. + config CPU_MIPS32_R6 bool "MIPS32 Release 6" depends on SYS_HAS_CPU_MIPS32_R6 @@ -1632,6 +1598,23 @@ config CPU_MIPS64_R2 specific type of processor in your system, choose those that one otherwise CPU_MIPS64_R1 is a safe bet for any MIPS64 system. +config CPU_MIPS64_R5 + bool "MIPS64 Release 5" + depends on SYS_HAS_CPU_MIPS64_R5 + select CPU_HAS_PREFETCH + select CPU_SUPPORTS_32BIT_KERNEL + select CPU_SUPPORTS_64BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM + select CPU_SUPPORTS_HUGEPAGES + select CPU_SUPPORTS_MSA + select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32 + select HAVE_KVM + help + Choose this option to build a kernel for release 5 or later of the + MIPS64 architecture. This is a intermediate MIPS architecture + release partly implementing release 6 features. Though there is no + any hardware known to be based on this release. + config CPU_MIPS64_R6 bool "MIPS64 Release 6" depends on SYS_HAS_CPU_MIPS64_R6 @@ -1650,6 +1633,28 @@ config CPU_MIPS64_R6 family, are based on a MIPS64r6 processor. If you own an older processor, you probably need to select MIPS64r1 or MIPS64r2 instead. +config CPU_P5600 + bool "MIPS Warrior P5600" + depends on SYS_HAS_CPU_P5600 + select CPU_HAS_PREFETCH + select CPU_SUPPORTS_32BIT_KERNEL + select CPU_SUPPORTS_HIGHMEM + select CPU_SUPPORTS_MSA + select CPU_SUPPORTS_UNCACHED_ACCELERATED + select CPU_SUPPORTS_CPUFREQ + select CPU_MIPSR2_IRQ_VI + select CPU_MIPSR2_IRQ_EI + select HAVE_KVM + select MIPS_O32_FP64_SUPPORT + help + Choose this option to build a kernel for MIPS Warrior P5600 CPU. + It's based on MIPS32r5 ISA with XPA, EVA, dual/quad issue exec pipes, + MMU with two-levels TLB, UCA, MSA, MDU core level features and system + level features like up to six P5600 calculation cores, CM2 with L2 + cache, IOCU/IOMMU (though might be unused depending on the system- + specific IP core configuration), GIC, CPC, virtualisation module, + eJTAG and PDtrace. + config CPU_R3000 bool "R3000" depends on SYS_HAS_CPU_R3000 @@ -1826,7 +1831,8 @@ endchoice config CPU_MIPS32_3_5_FEATURES bool "MIPS32 Release 3.5 Features" depends on SYS_HAS_CPU_MIPS32_R3_5 - depends on CPU_MIPS32_R2 || CPU_MIPS32_R6 + depends on CPU_MIPS32_R2 || CPU_MIPS32_R5 || CPU_MIPS32_R6 || \ + CPU_P5600 help Choose this option to build a kernel for release 2 or later of the MIPS32 architecture including features from the 3.5 release such as @@ -1846,7 +1852,7 @@ config CPU_MIPS32_3_5_EVA config CPU_MIPS32_R5_FEATURES bool "MIPS32 Release 5 Features" depends on SYS_HAS_CPU_MIPS32_R5 - depends on CPU_MIPS32_R2 + depends on CPU_MIPS32_R2 || CPU_MIPS32_R5 || CPU_P5600 help Choose this option to build a kernel for release 2 or later of the MIPS32 architecture including features from release 5 such as @@ -2001,6 +2007,10 @@ config SYS_HAS_CPU_MIPS64_R6 bool select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT +config SYS_HAS_CPU_P5600 + bool + select ARCH_HAS_SYNC_DMA_FOR_CPU if DMA_NONCOHERENT + config SYS_HAS_CPU_R3000 bool @@ -2084,11 +2094,13 @@ endmenu # config CPU_MIPS32 bool - default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R6 + default y if CPU_MIPS32_R1 || CPU_MIPS32_R2 || CPU_MIPS32_R5 || \ + CPU_MIPS32_R6 || CPU_P5600 config CPU_MIPS64 bool - default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R6 + default y if CPU_MIPS64_R1 || CPU_MIPS64_R2 || CPU_MIPS64_R5 || \ + CPU_MIPS64_R6 # # These indicate the revision of the architecture @@ -2104,6 +2116,13 @@ config CPU_MIPSR2 select CPU_HAS_DIEI if !CPU_DIEI_BROKEN select MIPS_SPRAM +config CPU_MIPSR5 + bool + default y if CPU_MIPS32_R5 || CPU_MIPS64_R5 || CPU_P5600 + select CPU_HAS_RIXI + select CPU_HAS_DIEI if !CPU_DIEI_BROKEN + select MIPS_SPRAM + config CPU_MIPSR6 bool default y if CPU_MIPS32_R6 || CPU_MIPS64_R6 @@ -2118,6 +2137,7 @@ config TARGET_ISA_REV int default 1 if CPU_MIPSR1 default 2 if CPU_MIPSR2 + default 5 if CPU_MIPSR5 default 6 if CPU_MIPSR6 default 0 help @@ -2707,7 +2727,11 @@ config NEED_PER_CPU_EMBED_FIRST_CHUNK config RELOCATABLE bool "Relocatable kernel" - depends on SYS_SUPPORTS_RELOCATABLE && (CPU_MIPS32_R2 || CPU_MIPS64_R2 || CPU_MIPS32_R6 || CPU_MIPS64_R6 || CAVIUM_OCTEON_SOC) + depends on SYS_SUPPORTS_RELOCATABLE + depends on CPU_MIPS32_R2 || CPU_MIPS64_R2 || \ + CPU_MIPS32_R5 || CPU_MIPS64_R5 || \ + CPU_MIPS32_R6 || CPU_MIPS64_R6 || \ + CPU_P5600 || CAVIUM_OCTEON_SOC help This builds a kernel image that retains relocation information so it can be loaded someplace besides the default 1MB. @@ -3275,3 +3299,5 @@ endmenu source "drivers/firmware/Kconfig" source "arch/mips/kvm/Kconfig" + +source "arch/mips/vdso/Kconfig" diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 93a2974d2ab7..7a8d94cdd493 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -148,4 +148,14 @@ config MIPS_CPS_NS16550_SHIFT form their addresses. That is, log base 2 of the span between adjacent ns16550 registers in the system. +config MIPS_CPS_NS16550_WIDTH + int "UART Register Width" + default 1 + help + ns16550 registers width. UART registers IO access methods will be + selected in accordance with this parameter. By setting it to 1, 2 or + 4 UART registers will be accessed by means of lb/sb, lh/sh or lw/sw + instructions respectively. Any value not from that set activates + lb/sb instructions. + endif # MIPS_CPS_NS16550_BOOL diff --git a/arch/mips/Makefile b/arch/mips/Makefile index e1c44aed8156..0d0f29d662c9 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -116,33 +116,8 @@ endif cflags-y += -ffreestanding -# -# We explicitly add the endianness specifier if needed, this allows -# to compile kernels with a toolchain for the other endianness. We -# carefully avoid to add it redundantly because gcc 3.3/3.4 complains -# when fed the toolchain default! -# -# Certain gcc versions up to gcc 4.1.1 (probably 4.2-subversion as of -# 2006-10-10 don't properly change the predefined symbols if -EB / -EL -# are used, so we kludge that here. A bug has been filed at -# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=29413. -# -# clang doesn't suffer from these issues and our checks against -dumpmachine -# don't work so well when cross compiling, since without providing --target -# clang's output will be based upon the build machine. So for clang we simply -# unconditionally specify -EB or -EL as appropriate. -# -ifdef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_BIG_ENDIAN) += -EB cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -EL -else -undef-all += -UMIPSEB -U_MIPSEB -U__MIPSEB -U__MIPSEB__ -undef-all += -UMIPSEL -U_MIPSEL -U__MIPSEL -U__MIPSEL__ -predef-be += -DMIPSEB -D_MIPSEB -D__MIPSEB -D__MIPSEB__ -predef-le += -DMIPSEL -D_MIPSEL -D__MIPSEL -D__MIPSEL__ -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' && echo -EB $(undef-all) $(predef-be)) -cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += $(shell $(CC) -dumpmachine |grep -q 'mips.*el-.*' || echo -EL $(undef-all) $(predef-le)) -endif cflags-$(CONFIG_SB1XXX_CORELIS) += $(call cc-option,-mno-sched-prolog) \ -fno-omit-frame-pointer @@ -171,10 +146,13 @@ cflags-$(CONFIG_CPU_R4X00) += -march=r4600 -Wa,--trap cflags-$(CONFIG_CPU_TX49XX) += -march=r4600 -Wa,--trap cflags-$(CONFIG_CPU_MIPS32_R1) += -march=mips32 -Wa,--trap cflags-$(CONFIG_CPU_MIPS32_R2) += -march=mips32r2 -Wa,--trap +cflags-$(CONFIG_CPU_MIPS32_R5) += -march=mips32r5 -Wa,--trap -modd-spreg cflags-$(CONFIG_CPU_MIPS32_R6) += -march=mips32r6 -Wa,--trap -modd-spreg cflags-$(CONFIG_CPU_MIPS64_R1) += -march=mips64 -Wa,--trap cflags-$(CONFIG_CPU_MIPS64_R2) += -march=mips64r2 -Wa,--trap +cflags-$(CONFIG_CPU_MIPS64_R5) += -march=mips64r5 -Wa,--trap cflags-$(CONFIG_CPU_MIPS64_R6) += -march=mips64r6 -Wa,--trap +cflags-$(CONFIG_CPU_P5600) += -march=p5600 -Wa,--trap -modd-spreg cflags-$(CONFIG_CPU_R5000) += -march=r5000 -Wa,--trap cflags-$(CONFIG_CPU_R5500) += $(call cc-option,-march=r5500,-march=r5000) \ -Wa,--trap @@ -288,12 +266,23 @@ ifdef CONFIG_64BIT endif endif +# When linking a 32-bit executable the LLVM linker cannot cope with a +# 32-bit load address that has been sign-extended to 64 bits. Simply +# remove the upper 32 bits then, as it is safe to do so with other +# linkers. +ifdef CONFIG_64BIT + load-ld = $(load-y) +else + load-ld = $(subst 0xffffffff,0x,$(load-y)) +endif + KBUILD_AFLAGS += $(cflags-y) KBUILD_CFLAGS += $(cflags-y) -KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) +KBUILD_CPPFLAGS += -DVMLINUX_LOAD_ADDRESS=$(load-y) -DLINKER_LOAD_ADDRESS=$(load-ld) KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0) bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ + LINKER_LOAD_ADDRESS=$(load-ld) \ VMLINUX_ENTRY_ADDRESS=$(entry-y) \ PLATFORM="$(platform-y)" \ ITS_INPUTS="$(its-y)" @@ -359,12 +348,6 @@ ifeq ($(shell expr $(zload-y) \< 0xffffffff80000000 2> /dev/null), 0) bootz-y += uzImage.bin endif -ifdef CONFIG_LASAT -rom.bin rom.sw: vmlinux - $(Q)$(MAKE) $(build)=arch/mips/lasat/image \ - $(bootvars-y) $@ -endif - # # Some machines like the Indy need 32-bit ELF binaries for booting purposes. # Other need ECOFF, so we build a 32-bit ELF binary for them which we then @@ -430,7 +413,6 @@ archclean: $(Q)$(MAKE) $(clean)=arch/mips/boot $(Q)$(MAKE) $(clean)=arch/mips/boot/compressed $(Q)$(MAKE) $(clean)=arch/mips/boot/tools - $(Q)$(MAKE) $(clean)=arch/mips/lasat archheaders: $(Q)$(MAKE) $(build)=arch/mips/kernel/syscalls all diff --git a/arch/mips/alchemy/Platform b/arch/mips/alchemy/Platform index 33c9da3b077b..c8cff50b0eda 100644 --- a/arch/mips/alchemy/Platform +++ b/arch/mips/alchemy/Platform @@ -15,19 +15,16 @@ load-$(CONFIG_MIPS_DB1XXX) += 0xffffffff80100000 # # 4G-Systems MTX-1 "MeshCube" wireless router # -platform-$(CONFIG_MIPS_MTX1) += alchemy/ load-$(CONFIG_MIPS_MTX1) += 0xffffffff80100000 # # MyCable eval board # -platform-$(CONFIG_MIPS_XXS1500) += alchemy/ load-$(CONFIG_MIPS_XXS1500) += 0xffffffff80100000 # # Trapeze ITS GRP board # -platform-$(CONFIG_MIPS_GPR) += alchemy/ load-$(CONFIG_MIPS_GPR) += 0xffffffff80100000 # boards can specify their own <gpio.h> in one of their include dirs. diff --git a/arch/mips/alchemy/common/setup.c b/arch/mips/alchemy/common/setup.c index 7faaa6d593a7..0f60efe0481e 100644 --- a/arch/mips/alchemy/common/setup.c +++ b/arch/mips/alchemy/common/setup.c @@ -27,6 +27,7 @@ #include <linux/init.h> #include <linux/ioport.h> +#include <linux/mm.h> #include <asm/dma-coherence.h> #include <asm/mipsregs.h> @@ -72,9 +73,9 @@ void __init plat_mem_setup(void) iomem_resource.end = IOMEM_RESOURCE_END; } -#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_PCI) +#ifdef CONFIG_MIPS_FIXUP_BIGPHYS_ADDR /* This routine should be valid for all Au1x based boards */ -phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) +phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) { unsigned long start = ALCHEMY_PCI_MEMWIN_START; unsigned long end = ALCHEMY_PCI_MEMWIN_END; @@ -90,5 +91,13 @@ phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) /* default nop */ return phys_addr; } -EXPORT_SYMBOL(__fixup_bigphys_addr); -#endif + +int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long vaddr, + unsigned long pfn, unsigned long size, pgprot_t prot) +{ + phys_addr_t phys_addr = fixup_bigphys_addr(pfn << PAGE_SHIFT, size); + + return remap_pfn_range(vma, vaddr, phys_addr >> PAGE_SHIFT, size, prot); +} +EXPORT_SYMBOL(io_remap_pfn_range); +#endif /* CONFIG_MIPS_FIXUP_BIGPHYS_ADDR */ diff --git a/arch/mips/alchemy/devboards/db1550.c b/arch/mips/alchemy/devboards/db1550.c index 3e0c75c0ece0..752b93d91ac9 100644 --- a/arch/mips/alchemy/devboards/db1550.c +++ b/arch/mips/alchemy/devboards/db1550.c @@ -225,7 +225,7 @@ static void __init pb1550_nand_setup(void) case 0: case 2: case 8: case 0xC: case 0xD: /* x16 NAND Flash */ pb1550_nand_pd.devwidth = 1; - /* fallthrough */ + fallthrough; case 1: case 3: case 9: case 0xE: case 0xF: /* x8 NAND, already set up */ platform_device_register(&pb1550_nand_dev); diff --git a/arch/mips/ar7/Platform b/arch/mips/ar7/Platform index 21f9102d533c..a9257cc01c3c 100644 --- a/arch/mips/ar7/Platform +++ b/arch/mips/ar7/Platform @@ -1,6 +1,5 @@ # # Texas Instruments AR7 # -platform-$(CONFIG_AR7) += ar7/ cflags-$(CONFIG_AR7) += -I$(srctree)/arch/mips/include/asm/mach-ar7 load-$(CONFIG_AR7) += 0xffffffff94100000 diff --git a/arch/mips/ar7/setup.c b/arch/mips/ar7/setup.c index b3ffe7c898eb..352d5dbc777c 100644 --- a/arch/mips/ar7/setup.c +++ b/arch/mips/ar7/setup.c @@ -57,7 +57,7 @@ const char *get_system_type(void) case TITAN_CHIP_1060: return "TI AR7 (TNETV1060)"; } - /* fall through */ + fallthrough; default: return "TI AR7 (unknown)"; } diff --git a/arch/mips/ath25/Platform b/arch/mips/ath25/Platform index ef3f81fa080b..aef098b6f405 100644 --- a/arch/mips/ath25/Platform +++ b/arch/mips/ath25/Platform @@ -1,6 +1,5 @@ # # Atheros AR531X/AR231X WiSoC # -platform-$(CONFIG_ATH25) += ath25/ cflags-$(CONFIG_ATH25) += -I$(srctree)/arch/mips/include/asm/mach-ath25 load-$(CONFIG_ATH25) += 0xffffffff80041000 diff --git a/arch/mips/ath79/Platform b/arch/mips/ath79/Platform index 2bd663647d27..57744472ed2e 100644 --- a/arch/mips/ath79/Platform +++ b/arch/mips/ath79/Platform @@ -2,6 +2,5 @@ # Atheros AR71xx/AR724x/AR913x # -platform-$(CONFIG_ATH79) += ath79/ cflags-$(CONFIG_ATH79) += -I$(srctree)/arch/mips/include/asm/mach-ath79 load-$(CONFIG_ATH79) = 0xffffffff80060000 diff --git a/arch/mips/ath79/setup.c b/arch/mips/ath79/setup.c index acb4fd647a30..4b7c066ac88e 100644 --- a/arch/mips/ath79/setup.c +++ b/arch/mips/ath79/setup.c @@ -153,8 +153,7 @@ static void __init ath79_detect_sys_type(void) case REV_ID_MAJOR_QCA9533_V2: ver = 2; ath79_soc_rev = 2; - /* fall through */ - + fallthrough; case REV_ID_MAJOR_QCA9533: ath79_soc = ATH79_SOC_QCA9533; chip = "9533"; diff --git a/arch/mips/bcm47xx/Platform b/arch/mips/bcm47xx/Platform index 70783b75fd9d..833b204fe5da 100644 --- a/arch/mips/bcm47xx/Platform +++ b/arch/mips/bcm47xx/Platform @@ -1,7 +1,6 @@ # # Broadcom BCM47XX boards # -platform-$(CONFIG_BCM47XX) += bcm47xx/ cflags-$(CONFIG_BCM47XX) += \ -I$(srctree)/arch/mips/include/asm/mach-bcm47xx load-$(CONFIG_BCM47XX) := 0xffffffff80001000 diff --git a/arch/mips/bcm63xx/Platform b/arch/mips/bcm63xx/Platform index 5f86b2fff6de..882dc40f49a2 100644 --- a/arch/mips/bcm63xx/Platform +++ b/arch/mips/bcm63xx/Platform @@ -1,7 +1,6 @@ # # Broadcom BCM63XX boards # -platform-$(CONFIG_BCM63XX) += bcm63xx/ cflags-$(CONFIG_BCM63XX) += \ -I$(srctree)/arch/mips/include/asm/mach-bcm63xx/ load-$(CONFIG_BCM63XX) := 0xffffffff80010000 diff --git a/arch/mips/bcm63xx/cpu.c b/arch/mips/bcm63xx/cpu.c index f61c16f57a97..8e3e199dd35d 100644 --- a/arch/mips/bcm63xx/cpu.c +++ b/arch/mips/bcm63xx/cpu.c @@ -304,7 +304,7 @@ void __init bcm63xx_cpu_init(void) case CPU_BMIPS3300: if ((read_c0_prid() & PRID_IMP_MASK) != PRID_IMP_BMIPS3300_ALT) __cpu_name[cpu] = "Broadcom BCM6338"; - /* fall-through */ + fallthrough; case CPU_BMIPS32: chipid_reg = BCM_6345_PERF_BASE; break; diff --git a/arch/mips/bcm63xx/dev-flash.c b/arch/mips/bcm63xx/dev-flash.c index a1093934c616..f9cc015d3dc9 100644 --- a/arch/mips/bcm63xx/dev-flash.c +++ b/arch/mips/bcm63xx/dev-flash.c @@ -94,7 +94,7 @@ static int __init bcm63xx_detect_flash_type(void) case STRAPBUS_6368_BOOT_SEL_PARALLEL: return BCM63XX_FLASH_TYPE_PARALLEL; } - /* fall through */ + fallthrough; default: return -EINVAL; } diff --git a/arch/mips/bmips/Platform b/arch/mips/bmips/Platform index 5f127fd7f4b5..1434ea31ce85 100644 --- a/arch/mips/bmips/Platform +++ b/arch/mips/bmips/Platform @@ -1,7 +1,6 @@ # # Broadcom Generic BMIPS kernel # -platform-$(CONFIG_BMIPS_GENERIC) += bmips/ cflags-$(CONFIG_BMIPS_GENERIC) += \ -I$(srctree)/arch/mips/include/asm/mach-bmips/ load-$(CONFIG_BMIPS_GENERIC) := 0xffffffff80010000 diff --git a/arch/mips/boot/compressed/Makefile b/arch/mips/boot/compressed/Makefile index 0df0ee8a298d..6e56caef69f0 100644 --- a/arch/mips/boot/compressed/Makefile +++ b/arch/mips/boot/compressed/Makefile @@ -90,7 +90,7 @@ ifneq ($(zload-y),) VMLINUZ_LOAD_ADDRESS := $(zload-y) else VMLINUZ_LOAD_ADDRESS = $(shell $(obj)/calc_vmlinuz_load_addr \ - $(obj)/vmlinux.bin $(VMLINUX_LOAD_ADDRESS)) + $(obj)/vmlinux.bin $(LINKER_LOAD_ADDRESS)) endif UIMAGE_LOADADDR = $(VMLINUZ_LOAD_ADDRESS) diff --git a/arch/mips/boot/dts/Makefile b/arch/mips/boot/dts/Makefile index d429a69bfe30..19027129add8 100644 --- a/arch/mips/boot/dts/Makefile +++ b/arch/mips/boot/dts/Makefile @@ -1,17 +1,19 @@ # SPDX-License-Identifier: GPL-2.0 -subdir-y += brcm -subdir-y += cavium-octeon -subdir-y += img -subdir-y += ingenic -subdir-y += lantiq -subdir-y += loongson -subdir-y += mscc -subdir-y += mti -subdir-y += netlogic -subdir-y += ni -subdir-y += pic32 -subdir-y += qca -subdir-y += ralink -subdir-y += xilfpga +subdir-$(CONFIG_BMIPS_GENERIC) += brcm +subdir-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon +subdir-$(CONFIG_MACH_PISTACHIO) += img +subdir-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += img +subdir-$(CONFIG_MACH_INGENIC) += ingenic +subdir-$(CONFIG_LANTIQ) += lantiq +subdir-$(CONFIG_MACH_LOONGSON64) += loongson +subdir-$(CONFIG_MSCC_OCELOT) += mscc +subdir-$(CONFIG_MIPS_MALTA) += mti +subdir-$(CONFIG_LEGACY_BOARD_SEAD3) += mti +subdir-$(CONFIG_NLM_XLP_BOARD) += netlogic +subdir-$(CONFIG_FIT_IMAGE_FDT_NI169445) += ni +subdir-$(CONFIG_MACH_PIC32) += pic32 +subdir-$(CONFIG_ATH79) += qca +subdir-$(CONFIG_RALINK) += ralink +subdir-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += xilfpga obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y)) diff --git a/arch/mips/boot/dts/ingenic/ci20.dts b/arch/mips/boot/dts/ingenic/ci20.dts index db0ca250bd1a..75f5bfbf2c37 100644 --- a/arch/mips/boot/dts/ingenic/ci20.dts +++ b/arch/mips/boot/dts/ingenic/ci20.dts @@ -386,6 +386,9 @@ interrupt-parent = <&gpe>; interrupts = <19 4>; + + nvmem-cells = <ð0_addr>; + nvmem-cell-names = "mac-address"; }; }; diff --git a/arch/mips/boot/dts/ingenic/gcw0.dts b/arch/mips/boot/dts/ingenic/gcw0.dts index f58d239c2058..8d22828787d8 100644 --- a/arch/mips/boot/dts/ingenic/gcw0.dts +++ b/arch/mips/boot/dts/ingenic/gcw0.dts @@ -4,6 +4,10 @@ #include "jz4770.dtsi" #include <dt-bindings/clock/ingenic,tcu.h> +#include <dt-bindings/gpio/gpio.h> +#include <dt-bindings/iio/adc/ingenic,adc.h> +#include <dt-bindings/input/input.h> + / { compatible = "gcw,zero", "ingenic,jz4770"; model = "GCW Zero"; @@ -15,20 +19,370 @@ serial3 = &uart3; }; + memory: memory { + device_type = "memory"; + reg = <0x0 0x10000000>, + <0x30000000 0x10000000>; + }; + chosen { stdout-path = "serial2:57600n8"; }; - board { - compatible = "simple-bus"; + vcc: regulator@0 { + compatible = "regulator-fixed"; + regulator-name = "vcc"; + + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + }; + + mmc1_power: regulator@1 { + compatible = "regulator-fixed"; + regulator-name = "mmc1_vcc"; + gpio = <&gpe 9 0>; + + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + vin-supply = <&vcc>; + }; + + headphones_amp: analog-amplifier@0 { + compatible = "simple-audio-amplifier"; + enable-gpios = <&gpf 3 GPIO_ACTIVE_LOW>; + enable-delay-ms = <50>; + + VCC-supply = <&ldo5>; + sound-name-prefix = "Headphones Amp"; + }; + + speaker_amp: analog-amplifier@1 { + compatible = "simple-audio-amplifier"; + enable-gpios = <&gpf 20 GPIO_ACTIVE_HIGH>; + + VCC-supply = <&ldo5>; + sound-name-prefix = "Speaker Amp"; + }; + + sound { + compatible = "simple-audio-card"; + + simple-audio-card,name = "gcw0-audio"; + simple-audio-card,format = "i2s"; + + simple-audio-card,widgets = + "Speaker", "Speaker", + "Headphone", "Headphones", + "Line", "FM Radio", + "Microphone", "Built-in Mic"; + simple-audio-card,routing = + "Headphones Amp INL", "LHPOUT", + "Headphones Amp INR", "RHPOUT", + "Headphones", "Headphones Amp OUTL", + "Headphones", "Headphones Amp OUTR", + "Speaker Amp INL", "LOUT", + "Speaker Amp INR", "ROUT", + "Speaker", "Speaker Amp OUTL", + "Speaker", "Speaker Amp OUTR", + "LLINEIN", "FM Radio", + "RLINEIN", "FM Radio", + "Built-in Mic", "MICBIAS", + "MIC1P", "Built-in Mic", + "MIC1N", "Built-in Mic"; + simple-audio-card,pin-switches = "Speaker", "Headphones"; + + simple-audio-card,hp-det-gpio = <&gpf 21 GPIO_ACTIVE_HIGH>; + simple-audio-card,aux-devs = <&speaker_amp>, <&headphones_amp>; + + simple-audio-card,bitclock-master = <&dai_codec>; + simple-audio-card,frame-master = <&dai_codec>; + + dai_cpu: simple-audio-card,cpu { + sound-dai = <&aic>; + }; + + dai_codec: simple-audio-card,codec { + sound-dai = <&codec>; + }; + }; + + rumble { + compatible = "pwm-vibrator"; + pwms = <&pwm 4 2000000 0>; + pwm-names = "enable"; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_pwm4>; + }; + + backlight: backlight { + compatible = "pwm-backlight"; + pwms = <&pwm 1 40000 0>; + power-supply = <&vcc>; + + brightness-levels = <0 16 32 48 64 80 96 112 128 + 144 160 176 192 208 224 240 255>; + default-brightness-level = <12>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_pwm1>; + }; + + gpio-keys { + compatible = "gpio-keys"; + #address-cells = <1>; + #size-cells = <0>; + + autorepeat; + + button@0 { + label = "D-pad up"; + linux,code = <KEY_UP>; + linux,can-disable; + gpios = <&gpe 21 GPIO_ACTIVE_LOW>; + }; + + button@1 { + label = "D-pad down"; + linux,code = <KEY_DOWN>; + linux,can-disable; + gpios = <&gpe 25 GPIO_ACTIVE_LOW>; + }; + + button@2 { + label = "D-pad left"; + linux,code = <KEY_LEFT>; + linux,can-disable; + gpios = <&gpe 23 GPIO_ACTIVE_LOW>; + }; + + button@3 { + label = "D-pad right"; + linux,code = <KEY_RIGHT>; + linux,can-disable; + gpios = <&gpe 24 GPIO_ACTIVE_LOW>; + }; + + button@4 { + label = "Button A"; + linux,code = <KEY_LEFTCTRL>; + linux,can-disable; + gpios = <&gpe 29 GPIO_ACTIVE_LOW>; + }; + + button@5 { + label = "Button B"; + linux,code = <KEY_LEFTALT>; + linux,can-disable; + gpios = <&gpe 20 GPIO_ACTIVE_LOW>; + }; + + button@6 { + label = "Button Y"; + linux,code = <KEY_SPACE>; + linux,can-disable; + gpios = <&gpe 27 GPIO_ACTIVE_LOW>; + }; + + button@7 { + label = "Button X"; + linux,code = <KEY_LEFTSHIFT>; + linux,can-disable; + gpios = <&gpe 28 GPIO_ACTIVE_LOW>; + }; + + button@8 { + label = "Left shoulder button"; + linux,code = <KEY_TAB>; + linux,can-disable; + gpios = <&gpb 20 GPIO_ACTIVE_LOW>; + }; + + button@9 { + label = "Right shoulder button"; + linux,code = <KEY_BACKSPACE>; + linux,can-disable; + gpios = <&gpe 26 GPIO_ACTIVE_LOW>; + }; + + button@10 { + label = "Start button"; + linux,code = <KEY_ENTER>; + linux,can-disable; + gpios = <&gpb 21 GPIO_ACTIVE_LOW>; + }; + + button@11 { + label = "Select button"; + linux,code = <KEY_ESC>; + linux,can-disable; + /* + * This is the only button that is active high, + * since it doubles as BOOT_SEL1. + */ + gpios = <&gpd 18 GPIO_ACTIVE_HIGH>; + }; + + button@12 { + label = "Power slider"; + linux,code = <KEY_POWER>; + linux,can-disable; + gpios = <&gpa 30 GPIO_ACTIVE_LOW>; + wakeup-source; + }; + + button@13 { + label = "Power hold"; + linux,code = <KEY_PAUSE>; + linux,can-disable; + gpios = <&gpf 11 GPIO_ACTIVE_LOW>; + }; + }; + + i2c3: i2c-controller@3 { + compatible = "i2c-gpio"; + #address-cells = <1>; + #size-cells = <0>; + + sda-gpios = <&gpd 5 GPIO_ACTIVE_HIGH>; + scl-gpios = <&gpd 4 GPIO_ACTIVE_HIGH>; + i2c-gpio,delay-us = <2>; /* 250 kHz */ + + act8600: pmic@5a { + compatible = "active-semi,act8600"; + reg = <0x5a>; + + regulators { + /* USB OTG */ + otg_vbus: SUDCDC_REG4 { + /* + * 5.3V instead of 5.0V to compensate + * for the voltage drop of a diode + * between the regulator and the + * connector. + */ + regulator-min-microvolt = <5300000>; + regulator-max-microvolt = <5300000>; + inl-supply = <&vcc>; + }; + + /* + * When this is off, there is no sound, but also + * no USB networking. + */ + ldo5: LDO5 { + regulator-min-microvolt = <2500000>; + regulator-max-microvolt = <2500000>; + inl-supply = <&vcc>; + }; + + /* LCD panel and FM radio */ + ldo6: LDO6 { + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + inl-supply = <&vcc>; + }; + + /* ??? */ + LDO7 { + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + /*regulator-always-on;*/ + inl-supply = <&vcc>; + }; + + /* + * The colors on the LCD are wrong when this is + * off. Which is strange, since the LCD panel + * data sheet only mentions a 3.3V input. + */ + LDO8 { + regulator-min-microvolt = <1800000>; + regulator-max-microvolt = <1800000>; + regulator-always-on; + inl-supply = <&vcc>; + }; + + /* RTC fixed 3.3V */ + LDO_REG9 { + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; + regulator-always-on; + inl-supply = <&vcc>; + }; + + /* Unused fixed 1.2V */ + LDO_REG10 { + inl-supply = <&vcc>; + }; + }; + }; + }; + + leds { + compatible = "gpio-leds"; + + led { + gpios = <&gpb 30 GPIO_ACTIVE_LOW>; + default-state = "on"; + }; + }; + + spi { + compatible = "spi-gpio"; #address-cells = <1>; - #size-cells = <1>; - ranges; + #size-cells = <0>; + + sck-gpios = <&gpe 15 GPIO_ACTIVE_HIGH>; + mosi-gpios = <&gpe 17 GPIO_ACTIVE_HIGH>; + cs-gpios = <&gpe 16 GPIO_ACTIVE_HIGH>; + num-chipselects = <1>; + + nt39016@0 { + compatible = "kingdisplay,kd035g6-54nt"; + reg = <0>; + + spi-max-frequency = <3125000>; + spi-3wire; + spi-cs-high; - otg_phy: otg-phy { - compatible = "usb-nop-xceiv"; - clocks = <&cgu JZ4770_CLK_OTG_PHY>; - clock-names = "main_clk"; + reset-gpios = <&gpe 2 GPIO_ACTIVE_LOW>; + + backlight = <&backlight>; + power-supply = <&ldo6>; + + port { + panel_input: endpoint { + remote-endpoint = <&panel_output>; + }; + }; + }; + }; + + connector { + compatible = "gpio-usb-b-connector", "usb-b-connector"; + label = "mini-USB"; + type = "mini"; + + /* + * USB OTG is not yet working reliably, the ID detection + * mechanism tends to fry easily for unknown reasons. + * Until this is fixed, disable OTG by not providing the + * ID GPIO to the driver. + */ + //id-gpios = <&gpf 18 GPIO_ACTIVE_LOW>; + + vbus-gpios = <&gpb 5 GPIO_ACTIVE_HIGH>; + vbus-supply = <&otg_vbus>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_otg>; + + port { + usb_ep: endpoint { + remote-endpoint = <&usb_otg_ep>; + }; }; }; }; @@ -37,24 +391,86 @@ clock-frequency = <12000000>; }; +&pinctrl { + pins_lcd: lcd { + function = "lcd"; + groups = "lcd-24bit"; + }; + + pins_uart2: uart2 { + function = "uart2"; + groups = "uart2-data"; + }; + + pins_mmc0: mmc0 { + function = "mmc0"; + groups = "mmc0-1bit-a", "mmc0-4bit-a"; + }; + + pins_mmc1: mmc1 { + function = "mmc1"; + groups = "mmc1-1bit-d", "mmc1-4bit-d"; + }; + + pins_otg: otg { + otg-vbus-pin { + function = "otg"; + groups = "otg-vbus"; + }; + + vbus-pin { + pins = "PB5"; + bias-disable; + }; + }; + + pins_pwm1: pwm1 { + function = "pwm1"; + groups = "pwm1"; + }; + + pins_pwm4: pwm4 { + function = "pwm4"; + groups = "pwm4"; + }; +}; + &uart2 { + pinctrl-names = "default"; + pinctrl-0 = <&pins_uart2>; + status = "okay"; }; &cgu { - /* Put high-speed peripherals under PLL1, such that we can change the + /* + * Put high-speed peripherals under PLL1, such that we can change the * PLL0 frequency on demand without having to suspend peripherals. * We use a rate of 432 MHz, which is the least common multiple of * 27 MHz (required by TV encoder) and 48 MHz (required by USB host). + * Put the GPU under PLL0 since we want a higher frequency. + * Use the 32 kHz oscillator as the parent of the RTC for a higher + * precision. */ assigned-clocks = <&cgu JZ4770_CLK_PLL1>, - <&cgu JZ4770_CLK_UHC>; + <&cgu JZ4770_CLK_GPU>, + <&cgu JZ4770_CLK_RTC>, + <&cgu JZ4770_CLK_UHC>, + <&cgu JZ4770_CLK_LPCLK_MUX>, + <&cgu JZ4770_CLK_MMC0_MUX>, + <&cgu JZ4770_CLK_MMC1_MUX>; assigned-clock-parents = <0>, + <&cgu JZ4770_CLK_PLL0>, + <&cgu JZ4770_CLK_OSC32K>, + <&cgu JZ4770_CLK_PLL1>, + <&cgu JZ4770_CLK_PLL1>, + <&cgu JZ4770_CLK_PLL1>, <&cgu JZ4770_CLK_PLL1>; assigned-clock-rates = - <432000000>; + <432000000>, + <600000000>; }; &uhc { @@ -63,10 +479,69 @@ }; &tcu { - /* 750 kHz for the system timer and clocksource */ - assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>; - assigned-clock-rates = <750000>, <750000>; + /* + * 750 kHz for the system timer and clocksource, 12 MHz for the OST, + * and use RTC as the parent for the watchdog clock + */ + assigned-clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER2>, + <&tcu TCU_CLK_OST>, <&tcu TCU_CLK_WDT>; + assigned-clock-parents = <0>, <0>, <0>, <&cgu JZ4770_CLK_RTC>; + assigned-clock-rates = <750000>, <750000>, <12000000>; - /* PWM1 is in use, so reserve channel #2 for the clocksource */ + /* PWM1 is in use, so use channel #2 for the clocksource */ ingenic,pwm-channels-mask = <0xfa>; }; + +&usb_otg { + port { + usb_otg_ep: endpoint { + remote-endpoint = <&usb_ep>; + }; + }; +}; + +&otg_phy { + vcc-supply = <&ldo5>; +}; + +&rtc { + clocks = <&cgu JZ4770_CLK_RTC>; + clock-names = "rtc"; + + system-power-controller; +}; + +&mmc0 { + status = "okay"; + + bus-width = <4>; + max-frequency = <48000000>; + vmmc-supply = <&vcc>; + non-removable; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_mmc0>; +}; + +&mmc1 { + status = "okay"; + + bus-width = <4>; + max-frequency = <48000000>; + cd-gpios = <&gpb 2 GPIO_ACTIVE_LOW>; + vmmc-supply = <&mmc1_power>; + + pinctrl-names = "default"; + pinctrl-0 = <&pins_mmc1>; +}; + +&lcd { + pinctrl-names = "default"; + pinctrl-0 = <&pins_lcd>; + + port { + panel_output: endpoint { + remote-endpoint = <&panel_input>; + }; + }; +}; diff --git a/arch/mips/boot/dts/ingenic/gcw0_proto.dts b/arch/mips/boot/dts/ingenic/gcw0_proto.dts new file mode 100644 index 000000000000..02df22f8ae0f --- /dev/null +++ b/arch/mips/boot/dts/ingenic/gcw0_proto.dts @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/dts-v1/; + +#include "gcw0.dts" + +/ { + model = "GCW Zero Prototype"; +}; + +&memory { + /* Prototype has only 256 MiB of RAM */ + reg = <0x0 0x10000000>; +}; diff --git a/arch/mips/boot/dts/ingenic/jz4740.dtsi b/arch/mips/boot/dts/ingenic/jz4740.dtsi index a3301bab9231..1520585c235c 100644 --- a/arch/mips/boot/dts/ingenic/jz4740.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4740.dtsi @@ -55,10 +55,10 @@ #clock-cells = <1>; - clocks = <&cgu JZ4740_CLK_RTC - &cgu JZ4740_CLK_EXT - &cgu JZ4740_CLK_PCLK - &cgu JZ4740_CLK_TCU>; + clocks = <&cgu JZ4740_CLK_RTC>, + <&cgu JZ4740_CLK_EXT>, + <&cgu JZ4740_CLK_PCLK>, + <&cgu JZ4740_CLK_TCU>; clock-names = "rtc", "ext", "pclk", "tcu"; interrupt-controller; @@ -74,6 +74,20 @@ clocks = <&tcu TCU_CLK_WDT>; clock-names = "wdt"; }; + + pwm: pwm@40 { + compatible = "ingenic,jz4740-pwm"; + reg = <0x40 0x80>; + + #pwm-cells = <3>; + + clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>, + <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>, + <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>, + <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>; + clock-names = "timer0", "timer1", "timer2", "timer3", + "timer4", "timer5", "timer6", "timer7"; + }; }; rtc_dev: rtc@10003000 { @@ -241,10 +255,10 @@ reg = <0x13010000 0x54>; #address-cells = <2>; #size-cells = <1>; - ranges = <1 0 0x18000000 0x4000000 - 2 0 0x14000000 0x4000000 - 3 0 0x0c000000 0x4000000 - 4 0 0x08000000 0x4000000>; + ranges = <1 0 0x18000000 0x4000000>, + <2 0 0x14000000 0x4000000>, + <3 0 0x0c000000 0x4000000>, + <4 0 0x08000000 0x4000000>; clocks = <&cgu JZ4740_CLK_MCLK>; }; @@ -258,8 +272,7 @@ dmac: dma-controller@13020000 { compatible = "ingenic,jz4740-dma"; - reg = <0x13020000 0xbc - 0x13020300 0x14>; + reg = <0x13020000 0xbc>, <0x13020300 0x14>; #dma-cells = <2>; interrupt-parent = <&intc>; diff --git a/arch/mips/boot/dts/ingenic/jz4770.dtsi b/arch/mips/boot/dts/ingenic/jz4770.dtsi index 0bfb9edff3d0..fa11ac950499 100644 --- a/arch/mips/boot/dts/ingenic/jz4770.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4770.dtsi @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <dt-bindings/clock/jz4770-cgu.h> +#include <dt-bindings/clock/ingenic,tcu.h> / { #address-cells = <1>; @@ -37,13 +38,25 @@ }; cgu: jz4770-cgu@10000000 { - compatible = "ingenic,jz4770-cgu"; + compatible = "ingenic,jz4770-cgu", "simple-mfd"; reg = <0x10000000 0x100>; + #address-cells = <1>; + #size-cells = <1>; + ranges = <0x0 0x10000000 0x100>; clocks = <&ext>, <&osc32k>; clock-names = "ext", "osc32k"; #clock-cells = <1>; + + otg_phy: usb-phy@3c { + compatible = "ingenic,jz4770-phy"; + reg = <0x3c 0x10>; + + clocks = <&cgu JZ4770_CLK_OTG_PHY>; + + #phy-cells = <0>; + }; }; tcu: timer@10002000 { @@ -55,9 +68,9 @@ #clock-cells = <1>; - clocks = <&cgu JZ4770_CLK_RTC - &cgu JZ4770_CLK_EXT - &cgu JZ4770_CLK_PCLK>; + clocks = <&cgu JZ4770_CLK_RTC>, + <&cgu JZ4770_CLK_EXT>, + <&cgu JZ4770_CLK_PCLK>; clock-names = "rtc", "ext", "pclk"; interrupt-controller; @@ -65,6 +78,47 @@ interrupt-parent = <&intc>; interrupts = <27 26 25>; + + watchdog: watchdog@0 { + compatible = "ingenic,jz4770-watchdog", + "ingenic,jz4740-watchdog"; + reg = <0x0 0xc>; + + clocks = <&tcu TCU_CLK_WDT>; + clock-names = "wdt"; + }; + + pwm: pwm@40 { + compatible = "ingenic,jz4770-pwm", "ingenic,jz4740-pwm"; + reg = <0x40 0x80>; + + #pwm-cells = <3>; + + clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>, + <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>, + <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>, + <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>; + clock-names = "timer0", "timer1", "timer2", "timer3", + "timer4", "timer5", "timer6", "timer7"; + }; + + ost: timer@e0 { + compatible = "ingenic,jz4770-ost"; + reg = <0xe0 0x20>; + + clocks = <&tcu TCU_CLK_OST>; + clock-names = "ost"; + + interrupts = <15>; + }; + }; + + rtc: rtc@10003000 { + compatible = "ingenic,jz4770-rtc", "ingenic,jz4760-rtc"; + reg = <0x10003000 0x40>; + + interrupt-parent = <&intc>; + interrupts = <32>; }; pinctrl: pin-controller@10010000 { @@ -165,6 +219,93 @@ }; }; + aic: audio-controller@10020000 { + compatible = "ingenic,jz4770-i2s"; + reg = <0x10020000 0x94>; + + #sound-dai-cells = <0>; + + clocks = <&cgu JZ4770_CLK_AIC>, <&cgu JZ4770_CLK_I2S>, + <&cgu JZ4770_CLK_EXT>, <&cgu JZ4770_CLK_PLL0>; + clock-names = "aic", "i2s", "ext", "pll half"; + + interrupt-parent = <&intc>; + interrupts = <34>; + + dmas = <&dmac0 25 0xffffffff>, <&dmac0 24 0xffffffff>; + dma-names = "rx", "tx"; + }; + + codec: audio-codec@100200a0 { + compatible = "ingenic,jz4770-codec"; + reg = <0x100200a4 0x8>; + + #sound-dai-cells = <0>; + + clocks = <&cgu JZ4770_CLK_AIC>; + clock-names = "aic"; + }; + + mmc0: mmc@10021000 { + compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc"; + reg = <0x10021000 0x1000>; + + clocks = <&cgu JZ4770_CLK_MMC0>; + clock-names = "mmc"; + + interrupt-parent = <&intc>; + interrupts = <37>; + + dmas = <&dmac1 27 0xffffffff>, <&dmac1 26 0xffffffff>; + dma-names = "rx", "tx"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + status = "disabled"; + }; + + mmc1: mmc@10022000 { + compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc"; + reg = <0x10022000 0x1000>; + + clocks = <&cgu JZ4770_CLK_MMC1>; + clock-names = "mmc"; + + interrupt-parent = <&intc>; + interrupts = <36>; + + dmas = <&dmac1 31 0xffffffff>, <&dmac1 30 0xffffffff>; + dma-names = "rx", "tx"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + status = "disabled"; + }; + + mmc2: mmc@10023000 { + compatible = "ingenic,jz4770-mmc", "ingenic,jz4760-mmc"; + reg = <0x10023000 0x1000>; + + clocks = <&cgu JZ4770_CLK_MMC2>; + clock-names = "mmc"; + + interrupt-parent = <&intc>; + interrupts = <35>; + + dmas = <&dmac1 37 0xffffffff>, <&dmac1 36 0xffffffff>; + dma-names = "rx", "tx"; + + cap-sd-highspeed; + cap-mmc-highspeed; + cap-sdio-irq; + + status = "disabled"; + }; + uart0: serial@10030000 { compatible = "ingenic,jz4770-uart"; reg = <0x10030000 0x100>; @@ -217,34 +358,63 @@ status = "disabled"; }; + adc: adc@10070000 { + compatible = "ingenic,jz4770-adc"; + reg = <0x10070000 0x30>; + + #io-channel-cells = <1>; + + clocks = <&cgu JZ4770_CLK_ADC>; + clock-names = "adc"; + + interrupt-parent = <&intc>; + interrupts = <18>; + }; + + gpu: gpu@13040000 { + compatible = "vivante,gc"; + reg = <0x13040000 0x10000>; + + clocks = <&cgu JZ4770_CLK_GPU>, + <&cgu JZ4770_CLK_GPU>, + <&cgu JZ4770_CLK_GPU>; + clock-names = "bus", "core", "shader"; + + interrupt-parent = <&intc>; + interrupts = <6>; + }; + + lcd: lcd-controller@13050000 { + compatible = "ingenic,jz4770-lcd"; + reg = <0x13050000 0x300>; + + interrupt-parent = <&intc>; + interrupts = <31>; + + clocks = <&cgu JZ4770_CLK_LPCLK_MUX>; + clock-names = "lcd_pclk"; + }; + dmac0: dma-controller@13420000 { compatible = "ingenic,jz4770-dma"; - reg = <0x13420000 0xC0 - 0x13420300 0x20>; + reg = <0x13420000 0xC0>, <0x13420300 0x20>; - #dma-cells = <1>; + #dma-cells = <2>; clocks = <&cgu JZ4770_CLK_DMA>; interrupt-parent = <&intc>; interrupts = <24>; - - /* Disable dmac0 until we have something that uses it */ - status = "disabled"; }; dmac1: dma-controller@13420100 { compatible = "ingenic,jz4770-dma"; - reg = <0x13420100 0xC0 - 0x13420400 0x20>; + reg = <0x13420100 0xC0>, <0x13420400 0x20>; - #dma-cells = <1>; + #dma-cells = <2>; clocks = <&cgu JZ4770_CLK_DMA>; interrupt-parent = <&intc>; interrupts = <23>; - - /* Disable dmac1 until we have something that uses it */ - status = "disabled"; }; uhc: uhc@13430000 { @@ -260,4 +430,29 @@ status = "disabled"; }; + + usb_otg: usb@13440000 { + compatible = "ingenic,jz4770-musb"; + reg = <0x13440000 0x10000>; + + clocks = <&cgu JZ4770_CLK_OTG>; + clock-names = "udc"; + + interrupt-parent = <&intc>; + interrupts = <21>; + interrupt-names = "mc"; + + phys = <&otg_phy>; + + usb-role-switch; + }; + + rom: memory@1fc00000 { + compatible = "mtd-rom"; + probe-type = "map_rom"; + reg = <0x1fc00000 0x2000>; + + bank-width = <4>; + device-width = <1>; + }; }; diff --git a/arch/mips/boot/dts/ingenic/jz4780.dtsi b/arch/mips/boot/dts/ingenic/jz4780.dtsi index bb89653d16a3..b7f409a7cf5d 100644 --- a/arch/mips/boot/dts/ingenic/jz4780.dtsi +++ b/arch/mips/boot/dts/ingenic/jz4780.dtsi @@ -58,9 +58,9 @@ #clock-cells = <1>; - clocks = <&cgu JZ4780_CLK_RTCLK - &cgu JZ4780_CLK_EXCLK - &cgu JZ4780_CLK_PCLK>; + clocks = <&cgu JZ4780_CLK_RTCLK>, + <&cgu JZ4780_CLK_EXCLK>, + <&cgu JZ4780_CLK_PCLK>; clock-names = "rtc", "ext", "pclk"; interrupt-controller; @@ -76,6 +76,30 @@ clocks = <&tcu TCU_CLK_WDT>; clock-names = "wdt"; }; + + pwm: pwm@40 { + compatible = "ingenic,jz4780-pwm", "ingenic,jz4740-pwm"; + reg = <0x40 0x80>; + + #pwm-cells = <3>; + + clocks = <&tcu TCU_CLK_TIMER0>, <&tcu TCU_CLK_TIMER1>, + <&tcu TCU_CLK_TIMER2>, <&tcu TCU_CLK_TIMER3>, + <&tcu TCU_CLK_TIMER4>, <&tcu TCU_CLK_TIMER5>, + <&tcu TCU_CLK_TIMER6>, <&tcu TCU_CLK_TIMER7>; + clock-names = "timer0", "timer1", "timer2", "timer3", + "timer4", "timer5", "timer6", "timer7"; + }; + + ost: timer@e0 { + compatible = "ingenic,jz4780-ost", "ingenic,jz4770-ost"; + reg = <0xe0 0x20>; + + clocks = <&tcu TCU_CLK_OST>; + clock-names = "ost"; + + interrupts = <15>; + }; }; rtc_dev: rtc@10003000 { @@ -196,8 +220,7 @@ gpio-miso = <&gpe 14 0>; gpio-sck = <&gpe 15 0>; gpio-mosi = <&gpe 17 0>; - cs-gpios = <&gpe 16 0 - &gpe 18 0>; + cs-gpios = <&gpe 16 0>, <&gpe 18 0>; spidev@0 { compatible = "spidev"; @@ -358,26 +381,40 @@ }; nemc: nemc@13410000 { - compatible = "ingenic,jz4780-nemc"; + compatible = "ingenic,jz4780-nemc", "simple-mfd"; reg = <0x13410000 0x10000>; #address-cells = <2>; #size-cells = <1>; - ranges = <1 0 0x1b000000 0x1000000 - 2 0 0x1a000000 0x1000000 - 3 0 0x19000000 0x1000000 - 4 0 0x18000000 0x1000000 - 5 0 0x17000000 0x1000000 - 6 0 0x16000000 0x1000000>; + ranges = <0 0 0x13410000 0x10000>, + <1 0 0x1b000000 0x1000000>, + <2 0 0x1a000000 0x1000000>, + <3 0 0x19000000 0x1000000>, + <4 0 0x18000000 0x1000000>, + <5 0 0x17000000 0x1000000>, + <6 0 0x16000000 0x1000000>; clocks = <&cgu JZ4780_CLK_NEMC>; status = "disabled"; + + efuse: efuse@d0 { + reg = <0 0xd0 0x30>; + compatible = "ingenic,jz4780-efuse"; + + clocks = <&cgu JZ4780_CLK_AHB2>; + + #address-cells = <1>; + #size-cells = <1>; + + eth0_addr: eth-mac-addr@0x22 { + reg = <0x22 0x6>; + }; + }; }; dma: dma@13420000 { compatible = "ingenic,jz4780-dma"; - reg = <0x13420000 0x400 - 0x13421000 0x40>; + reg = <0x13420000 0x400>, <0x13421000 0x40>; #dma-cells = <2>; interrupt-parent = <&intc>; diff --git a/arch/mips/boot/dts/ingenic/x1000.dtsi b/arch/mips/boot/dts/ingenic/x1000.dtsi index 147f7d5c243a..59a63a0985a8 100644 --- a/arch/mips/boot/dts/ingenic/x1000.dtsi +++ b/arch/mips/boot/dts/ingenic/x1000.dtsi @@ -58,9 +58,9 @@ #clock-cells = <1>; - clocks = <&cgu X1000_CLK_RTCLK - &cgu X1000_CLK_EXCLK - &cgu X1000_CLK_PCLK>; + clocks = <&cgu X1000_CLK_RTCLK>, + <&cgu X1000_CLK_EXCLK>, + <&cgu X1000_CLK_PCLK>; clock-names = "rtc", "ext", "pclk"; interrupt-controller; @@ -239,8 +239,7 @@ pdma: dma-controller@13420000 { compatible = "ingenic,x1000-dma"; - reg = <0x13420000 0x400 - 0x13421000 0x40>; + reg = <0x13420000 0x400>, <0x13421000 0x40>; #dma-cells = <2>; interrupt-parent = <&intc>; diff --git a/arch/mips/boot/dts/loongson/rs780e-pch.dtsi b/arch/mips/boot/dts/loongson/rs780e-pch.dtsi index 45c54d555fa4..d0d5d60a8697 100644 --- a/arch/mips/boot/dts/loongson/rs780e-pch.dtsi +++ b/arch/mips/boot/dts/loongson/rs780e-pch.dtsi @@ -9,6 +9,18 @@ 0 0x40000000 0 0x40000000 0 0x40000000 0xfd 0xfe000000 0xfd 0xfe000000 0 0x2000000 /* PCI Config Space */>; + pci@1a000000 { + compatible = "loongson,rs780e-pci"; + device_type = "pci"; + #address-cells = <3>; + #size-cells = <2>; + + reg = <0 0x1a000000 0 0x02000000>; + + ranges = <0x01000000 0 0x00004000 0 0x18004000 0 0x00004000>, + <0x02000000 0 0x40000000 0 0x40000000 0 0x40000000>; + }; + isa { compatible = "isa"; #address-cells = <2>; @@ -21,6 +33,11 @@ interrupts = <8>; interrupt-parent = <&htpic>; }; + + acpi@800 { + compatible = "loongson,rs780e-acpi"; + reg = <1 0x800 0x100>; + }; }; }; }; diff --git a/arch/mips/boot/dts/mscc/ocelot.dtsi b/arch/mips/boot/dts/mscc/ocelot.dtsi index 797d336db54d..f94e8a02ed06 100644 --- a/arch/mips/boot/dts/mscc/ocelot.dtsi +++ b/arch/mips/boot/dts/mscc/ocelot.dtsi @@ -214,7 +214,7 @@ miim1: miim1 { pins = "GPIO_14", "GPIO_15"; - function = "miim1"; + function = "miim"; }; }; diff --git a/arch/mips/boot/dts/qca/ar9331.dtsi b/arch/mips/boot/dts/qca/ar9331.dtsi index 8f5aed760abb..83b3c0ce135a 100644 --- a/arch/mips/boot/dts/qca/ar9331.dtsi +++ b/arch/mips/boot/dts/qca/ar9331.dtsi @@ -59,7 +59,7 @@ #qca,ddr-wb-channel-cells = <1>; }; - uart: uart@18020000 { + uart: serial@18020000 { compatible = "qca,ar9330-uart"; reg = <0x18020000 0x14>; diff --git a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts index 0f2b20044834..7695d326df11 100644 --- a/arch/mips/boot/dts/qca/ar9331_dpt_module.dts +++ b/arch/mips/boot/dts/qca/ar9331_dpt_module.dts @@ -3,6 +3,7 @@ #include <dt-bindings/gpio/gpio.h> #include <dt-bindings/input/input.h> +#include <dt-bindings/leds/common.h> #include "ar9331.dtsi" @@ -22,8 +23,9 @@ leds { compatible = "gpio-leds"; - system { - label = "dpt-module:green:system"; + led-0 { + function = LED_FUNCTION_STATUS; + color = <LED_COLOR_ID_GREEN>; gpios = <&gpio 27 GPIO_ACTIVE_LOW>; default-state = "off"; }; diff --git a/arch/mips/cavium-octeon/Platform b/arch/mips/cavium-octeon/Platform index 45be853700e6..4adef38dea9d 100644 --- a/arch/mips/cavium-octeon/Platform +++ b/arch/mips/cavium-octeon/Platform @@ -1,7 +1,6 @@ # # Cavium Octeon # -platform-$(CONFIG_CAVIUM_OCTEON_SOC) += cavium-octeon/ cflags-$(CONFIG_CAVIUM_OCTEON_SOC) += \ -I$(srctree)/arch/mips/include/asm/mach-cavium-octeon load-$(CONFIG_CAVIUM_OCTEON_SOC) += 0xffffffff81100000 diff --git a/arch/mips/cavium-octeon/crypto/octeon-md5.c b/arch/mips/cavium-octeon/crypto/octeon-md5.c index d1ed066e1a17..8c8ea139653e 100644 --- a/arch/mips/cavium-octeon/crypto/octeon-md5.c +++ b/arch/mips/cavium-octeon/crypto/octeon-md5.c @@ -25,7 +25,6 @@ #include <linux/module.h> #include <linux/string.h> #include <asm/byteorder.h> -#include <linux/cryptohash.h> #include <asm/octeon/octeon.h> #include <crypto/internal/hash.h> diff --git a/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c b/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c index cc94cfa545b4..cb210d2ef0c4 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c +++ b/arch/mips/cavium-octeon/executive/cvmx-helper-npi.c @@ -59,18 +59,6 @@ int __cvmx_helper_npi_probe(int interface) && !OCTEON_IS_MODEL(OCTEON_CN52XX_PASS1_X)) /* The packet engines didn't exist before pass 2 */ return 4; -#if 0 - /* - * Technically CN30XX, CN31XX, and CN50XX contain packet - * engines, but nobody ever uses them. Since this is the case, - * we disable them here. - */ - else if (OCTEON_IS_MODEL(OCTEON_CN31XX) - || OCTEON_IS_MODEL(OCTEON_CN50XX)) - return 2; - else if (OCTEON_IS_MODEL(OCTEON_CN30XX)) - return 1; -#endif #endif return 0; } diff --git a/arch/mips/cavium-octeon/executive/cvmx-pko.c b/arch/mips/cavium-octeon/executive/cvmx-pko.c index b077597c668a..b0efc35e95c4 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-pko.c +++ b/arch/mips/cavium-octeon/executive/cvmx-pko.c @@ -489,7 +489,7 @@ cvmx_pko_status_t cvmx_pko_config_port(uint64_t port, uint64_t base_queue, config.s.qos_mask = 0xff; break; } - /* fall through - to the error case, when Pass 1 */ + fallthrough; /* to the error case, when Pass 1 */ default: cvmx_dprintf("ERROR: cvmx_pko_config_port: Invalid " "priority %llu\n", diff --git a/arch/mips/cavium-octeon/octeon-platform.c b/arch/mips/cavium-octeon/octeon-platform.c index 51685f893eab..d56e9b9d2e43 100644 --- a/arch/mips/cavium-octeon/octeon-platform.c +++ b/arch/mips/cavium-octeon/octeon-platform.c @@ -141,7 +141,7 @@ static void octeon2_usb_clocks_start(struct device *dev) default: pr_err("Invalid UCTL clock rate of %u, using 12000000 instead\n", clock_rate); - /* Fall through */ + fallthrough; case 12000000: clk_rst_ctl.s.p_refclk_div = 0; break; @@ -1116,7 +1116,7 @@ end_led: new_f[0] = cpu_to_be32(48000000); fdt_setprop_inplace(initial_boot_params, usbn, "refclk-frequency", new_f, sizeof(new_f)); - /* Fall through ...*/ + fallthrough; case USB_CLOCK_TYPE_REF_12: /* Missing "refclk-type" defaults to external. */ fdt_nop_property(initial_boot_params, usbn, "refclk-type"); diff --git a/arch/mips/cavium-octeon/octeon-usb.c b/arch/mips/cavium-octeon/octeon-usb.c index cc88a08bc1f7..1fd85c559700 100644 --- a/arch/mips/cavium-octeon/octeon-usb.c +++ b/arch/mips/cavium-octeon/octeon-usb.c @@ -398,7 +398,7 @@ static int dwc3_octeon_clocks_start(struct device *dev, u64 base) default: dev_err(dev, "Invalid ref_clk %u, using 100000000 instead\n", clock_rate); - /* fall through */ + fallthrough; case 100000000: mpll_mul = 0x19; if (ref_clk_sel < 2) diff --git a/arch/mips/cobalt/Platform b/arch/mips/cobalt/Platform index 34123efd6dfe..4254895ad6f4 100644 --- a/arch/mips/cobalt/Platform +++ b/arch/mips/cobalt/Platform @@ -1,6 +1,5 @@ # # Cobalt Server # -platform-$(CONFIG_MIPS_COBALT) += cobalt/ cflags-$(CONFIG_MIPS_COBALT) += -I$(srctree)/arch/mips/include/asm/mach-cobalt load-$(CONFIG_MIPS_COBALT) += 0xffffffff80080000 diff --git a/arch/mips/configs/ath79_defconfig b/arch/mips/configs/ath79_defconfig index 3d14d67dc746..96622a2ad333 100644 --- a/arch/mips/configs/ath79_defconfig +++ b/arch/mips/configs/ath79_defconfig @@ -46,7 +46,6 @@ CONFIG_MTD_JEDECPROBE=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y CONFIG_NETDEVICES=y CONFIG_ATH9K=m diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig index 0db0088bbc1c..e511fe0243a5 100644 --- a/arch/mips/configs/ci20_defconfig +++ b/arch/mips/configs/ci20_defconfig @@ -1,5 +1,4 @@ # CONFIG_LOCALVERSION_AUTO is not set -CONFIG_MODULES=y CONFIG_KERNEL_XZ=y CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y @@ -29,6 +28,7 @@ CONFIG_HIGHMEM=y CONFIG_HZ_100=y # CONFIG_SECCOMP is not set # CONFIG_SUSPEND is not set +CONFIG_MODULES=y # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set # CONFIG_COMPACTION is not set CONFIG_CMA=y @@ -38,17 +38,12 @@ CONFIG_UNIX=y CONFIG_INET=y CONFIG_IP_PNP=y CONFIG_IP_PNP_DHCP=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set # CONFIG_INET_DIAG is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set CONFIG_DEVTMPFS=y # CONFIG_FW_LOADER is not set # CONFIG_ALLOW_DEV_COREDUMP is not set -CONFIG_DMA_CMA=y -CONFIG_CMA_SIZE_MBYTES=32 CONFIG_MTD=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_JZ4780=y @@ -72,9 +67,8 @@ CONFIG_DM9000_FORCE_SIMPLE_PHY_POLL=y # CONFIG_NET_VENDOR_VIA is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_WLAN is not set -# CONFIG_INPUT_KEYBOARD is not set +CONFIG_KEYBOARD_GPIO=m # CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set CONFIG_VT_HW_CONSOLE_BINDING=y CONFIG_LEGACY_PTY_COUNT=2 CONFIG_SERIAL_8250=y @@ -89,7 +83,7 @@ CONFIG_I2C_JZ4780=y CONFIG_SPI=y CONFIG_SPI_GPIO=y CONFIG_GPIO_SYSFS=y -CONFIG_KEYBOARD_GPIO=m +CONFIG_POWER_SUPPLY=y # CONFIG_HWMON is not set CONFIG_WATCHDOG=y CONFIG_JZ4740_WDT=y @@ -97,17 +91,45 @@ CONFIG_REGULATOR=y CONFIG_REGULATOR_DEBUG=y CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_REGULATOR_ACT8865=y +CONFIG_RC_CORE=m +CONFIG_LIRC=y +CONFIG_RC_DEVICES=y +CONFIG_IR_GPIO_CIR=m +CONFIG_IR_GPIO_TX=m +CONFIG_MEDIA_SUPPORT=m # CONFIG_VGA_CONSOLE is not set # CONFIG_HID is not set # CONFIG_USB_SUPPORT is not set CONFIG_MMC=y CONFIG_MMC_JZ4740=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_ONESHOT=y +CONFIG_LEDS_TRIGGER_MTD=y +CONFIG_LEDS_TRIGGER_HEARTBEAT=y +CONFIG_LEDS_TRIGGER_BACKLIGHT=m +CONFIG_LEDS_TRIGGER_CPU=y +CONFIG_LEDS_TRIGGER_ACTIVITY=y +CONFIG_LEDS_TRIGGER_GPIO=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y +CONFIG_LEDS_TRIGGER_TRANSIENT=y +CONFIG_LEDS_TRIGGER_CAMERA=m +CONFIG_LEDS_TRIGGER_PANIC=y +CONFIG_LEDS_TRIGGER_NETDEV=y +CONFIG_LEDS_TRIGGER_PATTERN=y +CONFIG_LEDS_TRIGGER_AUDIO=y CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_JZ4740=y CONFIG_DMADEVICES=y CONFIG_DMA_JZ4780=y +CONFIG_INGENIC_OST=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_MEMORY=y +CONFIG_PWM=y +CONFIG_PWM_JZ4740=m CONFIG_EXT4_FS=y # CONFIG_DNOTIFY is not set CONFIG_PROC_KCORE=y @@ -156,11 +178,13 @@ CONFIG_NLS_ISO8859_15=y CONFIG_NLS_KOI8_R=y CONFIG_NLS_KOI8_U=y CONFIG_NLS_UTF8=y +CONFIG_DMA_CMA=y +CONFIG_CMA_SIZE_MBYTES=32 CONFIG_PRINTK_TIME=y CONFIG_DEBUG_INFO=y CONFIG_STRIP_ASM_SYMS=y -CONFIG_DEBUG_FS=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_FS=y CONFIG_PANIC_ON_OOPS=y CONFIG_PANIC_TIMEOUT=10 # CONFIG_SCHED_DEBUG is not set @@ -169,21 +193,3 @@ CONFIG_STACKTRACE=y # CONFIG_FTRACE is not set CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="earlycon console=ttyS4,115200 clk_ignore_unused" -CONFIG_LEDS_CLASS=y -CONFIG_LEDS_GPIO=y -CONFIG_LEDS_TRIGGERS=y -CONFIG_LEDS_TRIGGER_MTD=y -CONFIG_LEDS_TRIGGER_TIMER=y -CONFIG_LEDS_TRIGGER_ONESHOT=y -CONFIG_LEDS_TRIGGER_ONESHOT=y -CONFIG_LEDS_TRIGGER_HEARTBEAT=y -CONFIG_LEDS_TRIGGER_BACKLIGHT=m -CONFIG_LEDS_TRIGGER_CPU=y -CONFIG_LEDS_TRIGGER_DEFAULT_ON=y -CONFIG_LEDS_TRIGGER_TRANSIENT=y -CONFIG_LEDS_TRIGGER_CAMERA=m -CONFIG_LIRC=y -CONFIG_MEDIA_SUPPORT=m -CONFIG_RC_DEVICES=y -CONFIG_IR_GPIO_CIR=m -CONFIG_IR_GPIO_TX=m diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig index e6f3e8e3da39..b8bd66300996 100644 --- a/arch/mips/configs/db1xxx_defconfig +++ b/arch/mips/configs/db1xxx_defconfig @@ -92,7 +92,6 @@ CONFIG_MTD_CFI=y CONFIG_MTD_CFI_ADV_OPTIONS=y CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_PHYSMAP=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SST25L=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_ECC_SW_BCH=y diff --git a/arch/mips/configs/gcw0_defconfig b/arch/mips/configs/gcw0_defconfig index a3e3eb3c5a8b..48131cb47e66 100644 --- a/arch/mips/configs/gcw0_defconfig +++ b/arch/mips/configs/gcw0_defconfig @@ -1,27 +1,152 @@ +CONFIG_DEFAULT_HOSTNAME="gcw0" CONFIG_NO_HZ_IDLE=y CONFIG_HIGH_RES_TIMERS=y CONFIG_PREEMPT_VOLUNTARY=y CONFIG_EMBEDDED=y +CONFIG_PROFILING=y CONFIG_MACH_INGENIC=y CONFIG_JZ4770_GCW0=y CONFIG_HIGHMEM=y # CONFIG_SECCOMP is not set -# CONFIG_SUSPEND is not set +CONFIG_MIPS_RAW_APPENDED_DTB=y +CONFIG_MIPS_CMDLINE_DTB_EXTEND=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set # CONFIG_BOUNCE is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y CONFIG_INET=y +CONFIG_CFG80211=y +CONFIG_MAC80211=m CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_LOOP_MIN_COUNT=0 CONFIG_NETDEVICES=y +# CONFIG_ETHERNET is not set +# CONFIG_WLAN_VENDOR_ADMTEK is not set +# CONFIG_WLAN_VENDOR_ATH is not set +# CONFIG_WLAN_VENDOR_ATMEL is not set +# CONFIG_WLAN_VENDOR_BROADCOM is not set +# CONFIG_WLAN_VENDOR_CISCO is not set +# CONFIG_WLAN_VENDOR_INTEL is not set +# CONFIG_WLAN_VENDOR_INTERSIL is not set +# CONFIG_WLAN_VENDOR_MARVELL is not set +# CONFIG_WLAN_VENDOR_MEDIATEK is not set +# CONFIG_WLAN_VENDOR_RALINK is not set +CONFIG_RTL8192CU=m +# CONFIG_RTLWIFI_DEBUG is not set +# CONFIG_WLAN_VENDOR_RSI is not set +# CONFIG_WLAN_VENDOR_ST is not set +# CONFIG_WLAN_VENDOR_TI is not set +# CONFIG_WLAN_VENDOR_ZYDAS is not set +# CONFIG_WLAN_VENDOR_QUANTENNA is not set +CONFIG_INPUT_EVDEV=y +# CONFIG_KEYBOARD_ATKBD is not set +CONFIG_KEYBOARD_GPIO=y +# CONFIG_INPUT_MOUSE is not set +CONFIG_INPUT_JOYSTICK=y +CONFIG_INPUT_MISC=y +CONFIG_INPUT_UINPUT=y +CONFIG_INPUT_PWM_VIBRA=y +# CONFIG_SERIO is not set CONFIG_SERIAL_8250=y # CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_8250_INGENIC=y +CONFIG_HW_RANDOM=y +CONFIG_I2C_GPIO=y +CONFIG_SPI=y +CONFIG_SPI_GPIO=y +CONFIG_POWER_SUPPLY=y +# CONFIG_HWMON is not set +CONFIG_WATCHDOG=y +CONFIG_JZ4740_WDT=y +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y +CONFIG_REGULATOR_ACT8865=y +CONFIG_DRM=y +CONFIG_DRM_FBDEV_OVERALLOC=300 +CONFIG_DRM_PANEL_NOVATEK_NT39016=y +CONFIG_DRM_INGENIC=y +CONFIG_DRM_ETNAVIV=y +CONFIG_BACKLIGHT_CLASS_DEVICE=y +# CONFIG_BACKLIGHT_GENERIC is not set +CONFIG_BACKLIGHT_PWM=y +# CONFIG_VGA_CONSOLE is not set +CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +# CONFIG_SND_SUPPORT_OLD_API is not set +# CONFIG_SND_PROC_FS is not set +# CONFIG_SND_DRIVERS is not set +# CONFIG_SND_SPI is not set +# CONFIG_SND_MIPS is not set +# CONFIG_SND_USB is not set +CONFIG_SND_SOC=y +CONFIG_SND_JZ4740_SOC_I2S=y +CONFIG_SND_SOC_JZ4770_CODEC=y +CONFIG_SND_SOC_SIMPLE_AMPLIFIER=y +CONFIG_SND_SIMPLE_CARD=y +CONFIG_USB_CONN_GPIO=y CONFIG_USB=y +CONFIG_USB_OTG=y +CONFIG_USB_OTG_BLACKLIST_HUB=y CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y -CONFIG_NOP_USB_XCEIV=y -CONFIG_TMPFS=y +CONFIG_USB_MUSB_HDRC=y +CONFIG_USB_MUSB_GADGET=y +CONFIG_USB_MUSB_JZ4740=y +CONFIG_USB_INVENTRA_DMA=y +CONFIG_JZ4770_PHY=y +CONFIG_USB_GADGET=y +CONFIG_USB_GADGET_VBUS_DRAW=500 +CONFIG_USB_ETH=y +CONFIG_MMC=y +# CONFIG_PWRSEQ_EMMC is not set +# CONFIG_PWRSEQ_SIMPLE is not set +CONFIG_MMC_JZ4740=y +CONFIG_NEW_LEDS=y +CONFIG_LEDS_CLASS=y +CONFIG_LEDS_GPIO=y +CONFIG_LEDS_TRIGGERS=y +CONFIG_LEDS_TRIGGER_TIMER=y +CONFIG_LEDS_TRIGGER_DEFAULT_ON=y +CONFIG_LEDS_TRIGGER_PANIC=y +CONFIG_RTC_CLASS=y +# CONFIG_RTC_NVMEM is not set +CONFIG_RTC_DRV_JZ4740=y +CONFIG_DMADEVICES=y +CONFIG_DMA_JZ4780=y +# CONFIG_VIRTIO_MENU is not set +CONFIG_STAGING=y +CONFIG_R8188EU=m +CONFIG_INGENIC_OST=y +# CONFIG_IOMMU_SUPPORT is not set +CONFIG_IIO=y +CONFIG_IIO_BUFFER=y +CONFIG_IIO_BUFFER_CB=y +CONFIG_IIO_KFIFO_BUF=y +CONFIG_MXC6255=m +CONFIG_INGENIC_ADC=y +CONFIG_PWM=y +CONFIG_PWM_JZ4740=y +CONFIG_EXT4_FS=y +CONFIG_VFAT_FS=y +CONFIG_SQUASHFS=y +CONFIG_SQUASHFS_FILE_DIRECT=y +CONFIG_SQUASHFS_DECOMP_MULTI=y +# CONFIG_NETWORK_FILESYSTEMS is not set +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=y +CONFIG_LSM="lockdown,yama,loadpin,safesetid,integrity" +CONFIG_CRYPTO_ECHAINIV=y +CONFIG_FONTS=y +CONFIG_FONT_6x10=y +CONFIG_DEBUG_FS=y diff --git a/arch/mips/configs/generic/board-ocelot.config b/arch/mips/configs/generic/board-ocelot.config index 7626f2a75b03..510709565404 100644 --- a/arch/mips/configs/generic/board-ocelot.config +++ b/arch/mips/configs/generic/board-ocelot.config @@ -9,7 +9,6 @@ CONFIG_GENERIC_PHY=y CONFIG_MTD=y CONFIG_MTD_CMDLINE_PARTS=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_M25P80=y CONFIG_MTD_RAW_NAND=y CONFIG_MTD_NAND_PLATFORM=y CONFIG_MTD_SPI_NOR=y diff --git a/arch/mips/configs/lasat_defconfig b/arch/mips/configs/lasat_defconfig deleted file mode 100644 index 00cf461db971..000000000000 --- a/arch/mips/configs/lasat_defconfig +++ /dev/null @@ -1,55 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EXPERT=y -# CONFIG_EPOLL is not set -# CONFIG_SIGNALFD is not set -# CONFIG_TIMERFD is not set -# CONFIG_EVENTFD is not set -# CONFIG_KALLSYMS is not set -CONFIG_SLAB=y -CONFIG_LASAT=y -CONFIG_PICVUE=y -CONFIG_PICVUE_PROC=y -CONFIG_DS1603=y -CONFIG_LASAT_SYSCTL=y -CONFIG_HZ_1000=y -# CONFIG_SECCOMP is not set -CONFIG_PCI=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_INET=y -# CONFIG_INET_XFRM_MODE_TRANSPORT is not set -# CONFIG_INET_XFRM_MODE_TUNNEL is not set -# CONFIG_INET_XFRM_MODE_BEET is not set -# CONFIG_INET_DIAG is not set -# CONFIG_IPV6 is not set -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_BLK_DEV_SD=y -CONFIG_ATA=y -CONFIG_PATA_CMD64X=y -CONFIG_ATA_GENERIC=y -CONFIG_PATA_LEGACY=y -CONFIG_NETDEVICES=y -CONFIG_PCNET32=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -CONFIG_SERIO_RAW=y -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_SERIAL_8250_PCI is not set -# CONFIG_HW_RANDOM is not set -# CONFIG_HWMON is not set -# CONFIG_USB_SUPPORT is not set -CONFIG_EXT2_FS=y -CONFIG_EXT3_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_CONFIGFS_FS=y -CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index 51675f5000d6..3d4c7e9996c5 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -21,6 +21,7 @@ CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y CONFIG_EMBEDDED=y +CONFIG_PERF_EVENTS=y CONFIG_MACH_LOONGSON64=y CONFIG_SMP=y CONFIG_HZ_256=y @@ -216,6 +217,7 @@ CONFIG_SERIAL_8250_EXTENDED=y CONFIG_SERIAL_8250_MANY_PORTS=y CONFIG_SERIAL_8250_SHARE_IRQ=y CONFIG_SERIAL_8250_RSA=y +CONFIG_SERIAL_OF_PLATFORM=y CONFIG_HW_RANDOM=y CONFIG_RAW_DRIVER=m CONFIG_I2C_CHARDEV=y @@ -229,7 +231,7 @@ CONFIG_MEDIA_CAMERA_SUPPORT=y CONFIG_MEDIA_USB_SUPPORT=y CONFIG_USB_VIDEO_CLASS=m CONFIG_DRM=y -CONFIG_DRM_RADEON=y +CONFIG_DRM_RADEON=m CONFIG_FB_RADEON=y CONFIG_LCD_CLASS_DEVICE=y CONFIG_LCD_PLATFORM=m diff --git a/arch/mips/configs/markeins_defconfig b/arch/mips/configs/markeins_defconfig deleted file mode 100644 index 507ad91b21e7..000000000000 --- a/arch/mips/configs/markeins_defconfig +++ /dev/null @@ -1,185 +0,0 @@ -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_PREEMPT=y -CONFIG_BSD_PROCESS_ACCT=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EXPERT=y -CONFIG_SLAB=y -CONFIG_NEC_MARKEINS=y -CONFIG_HZ_1000=y -CONFIG_PCI=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODULE_FORCE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_NET_KEY=y -CONFIG_NET_KEY_MIGRATE=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_ADVANCED_ROUTER=y -CONFIG_IP_MULTIPLE_TABLES=y -CONFIG_IP_ROUTE_MULTIPATH=y -CONFIG_IP_ROUTE_VERBOSE=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_SYN_COOKIES=y -CONFIG_INET_XFRM_MODE_TRANSPORT=m -CONFIG_INET_XFRM_MODE_TUNNEL=m -CONFIG_INET_XFRM_MODE_BEET=m -CONFIG_TCP_MD5SIG=y -CONFIG_IPV6_MIP6=m -CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m -CONFIG_IPV6_MULTIPLE_TABLES=y -CONFIG_IPV6_SUBTREES=y -CONFIG_NETWORK_SECMARK=y -CONFIG_NETFILTER=y -CONFIG_NF_CONNTRACK=m -CONFIG_NF_CONNTRACK_SECMARK=y -CONFIG_NF_CONNTRACK_EVENTS=y -CONFIG_NF_CONNTRACK_AMANDA=m -CONFIG_NF_CONNTRACK_FTP=m -CONFIG_NF_CONNTRACK_H323=m -CONFIG_NF_CONNTRACK_IRC=m -CONFIG_NF_CONNTRACK_PPTP=m -CONFIG_NF_CONNTRACK_SANE=m -CONFIG_NF_CONNTRACK_SIP=m -CONFIG_NF_CONNTRACK_TFTP=m -CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m -CONFIG_NETFILTER_XT_TARGET_CONNMARK=m -CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m -CONFIG_NETFILTER_XT_TARGET_DSCP=m -CONFIG_NETFILTER_XT_TARGET_MARK=m -CONFIG_NETFILTER_XT_TARGET_NFLOG=m -CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m -CONFIG_NETFILTER_XT_TARGET_SECMARK=m -CONFIG_NETFILTER_XT_TARGET_TCPMSS=m -CONFIG_NETFILTER_XT_MATCH_COMMENT=m -CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m -CONFIG_NETFILTER_XT_MATCH_CONNMARK=m -CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m -CONFIG_NETFILTER_XT_MATCH_DCCP=m -CONFIG_NETFILTER_XT_MATCH_DSCP=m -CONFIG_NETFILTER_XT_MATCH_ESP=m -CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m -CONFIG_NETFILTER_XT_MATCH_HELPER=m -CONFIG_NETFILTER_XT_MATCH_LENGTH=m -CONFIG_NETFILTER_XT_MATCH_LIMIT=m -CONFIG_NETFILTER_XT_MATCH_MAC=m -CONFIG_NETFILTER_XT_MATCH_MARK=m -CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m -CONFIG_NETFILTER_XT_MATCH_POLICY=m -CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m -CONFIG_NETFILTER_XT_MATCH_QUOTA=m -CONFIG_NETFILTER_XT_MATCH_REALM=m -CONFIG_NETFILTER_XT_MATCH_STATE=m -CONFIG_NETFILTER_XT_MATCH_STATISTIC=m -CONFIG_NETFILTER_XT_MATCH_STRING=m -CONFIG_NETFILTER_XT_MATCH_TCPMSS=m -CONFIG_IP_NF_IPTABLES=m -CONFIG_IP_NF_MATCH_AH=m -CONFIG_IP_NF_MATCH_ECN=m -CONFIG_IP_NF_MATCH_TTL=m -CONFIG_IP_NF_FILTER=m -CONFIG_IP_NF_TARGET_REJECT=m -CONFIG_IP_NF_MANGLE=m -CONFIG_IP_NF_TARGET_CLUSTERIP=m -CONFIG_IP_NF_TARGET_ECN=m -CONFIG_IP_NF_TARGET_TTL=m -CONFIG_IP_NF_RAW=m -CONFIG_IP_NF_ARPTABLES=m -CONFIG_IP_NF_ARPFILTER=m -CONFIG_IP_NF_ARP_MANGLE=m -CONFIG_IP6_NF_IPTABLES=m -CONFIG_IP6_NF_MATCH_AH=m -CONFIG_IP6_NF_MATCH_EUI64=m -CONFIG_IP6_NF_MATCH_FRAG=m -CONFIG_IP6_NF_MATCH_OPTS=m -CONFIG_IP6_NF_MATCH_HL=m -CONFIG_IP6_NF_MATCH_IPV6HEADER=m -CONFIG_IP6_NF_MATCH_MH=m -CONFIG_IP6_NF_MATCH_RT=m -CONFIG_IP6_NF_TARGET_HL=m -CONFIG_IP6_NF_FILTER=m -CONFIG_IP6_NF_TARGET_REJECT=m -CONFIG_IP6_NF_MANGLE=m -CONFIG_IP6_NF_RAW=m -CONFIG_FW_LOADER=m -CONFIG_MTD=y -CONFIG_MTD_CMDLINE_PARTS=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_PHYSMAP=y -CONFIG_BLK_DEV_LOOP=m -CONFIG_BLK_DEV_CRYPTOLOOP=m -CONFIG_SCSI=m -# CONFIG_SCSI_PROC_FS is not set -CONFIG_BLK_DEV_SD=m -CONFIG_CHR_DEV_SG=m -CONFIG_SCSI_SCAN_ASYNC=y -CONFIG_SCSI_AIC94XX=m -# CONFIG_AIC94XX_DEBUG is not set -CONFIG_NETDEVICES=y -CONFIG_TUN=m -CONFIG_CHELSIO_T3=m -CONFIG_NATSEMI=y -CONFIG_QLA3XXX=m -CONFIG_NETXEN_NIC=m -CONFIG_PPP=m -CONFIG_PPP_DEFLATE=m -CONFIG_PPP_ASYNC=m -CONFIG_PPP_SYNC_TTY=m -CONFIG_INPUT_EVDEV=m -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_DEBUG_CORE=y -CONFIG_I2C_DEBUG_BUS=y -# CONFIG_HID is not set -CONFIG_EXT2_FS=y -CONFIG_EXT2_FS_XATTR=y -CONFIG_EXT2_FS_POSIX_ACL=y -CONFIG_EXT2_FS_SECURITY=y -CONFIG_EXT3_FS=m -CONFIG_XFS_FS=m -# CONFIG_DNOTIFY is not set -CONFIG_AUTOFS4_FS=m -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_NTFS_FS=m -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_TMPFS_POSIX_ACL=y -CONFIG_JFFS2_FS=y -CONFIG_JFFS2_COMPRESSION_OPTIONS=y -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V4=y -CONFIG_ROOT_NFS=y -CONFIG_NFSD=m -CONFIG_NFSD_V3=y -CONFIG_NLS_DEFAULT="" -CONFIG_NLS_CODEPAGE_437=m -CONFIG_NLS_ASCII=m -CONFIG_NLS_ISO8859_1=m -CONFIG_NLS_UTF8=m -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_LRW=m -CONFIG_CRYPTO_PCBC=m -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_XCBC=m -CONFIG_CRYPTO_CAMELLIA=m -CONFIG_CRYPTO_FCRYPT=m -CONFIG_CMDLINE_BOOL=y -CONFIG_CMDLINE="console=ttyS0,115200 mem=192m ip=bootp root=/dev/nfs rw" diff --git a/arch/mips/configs/msp71xx_defconfig b/arch/mips/configs/msp71xx_defconfig deleted file mode 100644 index 6ad1a2381226..000000000000 --- a/arch/mips/configs/msp71xx_defconfig +++ /dev/null @@ -1,77 +0,0 @@ -CONFIG_LOCALVERSION="-pmc" -# CONFIG_SWAP is not set -CONFIG_SYSVIPC=y -CONFIG_PREEMPT=y -CONFIG_LOG_BUF_SHIFT=14 -CONFIG_EXPERT=y -# CONFIG_SHMEM is not set -CONFIG_SLAB=y -CONFIG_PMC_MSP=y -CONFIG_PMC_MSP7120_GW=y -CONFIG_CPU_MIPS32_R2=y -CONFIG_NR_CPUS=2 -CONFIG_PCI=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_NET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=y -CONFIG_NET_KEY=y -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -CONFIG_IP_PNP_BOOTP=y -CONFIG_INET_AH=y -CONFIG_INET_ESP=y -CONFIG_INET_IPCOMP=y -# CONFIG_IPV6 is not set -CONFIG_NETFILTER=y -CONFIG_IP_NF_IPTABLES=y -CONFIG_IP_NF_FILTER=y -CONFIG_IP_NF_TARGET_REJECT=y -CONFIG_BRIDGE=y -# CONFIG_PREVENT_FIRMWARE_BUILD is not set -# CONFIG_FW_LOADER is not set -CONFIG_MTD=y -CONFIG_MTD_BLOCK=y -CONFIG_MTD_CFI=y -CONFIG_MTD_CFI_AMDSTD=y -CONFIG_MTD_RAM=y -CONFIG_MTD_PMC_MSP_EVM=y -CONFIG_BLK_DEV_RAM=y -CONFIG_SCSI=y -CONFIG_BLK_DEV_SD=y -CONFIG_NETDEVICES=y -CONFIG_DUMMY=y -CONFIG_PPP=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -# CONFIG_VT is not set -# CONFIG_LEGACY_PTYS is not set -# CONFIG_SERIAL_8250_PCI is not set -CONFIG_SERIAL_8250_NR_UARTS=2 -CONFIG_SERIAL_8250_RUNTIME_UARTS=2 -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_PMCMSP=y -# CONFIG_USB_HID is not set -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -# CONFIG_USB_EHCI_TT_NEWSCHED is not set -CONFIG_USB_STORAGE=y -CONFIG_EXT2_FS=y -# CONFIG_DNOTIFY is not set -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_JFFS2_FS=y -CONFIG_SQUASHFS=y -CONFIG_SQUASHFS_EMBEDDED=y -CONFIG_NLS_CODEPAGE_437=y -CONFIG_NLS_ISO8859_1=y -CONFIG_MAGIC_SYSRQ=y diff --git a/arch/mips/configs/pistachio_defconfig b/arch/mips/configs/pistachio_defconfig index 24e07180c57d..b9adf15ebbec 100644 --- a/arch/mips/configs/pistachio_defconfig +++ b/arch/mips/configs/pistachio_defconfig @@ -127,7 +127,6 @@ CONFIG_DEBUG_DEVRES=y CONFIG_CONNECTOR=y CONFIG_MTD=y CONFIG_MTD_BLOCK=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y CONFIG_MTD_UBI=y CONFIG_MTD_UBI_BLOCK=y diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig index 8c2ead53007a..fec5851c164b 100644 --- a/arch/mips/configs/rt305x_defconfig +++ b/arch/mips/configs/rt305x_defconfig @@ -76,7 +76,6 @@ CONFIG_MTD_CFI_AMDSTD=y CONFIG_MTD_COMPLEX_MAPPINGS=y CONFIG_MTD_PHYSMAP=y CONFIG_MTD_PHYSMAP_OF=y -CONFIG_MTD_M25P80=y CONFIG_MTD_SPI_NOR=y CONFIG_EEPROM_93CX6=m CONFIG_SCSI=y diff --git a/arch/mips/dec/Platform b/arch/mips/dec/Platform index cf55a6f4e720..c82391e832f9 100644 --- a/arch/mips/dec/Platform +++ b/arch/mips/dec/Platform @@ -1,7 +1,6 @@ # # DECstation family # -platform-$(CONFIG_MACH_DECSTATION) += dec/ cflags-$(CONFIG_MACH_DECSTATION) += \ -I$(srctree)/arch/mips/include/asm/mach-dec libs-$(CONFIG_MACH_DECSTATION) += arch/mips/dec/prom/ diff --git a/arch/mips/dec/int-handler.S b/arch/mips/dec/int-handler.S index a25ef822e725..ea5b5a83f1e1 100644 --- a/arch/mips/dec/int-handler.S +++ b/arch/mips/dec/int-handler.S @@ -304,8 +304,8 @@ spurious: */ FEXPORT(dec_intr_unimplemented) move a1,t0 # cheats way of printing an arg! - PANIC("Unimplemented cpu interrupt! CP0_CAUSE: 0x%08x"); + ASM_PANIC("Unimplemented cpu interrupt! CP0_CAUSE: 0x%08x"); FEXPORT(asic_intr_unimplemented) move a1,t0 # cheats way of printing an arg! - PANIC("Unimplemented asic interrupt! ASIC ISR: 0x%08x"); + ASM_PANIC("Unimplemented asic interrupt! ASIC ISR: 0x%08x"); diff --git a/arch/mips/dec/tc.c b/arch/mips/dec/tc.c index 732027c79834..dba58397668e 100644 --- a/arch/mips/dec/tc.c +++ b/arch/mips/dec/tc.c @@ -52,7 +52,7 @@ int __init tc_bus_get_info(struct tc_bus *tbus) case MACH_DS5900: tbus->ext_slot_base = 0x20000000; tbus->ext_slot_size = 0x20000000; - /* fall through */ + fallthrough; case MACH_DS5000_1XX: tbus->num_tcslots = 3; break; diff --git a/arch/mips/emma/Makefile b/arch/mips/emma/Makefile deleted file mode 100644 index bc03082064ca..000000000000 --- a/arch/mips/emma/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_SOC_EMMA2RH) += common/ - -# -# NEC EMMA2RH Mark-eins -# -obj-$(CONFIG_NEC_MARKEINS) += markeins/ diff --git a/arch/mips/emma/Platform b/arch/mips/emma/Platform deleted file mode 100644 index 0282f7f99b88..000000000000 --- a/arch/mips/emma/Platform +++ /dev/null @@ -1,4 +0,0 @@ -platform-$(CONFIG_SOC_EMMA2RH) += emma/ -cflags-$(CONFIG_SOC_EMMA2RH) += \ - -I$(srctree)/arch/mips/include/asm/mach-emma2rh -load-$(CONFIG_NEC_MARKEINS) += 0xffffffff88100000 diff --git a/arch/mips/emma/common/Makefile b/arch/mips/emma/common/Makefile deleted file mode 100644 index a754abd1beb9..000000000000 --- a/arch/mips/emma/common/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-or-later -# -# Copyright (C) NEC Electronics Corporation 2005-2006 -# - -obj-$(CONFIG_NEC_MARKEINS) += prom.o diff --git a/arch/mips/emma/common/prom.c b/arch/mips/emma/common/prom.c deleted file mode 100644 index 7c3a6f32beda..000000000000 --- a/arch/mips/emma/common/prom.c +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/common/prom.c - * - * Copyright 2001 MontaVista Software Inc. - */ -#include <linux/init.h> -#include <linux/mm.h> -#include <linux/sched.h> -#include <linux/memblock.h> - -#include <asm/addrspace.h> -#include <asm/bootinfo.h> -#include <asm/emma/emma2rh.h> - -const char *get_system_type(void) -{ -#ifdef CONFIG_NEC_MARKEINS - return "NEC EMMA2RH Mark-eins"; -#else -#error Unknown NEC board -#endif -} - -/* [jsun@junsun.net] PMON passes arguments in C main() style */ -void __init prom_init(void) -{ - int argc = fw_arg0; - char **arg = (char **)fw_arg1; - int i; - - /* if user passes kernel args, ignore the default one */ - if (argc > 1) - arcs_cmdline[0] = '\0'; - - /* arg[0] is "g", the rest is boot parameters */ - for (i = 1; i < argc; i++) { - if (strlen(arcs_cmdline) + strlen(arg[i]) + 1 - >= sizeof(arcs_cmdline)) - break; - strcat(arcs_cmdline, arg[i]); - strcat(arcs_cmdline, " "); - } - -#ifdef CONFIG_NEC_MARKEINS - add_memory_region(0, EMMA2RH_RAM_SIZE, BOOT_MEM_RAM); -#else -#error Unknown NEC board -#endif -} - -void __init prom_free_prom_memory(void) -{ -} diff --git a/arch/mips/emma/markeins/Makefile b/arch/mips/emma/markeins/Makefile deleted file mode 100644 index 8c8649069504..000000000000 --- a/arch/mips/emma/markeins/Makefile +++ /dev/null @@ -1,6 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-or-later -# -# Copyright (C) NEC Electronics Corporation 2005-2006 -# - -obj-$(CONFIG_NEC_MARKEINS) += irq.o setup.o led.o platform.o diff --git a/arch/mips/emma/markeins/irq.c b/arch/mips/emma/markeins/irq.c deleted file mode 100644 index 4aebf559be2e..000000000000 --- a/arch/mips/emma/markeins/irq.c +++ /dev/null @@ -1,293 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/ddb5477/irq.c - * - * Copyright 2001 MontaVista Software Inc. - */ -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> -#include <linux/types.h> -#include <linux/ptrace.h> -#include <linux/delay.h> - -#include <asm/irq_cpu.h> -#include <asm/mipsregs.h> -#include <asm/addrspace.h> -#include <asm/bootinfo.h> - -#include <asm/emma/emma2rh.h> - -static void emma2rh_irq_enable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_IRQ_BASE; - u32 reg_value, reg_bitmask, reg_index; - - reg_index = EMMA2RH_BHIF_INT_EN_0 + - (EMMA2RH_BHIF_INT_EN_1 - EMMA2RH_BHIF_INT_EN_0) * (irq / 32); - reg_value = emma2rh_in32(reg_index); - reg_bitmask = 0x1 << (irq % 32); - emma2rh_out32(reg_index, reg_value | reg_bitmask); -} - -static void emma2rh_irq_disable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_IRQ_BASE; - u32 reg_value, reg_bitmask, reg_index; - - reg_index = EMMA2RH_BHIF_INT_EN_0 + - (EMMA2RH_BHIF_INT_EN_1 - EMMA2RH_BHIF_INT_EN_0) * (irq / 32); - reg_value = emma2rh_in32(reg_index); - reg_bitmask = 0x1 << (irq % 32); - emma2rh_out32(reg_index, reg_value & ~reg_bitmask); -} - -struct irq_chip emma2rh_irq_controller = { - .name = "emma2rh_irq", - .irq_mask = emma2rh_irq_disable, - .irq_unmask = emma2rh_irq_enable, -}; - -void emma2rh_irq_init(void) -{ - u32 i; - - for (i = 0; i < NUM_EMMA2RH_IRQ; i++) - irq_set_chip_and_handler_name(EMMA2RH_IRQ_BASE + i, - &emma2rh_irq_controller, - handle_level_irq, "level"); -} - -static void emma2rh_sw_irq_enable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_SW_IRQ_BASE; - u32 reg; - - reg = emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN); - reg |= 1 << irq; - emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, reg); -} - -static void emma2rh_sw_irq_disable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_SW_IRQ_BASE; - u32 reg; - - reg = emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN); - reg &= ~(1 << irq); - emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, reg); -} - -struct irq_chip emma2rh_sw_irq_controller = { - .name = "emma2rh_sw_irq", - .irq_mask = emma2rh_sw_irq_disable, - .irq_unmask = emma2rh_sw_irq_enable, -}; - -void emma2rh_sw_irq_init(void) -{ - u32 i; - - for (i = 0; i < NUM_EMMA2RH_IRQ_SW; i++) - irq_set_chip_and_handler_name(EMMA2RH_SW_IRQ_BASE + i, - &emma2rh_sw_irq_controller, - handle_level_irq, "level"); -} - -static void emma2rh_gpio_irq_enable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE; - u32 reg; - - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - reg |= 1 << irq; - emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg); -} - -static void emma2rh_gpio_irq_disable(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE; - u32 reg; - - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - reg &= ~(1 << irq); - emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg); -} - -static void emma2rh_gpio_irq_ack(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE; - - emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~(1 << irq)); -} - -static void emma2rh_gpio_irq_mask_ack(struct irq_data *d) -{ - unsigned int irq = d->irq - EMMA2RH_GPIO_IRQ_BASE; - u32 reg; - - emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~(1 << irq)); - - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - reg &= ~(1 << irq); - emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg); -} - -struct irq_chip emma2rh_gpio_irq_controller = { - .name = "emma2rh_gpio_irq", - .irq_ack = emma2rh_gpio_irq_ack, - .irq_mask = emma2rh_gpio_irq_disable, - .irq_mask_ack = emma2rh_gpio_irq_mask_ack, - .irq_unmask = emma2rh_gpio_irq_enable, -}; - -void emma2rh_gpio_irq_init(void) -{ - u32 i; - - for (i = 0; i < NUM_EMMA2RH_IRQ_GPIO; i++) - irq_set_chip_and_handler_name(EMMA2RH_GPIO_IRQ_BASE + i, - &emma2rh_gpio_irq_controller, - handle_edge_irq, "edge"); -} - -/* - * the first level int-handler will jump here if it is a emma2rh irq - */ -void emma2rh_irq_dispatch(void) -{ - u32 intStatus; - u32 bitmask; - u32 i; - - intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_0) & - emma2rh_in32(EMMA2RH_BHIF_INT_EN_0); - -#ifdef EMMA2RH_SW_CASCADE - if (intStatus & (1UL << EMMA2RH_SW_CASCADE)) { - u32 swIntStatus; - swIntStatus = emma2rh_in32(EMMA2RH_BHIF_SW_INT) - & emma2rh_in32(EMMA2RH_BHIF_SW_INT_EN); - for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { - if (swIntStatus & bitmask) { - do_IRQ(EMMA2RH_SW_IRQ_BASE + i); - return; - } - } - } - /* Skip S/W interrupt */ - intStatus &= ~(1UL << EMMA2RH_SW_CASCADE); -#endif - - for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { - if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i); - return; - } - } - - intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_1) & - emma2rh_in32(EMMA2RH_BHIF_INT_EN_1); - -#ifdef EMMA2RH_GPIO_CASCADE - if (intStatus & (1UL << (EMMA2RH_GPIO_CASCADE % 32))) { - u32 gpioIntStatus; - gpioIntStatus = emma2rh_in32(EMMA2RH_GPIO_INT_ST) - & emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - for (i = 0, bitmask = 1; i < 32; i++, bitmask <<= 1) { - if (gpioIntStatus & bitmask) { - do_IRQ(EMMA2RH_GPIO_IRQ_BASE + i); - return; - } - } - } - /* Skip GPIO interrupt */ - intStatus &= ~(1UL << (EMMA2RH_GPIO_CASCADE % 32)); -#endif - - for (i = 32, bitmask = 1; i < 64; i++, bitmask <<= 1) { - if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i); - return; - } - } - - intStatus = emma2rh_in32(EMMA2RH_BHIF_INT_ST_2) & - emma2rh_in32(EMMA2RH_BHIF_INT_EN_2); - - for (i = 64, bitmask = 1; i < 96; i++, bitmask <<= 1) { - if (intStatus & bitmask) { - do_IRQ(EMMA2RH_IRQ_BASE + i); - return; - } - } -} - -void __init arch_init_irq(void) -{ - u32 reg; - int irq; - - /* by default, interrupts are disabled. */ - emma2rh_out32(EMMA2RH_BHIF_INT_EN_0, 0); - emma2rh_out32(EMMA2RH_BHIF_INT_EN_1, 0); - emma2rh_out32(EMMA2RH_BHIF_INT_EN_2, 0); - emma2rh_out32(EMMA2RH_BHIF_INT1_EN_0, 0); - emma2rh_out32(EMMA2RH_BHIF_INT1_EN_1, 0); - emma2rh_out32(EMMA2RH_BHIF_INT1_EN_2, 0); - emma2rh_out32(EMMA2RH_BHIF_SW_INT_EN, 0); - - clear_c0_status(0xff00); - set_c0_status(0x0400); - -#define GPIO_PCI (0xf<<15) - /* setup GPIO interrupt for PCI interface */ - /* direction input */ - reg = emma2rh_in32(EMMA2RH_GPIO_DIR); - emma2rh_out32(EMMA2RH_GPIO_DIR, reg & ~GPIO_PCI); - /* disable interrupt */ - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MASK); - emma2rh_out32(EMMA2RH_GPIO_INT_MASK, reg & ~GPIO_PCI); - /* level triggerd */ - reg = emma2rh_in32(EMMA2RH_GPIO_INT_MODE); - emma2rh_out32(EMMA2RH_GPIO_INT_MODE, reg | GPIO_PCI); - reg = emma2rh_in32(EMMA2RH_GPIO_INT_CND_A); - emma2rh_out32(EMMA2RH_GPIO_INT_CND_A, reg & (~GPIO_PCI)); - /* interrupt clear */ - emma2rh_out32(EMMA2RH_GPIO_INT_ST, ~GPIO_PCI); - - /* init all controllers */ - emma2rh_irq_init(); - emma2rh_sw_irq_init(); - emma2rh_gpio_irq_init(); - mips_cpu_irq_init(); - - /* setup cascade interrupts */ - irq = EMMA2RH_IRQ_BASE + EMMA2RH_SW_CASCADE; - if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", irq); - irq = EMMA2RH_IRQ_BASE + EMMA2RH_GPIO_CASCADE; - if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", irq); - irq = MIPS_CPU_IRQ_BASE + 2; - if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", irq); -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned int pending = read_c0_status() & read_c0_cause() & ST0_IM; - - if (pending & STATUSF_IP7) - do_IRQ(MIPS_CPU_IRQ_BASE + 7); - else if (pending & STATUSF_IP2) - emma2rh_irq_dispatch(); - else if (pending & STATUSF_IP1) - do_IRQ(MIPS_CPU_IRQ_BASE + 1); - else if (pending & STATUSF_IP0) - do_IRQ(MIPS_CPU_IRQ_BASE + 0); - else - spurious_interrupt(); -} diff --git a/arch/mips/emma/markeins/led.c b/arch/mips/emma/markeins/led.c deleted file mode 100644 index d377542c0ec4..000000000000 --- a/arch/mips/emma/markeins/led.c +++ /dev/null @@ -1,44 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - */ -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/string.h> -#include <asm/emma/emma2rh.h> - -const unsigned long clear = 0x20202020; - -#define LED_BASE 0xb1400038 - -void markeins_led_clear(void) -{ - emma2rh_out32(LED_BASE, clear); - emma2rh_out32(LED_BASE + 4, clear); -} - -void markeins_led(const char *str) -{ - int i; - int len = strlen(str); - - markeins_led_clear(); - if (len > 8) - len = 8; - - if (emma2rh_in32(0xb0000800) & (0x1 << 18)) - for (i = 0; i < len; i++) - emma2rh_out8(LED_BASE + i, str[i]); - else - for (i = 0; i < len; i++) - emma2rh_out8(LED_BASE + (i & 4) + (3 - (i & 3)), - str[i]); -} - -void markeins_led_hex(u32 val) -{ - char str[10]; - - sprintf(str, "%08x", val); - markeins_led(str); -} diff --git a/arch/mips/emma/markeins/platform.c b/arch/mips/emma/markeins/platform.c deleted file mode 100644 index 97eeb9e8fb2b..000000000000 --- a/arch/mips/emma/markeins/platform.c +++ /dev/null @@ -1,199 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright(C) MontaVista Software Inc, 2006 - * - * Author: dmitry pervushin <dpervushin@ru.mvista.com> - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/ioport.h> -#include <linux/serial_8250.h> -#include <linux/mtd/physmap.h> - -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/addrspace.h> -#include <asm/time.h> -#include <asm/bcache.h> -#include <asm/irq.h> -#include <asm/reboot.h> -#include <asm/traps.h> - -#include <asm/emma/emma2rh.h> - - -#define I2C_EMMA2RH "emma2rh-iic" /* must be in sync with IIC driver */ - -static struct resource i2c_emma_resources_0[] = { - { - .name = NULL, - .start = EMMA2RH_IRQ_PIIC0, - .end = EMMA2RH_IRQ_PIIC0, - .flags = IORESOURCE_IRQ - }, { - .name = NULL, - .start = EMMA2RH_PIIC0_BASE, - .end = EMMA2RH_PIIC0_BASE + 0x1000, - .flags = 0 - }, -}; - -struct resource i2c_emma_resources_1[] = { - { - .name = NULL, - .start = EMMA2RH_IRQ_PIIC1, - .end = EMMA2RH_IRQ_PIIC1, - .flags = IORESOURCE_IRQ - }, { - .name = NULL, - .start = EMMA2RH_PIIC1_BASE, - .end = EMMA2RH_PIIC1_BASE + 0x1000, - .flags = 0 - }, -}; - -struct resource i2c_emma_resources_2[] = { - { - .name = NULL, - .start = EMMA2RH_IRQ_PIIC2, - .end = EMMA2RH_IRQ_PIIC2, - .flags = IORESOURCE_IRQ - }, { - .name = NULL, - .start = EMMA2RH_PIIC2_BASE, - .end = EMMA2RH_PIIC2_BASE + 0x1000, - .flags = 0 - }, -}; - -struct platform_device i2c_emma_devices[] = { - [0] = { - .name = I2C_EMMA2RH, - .id = 0, - .resource = i2c_emma_resources_0, - .num_resources = ARRAY_SIZE(i2c_emma_resources_0), - }, - [1] = { - .name = I2C_EMMA2RH, - .id = 1, - .resource = i2c_emma_resources_1, - .num_resources = ARRAY_SIZE(i2c_emma_resources_1), - }, - [2] = { - .name = I2C_EMMA2RH, - .id = 2, - .resource = i2c_emma_resources_2, - .num_resources = ARRAY_SIZE(i2c_emma_resources_2), - }, -}; - -#define EMMA2RH_SERIAL_CLOCK 18544000 -#define EMMA2RH_SERIAL_FLAGS UPF_BOOT_AUTOCONF | UPF_SKIP_TEST - -static struct plat_serial8250_port platform_serial_ports[] = { - [0] = { - .membase= (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR0_BASE + 3), - .mapbase = EMMA2RH_PFUR0_BASE + 3, - .irq = EMMA2RH_IRQ_PFUR0, - .uartclk = EMMA2RH_SERIAL_CLOCK, - .regshift = 4, - .iotype = UPIO_MEM, - .flags = EMMA2RH_SERIAL_FLAGS, - }, [1] = { - .membase = (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR1_BASE + 3), - .mapbase = EMMA2RH_PFUR1_BASE + 3, - .irq = EMMA2RH_IRQ_PFUR1, - .uartclk = EMMA2RH_SERIAL_CLOCK, - .regshift = 4, - .iotype = UPIO_MEM, - .flags = EMMA2RH_SERIAL_FLAGS, - }, [2] = { - .membase = (void __iomem*)KSEG1ADDR(EMMA2RH_PFUR2_BASE + 3), - .mapbase = EMMA2RH_PFUR2_BASE + 3, - .irq = EMMA2RH_IRQ_PFUR2, - .uartclk = EMMA2RH_SERIAL_CLOCK, - .regshift = 4, - .iotype = UPIO_MEM, - .flags = EMMA2RH_SERIAL_FLAGS, - }, [3] = { - .flags = 0, - }, -}; - -static struct platform_device serial_emma = { - .name = "serial8250", - .dev = { - .platform_data = &platform_serial_ports, - }, -}; - -static struct mtd_partition markeins_parts[] = { - [0] = { - .name = "RootFS", - .offset = 0x00000000, - .size = 0x00c00000, - }, - [1] = { - .name = "boot code area", - .offset = MTDPART_OFS_APPEND, - .size = 0x00100000, - }, - [2] = { - .name = "kernel image", - .offset = MTDPART_OFS_APPEND, - .size = 0x00300000, - }, - [3] = { - .name = "RootFS2", - .offset = MTDPART_OFS_APPEND, - .size = 0x00c00000, - }, - [4] = { - .name = "boot code area2", - .offset = MTDPART_OFS_APPEND, - .size = 0x00100000, - }, - [5] = { - .name = "kernel image2", - .offset = MTDPART_OFS_APPEND, - .size = MTDPART_SIZ_FULL, - }, -}; - -static struct physmap_flash_data markeins_flash_data = { - .width = 2, - .nr_parts = ARRAY_SIZE(markeins_parts), - .parts = markeins_parts -}; - -static struct resource markeins_flash_resource = { - .start = 0x1e000000, - .end = 0x02000000, - .flags = IORESOURCE_MEM -}; - -static struct platform_device markeins_flash_device = { - .name = "physmap-flash", - .id = 0, - .dev = { - .platform_data = &markeins_flash_data, - }, - .num_resources = 1, - .resource = &markeins_flash_resource, -}; - -static struct platform_device *devices[] = { - i2c_emma_devices, - i2c_emma_devices + 1, - i2c_emma_devices + 2, - &serial_emma, - &markeins_flash_device, -}; - -static int __init platform_devices_setup(void) -{ - return platform_add_devices(devices, ARRAY_SIZE(devices)); -} - -arch_initcall(platform_devices_setup); diff --git a/arch/mips/emma/markeins/setup.c b/arch/mips/emma/markeins/setup.c deleted file mode 100644 index c8a91c2a63bc..000000000000 --- a/arch/mips/emma/markeins/setup.c +++ /dev/null @@ -1,115 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/ddb5477/setup.c. - * - * Copyright 2001 MontaVista Software Inc. - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/types.h> - -#include <asm/time.h> -#include <asm/reboot.h> - -#include <asm/emma/emma2rh.h> - -#define USE_CPU_COUNTER_TIMER /* whether we use cpu counter */ - -extern void markeins_led(const char *); - -static int bus_frequency; - -static void markeins_machine_restart(char *command) -{ - static void (*back_to_prom) (void) = (void (*)(void))0xbfc00000; - - printk("cannot EMMA2RH Mark-eins restart.\n"); - markeins_led("restart."); - back_to_prom(); -} - -static void markeins_machine_halt(void) -{ - printk("EMMA2RH Mark-eins halted.\n"); - markeins_led("halted."); - while (1) ; -} - -static void markeins_machine_power_off(void) -{ - markeins_led("poweroff."); - while (1) ; -} - -static unsigned long __initdata emma2rh_clock[4] = { - 166500000, 187312500, 199800000, 210600000 -}; - -static unsigned int __init detect_bus_frequency(unsigned long rtc_base) -{ - u32 reg; - - /* detect from boot strap */ - reg = emma2rh_in32(EMMA2RH_BHIF_STRAP_0); - reg = (reg >> 4) & 0x3; - - return emma2rh_clock[reg]; -} - -void __init plat_time_init(void) -{ - u32 reg; - if (bus_frequency == 0) - bus_frequency = detect_bus_frequency(0); - - reg = emma2rh_in32(EMMA2RH_BHIF_STRAP_0); - if ((reg & 0x3) == 0) - reg = (reg >> 6) & 0x3; - else { - reg = emma2rh_in32(EMMA2RH_BHIF_MAIN_CTRL); - reg = (reg >> 4) & 0x3; - } - mips_hpt_frequency = (bus_frequency * (4 + reg)) / 4 / 2; -} - -static void markeins_board_init(void); -extern void markeins_irq_setup(void); - -static inline void __init markeins_sio_setup(void) -{ -} - -void __init plat_mem_setup(void) -{ - /* initialize board - we don't trust the loader */ - markeins_board_init(); - - set_io_port_base(KSEG1ADDR(EMMA2RH_PCI_IO_BASE)); - - _machine_restart = markeins_machine_restart; - _machine_halt = markeins_machine_halt; - pm_power_off = markeins_machine_power_off; - - /* setup resource limits */ - ioport_resource.start = EMMA2RH_PCI_IO_BASE; - ioport_resource.end = EMMA2RH_PCI_IO_BASE + EMMA2RH_PCI_IO_SIZE - 1; - iomem_resource.start = EMMA2RH_IO_BASE; - iomem_resource.end = EMMA2RH_ROM_BASE - 1; - - markeins_sio_setup(); -} - -static void __init markeins_board_init(void) -{ - u32 val; - - val = emma2rh_in32(EMMA2RH_PBRD_INT_EN); /* open serial interrupts. */ - emma2rh_out32(EMMA2RH_PBRD_INT_EN, val | 0xaa); - val = emma2rh_in32(EMMA2RH_PBRD_CLKSEL); /* set serial clocks. */ - emma2rh_out32(EMMA2RH_PBRD_CLKSEL, val | 0x5); /* 18MHz */ - emma2rh_out32(EMMA2RH_PCI_CONTROL, 0); - - markeins_led("MVL E2RH"); -} diff --git a/arch/mips/fw/cfe/cfe_api.c b/arch/mips/fw/cfe/cfe_api.c index c020b29f561c..0c9c97ab291e 100644 --- a/arch/mips/fw/cfe/cfe_api.c +++ b/arch/mips/fw/cfe/cfe_api.c @@ -243,11 +243,6 @@ int cfe_getfwinfo(cfe_fwinfo_t * info) info->fwi_bootarea_pa = xiocb.plist.xiocb_fwinfo.fwi_bootarea_pa; info->fwi_bootarea_size = xiocb.plist.xiocb_fwinfo.fwi_bootarea_size; -#if 0 - info->fwi_reserved1 = xiocb.plist.xiocb_fwinfo.fwi_reserved1; - info->fwi_reserved2 = xiocb.plist.xiocb_fwinfo.fwi_reserved2; - info->fwi_reserved3 = xiocb.plist.xiocb_fwinfo.fwi_reserved3; -#endif return 0; } diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index eaa19d189324..53c33cb72974 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -8,7 +8,6 @@ # option) any later version. # -platform-$(CONFIG_MIPS_GENERIC) += generic/ cflags-$(CONFIG_MIPS_GENERIC) += -I$(srctree)/arch/mips/include/asm/mach-generic load-$(CONFIG_MIPS_GENERIC) += 0xffffffff80100000 all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb diff --git a/arch/mips/include/asm/asm-eva.h b/arch/mips/include/asm/asm-eva.h index d80be38c4144..e327ebc76753 100644 --- a/arch/mips/include/asm/asm-eva.h +++ b/arch/mips/include/asm/asm-eva.h @@ -180,7 +180,7 @@ #define user_ld(reg, addr) kernel_lw(reg, addr) #else #define user_sd(reg, addr) kernel_sd(reg, addr) -#define user_ld(reg, addr) kernel_sd(reg, addr) +#define user_ld(reg, addr) kernel_ld(reg, addr) #endif /* CONFIG_32BIT */ #endif /* CONFIG_EVA */ diff --git a/arch/mips/include/asm/asm.h b/arch/mips/include/asm/asm.h index c23527ba65d0..3682d1a0bb80 100644 --- a/arch/mips/include/asm/asm.h +++ b/arch/mips/include/asm/asm.h @@ -74,10 +74,15 @@ symbol: .insn .globl symbol; \ symbol = value -#define PANIC(msg) \ +#define TEXT(msg) \ + .pushsection .data; \ +8: .asciiz msg; \ + .popsection; + +#define ASM_PANIC(msg) \ .set push; \ .set reorder; \ - PTR_LA a0, 8f; \ + PTR_LA a0, 8f; \ jal panic; \ 9: b 9b; \ .set pop; \ @@ -87,22 +92,17 @@ symbol = value * Print formatted string */ #ifdef CONFIG_PRINTK -#define PRINT(string) \ +#define ASM_PRINT(string) \ .set push; \ .set reorder; \ - PTR_LA a0, 8f; \ + PTR_LA a0, 8f; \ jal printk; \ .set pop; \ TEXT(string) #else -#define PRINT(string) +#define ASM_PRINT(string) #endif -#define TEXT(msg) \ - .pushsection .data; \ -8: .asciiz msg; \ - .popsection; - /* * Stack alignment */ @@ -202,7 +202,9 @@ symbol = value #define LONG_SRA sra #define LONG_SRAV srav +#ifdef __ASSEMBLY__ #define LONG .word +#endif #define LONGSIZE 4 #define LONGMASK 3 #define LONGLOG 2 @@ -225,7 +227,9 @@ symbol = value #define LONG_SRA dsra #define LONG_SRAV dsrav +#ifdef __ASSEMBLY__ #define LONG .dword +#endif #define LONGSIZE 8 #define LONGMASK 7 #define LONGLOG 3 diff --git a/arch/mips/include/asm/asmmacro.h b/arch/mips/include/asm/asmmacro.h index 655f40ddb6d1..86f2323ebe6b 100644 --- a/arch/mips/include/asm/asmmacro.h +++ b/arch/mips/include/asm/asmmacro.h @@ -44,7 +44,8 @@ .endm #endif -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) .macro local_irq_enable reg=t0 ei irq_enable_hazard @@ -54,7 +55,7 @@ di irq_disable_hazard .endm -#else +#else /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */ .macro local_irq_enable reg=t0 mfc0 \reg, CP0_STATUS ori \reg, \reg, 1 @@ -79,7 +80,7 @@ sw \reg, TI_PRE_COUNT($28) #endif .endm -#endif /* CONFIG_CPU_MIPSR2 */ +#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */ .macro fpu_save_16even thread tmp=t0 .set push @@ -131,7 +132,7 @@ .macro fpu_save_double thread status tmp #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) sll \tmp, \status, 5 bgez \tmp, 10f fpu_save_16odd \thread @@ -190,7 +191,7 @@ .macro fpu_restore_double thread status tmp #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) sll \tmp, \status, 5 bgez \tmp, 10f # 16 register mode? @@ -200,16 +201,17 @@ fpu_restore_16even \thread \tmp .endm -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) .macro _EXT rd, rs, p, s ext \rd, \rs, \p, \s .endm -#else /* !CONFIG_CPU_MIPSR2 || !CONFIG_CPU_MIPSR6 */ +#else /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */ .macro _EXT rd, rs, p, s srl \rd, \rs, \p andi \rd, \rd, (1 << \s) - 1 .endm -#endif /* !CONFIG_CPU_MIPSR2 || !CONFIG_CPU_MIPSR6 */ +#endif /* !CONFIG_CPU_MIPSR2 && !CONFIG_CPU_MIPSR5 && !CONFIG_CPU_MIPSR6 */ /* * Temporary until all gas have MT ASE support diff --git a/arch/mips/include/asm/bootinfo.h b/arch/mips/include/asm/bootinfo.h index 61727785a247..c3bd9b2d66e4 100644 --- a/arch/mips/include/asm/bootinfo.h +++ b/arch/mips/include/asm/bootinfo.h @@ -42,17 +42,6 @@ #define MACH_DS5900 10 /* DECsystem 5900 */ /* - * Valid machtype for group PMC-MSP - */ -#define MACH_MSP4200_EVAL 0 /* PMC-Sierra MSP4200 Evaluation */ -#define MACH_MSP4200_GW 1 /* PMC-Sierra MSP4200 Gateway demo */ -#define MACH_MSP4200_FPGA 2 /* PMC-Sierra MSP4200 Emulation */ -#define MACH_MSP7120_EVAL 3 /* PMC-Sierra MSP7120 Evaluation */ -#define MACH_MSP7120_GW 4 /* PMC-Sierra MSP7120 Residential GW */ -#define MACH_MSP7120_FPGA 5 /* PMC-Sierra MSP7120 Emulation */ -#define MACH_MSP_OTHER 255 /* PMC-Sierra unknown board type */ - -/* * Valid machtype for group Mikrotik */ #define MACH_MIKROTIK_RB532 0 /* Mikrotik RouterBoard 532 */ @@ -121,7 +110,7 @@ extern unsigned long fw_passed_dtb; #endif /* - * Platform memory detection hook called by setup_arch + * Platform memory detection hook called by arch_mem_init() */ extern void plat_mem_setup(void); diff --git a/arch/mips/include/asm/branch.h b/arch/mips/include/asm/branch.h index da80878f2c0d..fa3dcbf56fa9 100644 --- a/arch/mips/include/asm/branch.h +++ b/arch/mips/include/asm/branch.h @@ -27,6 +27,9 @@ extern int __MIPS16e_compute_return_epc(struct pt_regs *regs); #define MM_POOL32A_MINOR_SHIFT 0x6 #define MM_MIPS32_COND_FC 0x30 +int isBranchInstr(struct pt_regs *regs, + struct mm_decoded_insn dec_insn, unsigned long *contpc); + extern int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, unsigned long *contpc); diff --git a/arch/mips/include/asm/cacheops.h b/arch/mips/include/asm/cacheops.h index 8031fbc6b69a..50253efecb56 100644 --- a/arch/mips/include/asm/cacheops.h +++ b/arch/mips/include/asm/cacheops.h @@ -48,7 +48,7 @@ * R4000-specific cacheops */ #define Create_Dirty_Excl_D (Cache_D | 0x0c) -#define Fill (Cache_I | 0x14) +#define Fill_I (Cache_I | 0x14) #define Hit_Writeback_I (Cache_I | Hit_Writeback) #define Hit_Writeback_D (Cache_D | Hit_Writeback) diff --git a/arch/mips/include/asm/clock.h b/arch/mips/include/asm/clock.h deleted file mode 100644 index 5a8f96ebe5fa..000000000000 --- a/arch/mips/include/asm/clock.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MIPS_CLOCK_H -#define __ASM_MIPS_CLOCK_H - -#include <linux/kref.h> -#include <linux/list.h> -#include <linux/seq_file.h> -#include <linux/clk.h> - -struct clk; - -struct clk_ops { - void (*init) (struct clk *clk); - void (*enable) (struct clk *clk); - void (*disable) (struct clk *clk); - void (*recalc) (struct clk *clk); - int (*set_rate) (struct clk *clk, unsigned long rate, int algo_id); - long (*round_rate) (struct clk *clk, unsigned long rate); -}; - -struct clk { - struct list_head node; - const char *name; - int id; - struct module *owner; - - struct clk *parent; - struct clk_ops *ops; - - struct kref kref; - - unsigned long rate; - unsigned long flags; -}; - -#define CLK_ALWAYS_ENABLED (1 << 0) -#define CLK_RATE_PROPAGATES (1 << 1) - -int clk_init(void); - -int __clk_enable(struct clk *); -void __clk_disable(struct clk *); - -void clk_recalc_rate(struct clk *); - -int clk_register(struct clk *); -void clk_unregister(struct clk *); - -#endif /* __ASM_MIPS_CLOCK_H */ diff --git a/arch/mips/include/asm/compiler.h b/arch/mips/include/asm/compiler.h index f77e99f1722e..a2cb2d2b1c07 100644 --- a/arch/mips/include/asm/compiler.h +++ b/arch/mips/include/asm/compiler.h @@ -57,6 +57,11 @@ #define MIPS_ISA_ARCH_LEVEL MIPS_ISA_LEVEL #define MIPS_ISA_LEVEL_RAW mips64r6 #define MIPS_ISA_ARCH_LEVEL_RAW MIPS_ISA_LEVEL_RAW +#elif defined(CONFIG_CPU_MIPSR5) +#define MIPS_ISA_LEVEL "mips64r5" +#define MIPS_ISA_ARCH_LEVEL MIPS_ISA_LEVEL +#define MIPS_ISA_LEVEL_RAW mips64r5 +#define MIPS_ISA_ARCH_LEVEL_RAW MIPS_ISA_LEVEL_RAW #else /* MIPS64 is a superset of MIPS32 */ #define MIPS_ISA_LEVEL "mips64r2" diff --git a/arch/mips/include/asm/cpu-features.h b/arch/mips/include/asm/cpu-features.h index de44c92b1c1f..caecbae4b599 100644 --- a/arch/mips/include/asm/cpu-features.h +++ b/arch/mips/include/asm/cpu-features.h @@ -284,14 +284,23 @@ #ifndef cpu_has_mips32r2 # define cpu_has_mips32r2 __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M32R2) #endif +#ifndef cpu_has_mips32r5 +# define cpu_has_mips32r5 __isa_range_or_flag(5, 6, MIPS_CPU_ISA_M32R5) +#endif #ifndef cpu_has_mips32r6 # define cpu_has_mips32r6 __isa_ge_or_flag(6, MIPS_CPU_ISA_M32R6) #endif #ifndef cpu_has_mips64r1 -# define cpu_has_mips64r1 __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1) +# define cpu_has_mips64r1 (cpu_has_64bits && \ + __isa_range_or_flag(1, 6, MIPS_CPU_ISA_M64R1)) #endif #ifndef cpu_has_mips64r2 -# define cpu_has_mips64r2 __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2) +# define cpu_has_mips64r2 (cpu_has_64bits && \ + __isa_range_or_flag(2, 6, MIPS_CPU_ISA_M64R2)) +#endif +#ifndef cpu_has_mips64r5 +# define cpu_has_mips64r5 (cpu_has_64bits && \ + __isa_range_or_flag(5, 6, MIPS_CPU_ISA_M64R5)) #endif #ifndef cpu_has_mips64r6 # define cpu_has_mips64r6 __isa_ge_and_flag(6, MIPS_CPU_ISA_M64R6) @@ -313,19 +322,25 @@ (cpu_has_mips_3 | cpu_has_mips_4_5_64_r2_r6) #define cpu_has_mips_4_5_64_r2_r6 \ (cpu_has_mips_4_5 | cpu_has_mips64r1 | \ - cpu_has_mips_r2 | cpu_has_mips_r6) + cpu_has_mips_r2 | cpu_has_mips_r5 | \ + cpu_has_mips_r6) -#define cpu_has_mips32 (cpu_has_mips32r1 | cpu_has_mips32r2 | cpu_has_mips32r6) -#define cpu_has_mips64 (cpu_has_mips64r1 | cpu_has_mips64r2 | cpu_has_mips64r6) +#define cpu_has_mips32 (cpu_has_mips32r1 | cpu_has_mips32r2 | \ + cpu_has_mips32r5 | cpu_has_mips32r6) +#define cpu_has_mips64 (cpu_has_mips64r1 | cpu_has_mips64r2 | \ + cpu_has_mips64r5 | cpu_has_mips64r6) #define cpu_has_mips_r1 (cpu_has_mips32r1 | cpu_has_mips64r1) #define cpu_has_mips_r2 (cpu_has_mips32r2 | cpu_has_mips64r2) +#define cpu_has_mips_r5 (cpu_has_mips32r5 | cpu_has_mips64r5) #define cpu_has_mips_r6 (cpu_has_mips32r6 | cpu_has_mips64r6) #define cpu_has_mips_r (cpu_has_mips32r1 | cpu_has_mips32r2 | \ - cpu_has_mips32r6 | cpu_has_mips64r1 | \ - cpu_has_mips64r2 | cpu_has_mips64r6) + cpu_has_mips32r5 | cpu_has_mips32r6 | \ + cpu_has_mips64r1 | cpu_has_mips64r2 | \ + cpu_has_mips64r5 | cpu_has_mips64r6) -/* MIPSR2 and MIPSR6 have a lot of similarities */ -#define cpu_has_mips_r2_r6 (cpu_has_mips_r2 | cpu_has_mips_r6) +/* MIPSR2 - MIPSR6 have a lot of similarities */ +#define cpu_has_mips_r2_r6 (cpu_has_mips_r2 | cpu_has_mips_r5 | \ + cpu_has_mips_r6) /* * cpu_has_mips_r2_exec_hazard - return if IHB is required on current processor @@ -435,9 +450,6 @@ # ifndef cpu_has_64bit_gp_regs # define cpu_has_64bit_gp_regs 0 # endif -# ifndef cpu_has_64bit_addresses -# define cpu_has_64bit_addresses 0 -# endif # ifndef cpu_vmbits # define cpu_vmbits 31 # endif @@ -456,9 +468,6 @@ # ifndef cpu_has_64bit_gp_regs # define cpu_has_64bit_gp_regs 1 # endif -# ifndef cpu_has_64bit_addresses -# define cpu_has_64bit_addresses 1 -# endif # ifndef cpu_vmbits # define cpu_vmbits cpu_data[0].vmbits # define __NEED_VMBITS_PROBE @@ -620,6 +629,14 @@ # endif #endif +#ifndef cpu_has_mm_sysad +# define cpu_has_mm_sysad __opt(MIPS_CPU_MM_SYSAD) +#endif + +#ifndef cpu_has_mm_full +# define cpu_has_mm_full __opt(MIPS_CPU_MM_FULL) +#endif + /* * Guest capabilities */ diff --git a/arch/mips/include/asm/cpu-info.h b/arch/mips/include/asm/cpu-info.h index ed7ffe4e63a3..a600670d00e9 100644 --- a/arch/mips/include/asm/cpu-info.h +++ b/arch/mips/include/asm/cpu-info.h @@ -105,6 +105,15 @@ struct cpuinfo_mips { unsigned int gtoffset_mask; unsigned int guestid_mask; unsigned int guestid_cache; + +#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION + /* CPUCFG data for this CPU, synthesized at probe time. + * + * CPUCFG select 0 is PRId, 4 and above are unimplemented for now. + * So the only stored values are for CPUCFG selects 1-3 inclusive. + */ + u32 loongson3_cpucfg_data[3]; +#endif } __attribute__((aligned(SMP_CACHE_BYTES))); extern struct cpuinfo_mips cpu_data[]; @@ -142,7 +151,7 @@ struct proc_cpuinfo_notifier_args { static inline unsigned int cpu_cluster(struct cpuinfo_mips *cpuinfo) { /* Optimisation for systems where multiple clusters aren't used */ - if (!IS_ENABLED(CONFIG_CPU_MIPSR6)) + if (!IS_ENABLED(CONFIG_CPU_MIPSR5) && !IS_ENABLED(CONFIG_CPU_MIPSR6)) return 0; return (cpuinfo->globalnumber & MIPS_GLOBALNUMBER_CLUSTER) >> diff --git a/arch/mips/include/asm/cpu-type.h b/arch/mips/include/asm/cpu-type.h index 49f0061a6051..75a7a382da09 100644 --- a/arch/mips/include/asm/cpu-type.h +++ b/arch/mips/include/asm/cpu-type.h @@ -51,13 +51,18 @@ static inline int __pure __get_cpu_type(const int cpu_type) case CPU_M14KEC: case CPU_INTERAPTIV: case CPU_PROAPTIV: - case CPU_P5600: +#endif + +#ifdef CONFIG_SYS_HAS_CPU_MIPS32_R5 case CPU_M5150: + case CPU_P5600: #endif #if defined(CONFIG_SYS_HAS_CPU_MIPS32_R2) || \ + defined(CONFIG_SYS_HAS_CPU_MIPS32_R5) || \ defined(CONFIG_SYS_HAS_CPU_MIPS32_R6) || \ defined(CONFIG_SYS_HAS_CPU_MIPS64_R2) || \ + defined(CONFIG_SYS_HAS_CPU_MIPS64_R5) || \ defined(CONFIG_SYS_HAS_CPU_MIPS64_R6) case CPU_QEMU_GENERIC: #endif diff --git a/arch/mips/include/asm/cpu.h b/arch/mips/include/asm/cpu.h index 216a22916740..104a509312b3 100644 --- a/arch/mips/include/asm/cpu.h +++ b/arch/mips/include/asm/cpu.h @@ -250,6 +250,10 @@ #define PRID_REV_LOONGSON1C 0x0020 /* Same as Loongson-1B */ #define PRID_REV_LOONGSON2E 0x0002 #define PRID_REV_LOONGSON2F 0x0003 +#define PRID_REV_LOONGSON2K_R1_0 0x0000 +#define PRID_REV_LOONGSON2K_R1_1 0x0001 +#define PRID_REV_LOONGSON2K_R1_2 0x0002 +#define PRID_REV_LOONGSON2K_R1_3 0x0003 #define PRID_REV_LOONGSON3A_R1 0x0005 #define PRID_REV_LOONGSON3B_R1 0x0006 #define PRID_REV_LOONGSON3B_R2 0x0007 @@ -343,14 +347,16 @@ enum cpu_type_enum { #define MIPS_CPU_ISA_M32R2 0x00000020 #define MIPS_CPU_ISA_M64R1 0x00000040 #define MIPS_CPU_ISA_M64R2 0x00000080 -#define MIPS_CPU_ISA_M32R6 0x00000100 -#define MIPS_CPU_ISA_M64R6 0x00000200 +#define MIPS_CPU_ISA_M32R5 0x00000100 +#define MIPS_CPU_ISA_M64R5 0x00000200 +#define MIPS_CPU_ISA_M32R6 0x00000400 +#define MIPS_CPU_ISA_M64R6 0x00000800 #define MIPS_CPU_ISA_32BIT (MIPS_CPU_ISA_II | MIPS_CPU_ISA_M32R1 | \ - MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M32R6) + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M32R6) #define MIPS_CPU_ISA_64BIT (MIPS_CPU_ISA_III | MIPS_CPU_ISA_IV | \ MIPS_CPU_ISA_V | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M64R2 | \ - MIPS_CPU_ISA_M64R6) + MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M64R6) /* * CPU Option encodings @@ -416,7 +422,9 @@ enum cpu_type_enum { #define MIPS_CPU_MT_PER_TC_PERF_COUNTERS \ BIT_ULL(56) /* CPU has perf counters implemented per TC (MIPSMT ASE) */ #define MIPS_CPU_MMID BIT_ULL(57) /* CPU supports MemoryMapIDs */ -#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(58) /* CPU Only support MAC2008 Fused multiply-add instruction */ +#define MIPS_CPU_MM_SYSAD BIT_ULL(58) /* CPU supports write-through SysAD Valid merge */ +#define MIPS_CPU_MM_FULL BIT_ULL(59) /* CPU supports write-through full merge */ +#define MIPS_CPU_MAC_2008_ONLY BIT_ULL(60) /* CPU Only support MAC2008 Fused multiply-add instruction */ /* * CPU ASE encodings diff --git a/arch/mips/include/asm/emma/emma2rh.h b/arch/mips/include/asm/emma/emma2rh.h deleted file mode 100644 index a25cdb378fe8..000000000000 --- a/arch/mips/include/asm/emma/emma2rh.h +++ /dev/null @@ -1,248 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) NEC Electronics Corporation 2005-2006 - * - * This file based on include/asm-mips/ddb5xxx/ddb5xxx.h - * Copyright 2001 MontaVista Software Inc. - */ -#ifndef __ASM_EMMA_EMMA2RH_H -#define __ASM_EMMA_EMMA2RH_H - -#include <irq.h> - -/* - * EMMA2RH registers - */ -#define REGBASE 0x10000000 - -#define EMMA2RH_BHIF_STRAP_0 (0x000010+REGBASE) -#define EMMA2RH_BHIF_INT_ST_0 (0x000030+REGBASE) -#define EMMA2RH_BHIF_INT_ST_1 (0x000034+REGBASE) -#define EMMA2RH_BHIF_INT_ST_2 (0x000038+REGBASE) -#define EMMA2RH_BHIF_INT_EN_0 (0x000040+REGBASE) -#define EMMA2RH_BHIF_INT_EN_1 (0x000044+REGBASE) -#define EMMA2RH_BHIF_INT_EN_2 (0x000048+REGBASE) -#define EMMA2RH_BHIF_INT1_EN_0 (0x000050+REGBASE) -#define EMMA2RH_BHIF_INT1_EN_1 (0x000054+REGBASE) -#define EMMA2RH_BHIF_INT1_EN_2 (0x000058+REGBASE) -#define EMMA2RH_BHIF_SW_INT (0x000070+REGBASE) -#define EMMA2RH_BHIF_SW_INT_EN (0x000080+REGBASE) -#define EMMA2RH_BHIF_SW_INT_CLR (0x000090+REGBASE) -#define EMMA2RH_BHIF_MAIN_CTRL (0x0000b4+REGBASE) -#define EMMA2RH_BHIF_EXCEPT_VECT_BASE_ADDRESS (0x0000c0+REGBASE) -#define EMMA2RH_GPIO_DIR (0x110d20+REGBASE) -#define EMMA2RH_GPIO_INT_ST (0x110d30+REGBASE) -#define EMMA2RH_GPIO_INT_MASK (0x110d3c+REGBASE) -#define EMMA2RH_GPIO_INT_MODE (0x110d48+REGBASE) -#define EMMA2RH_GPIO_INT_CND_A (0x110d54+REGBASE) -#define EMMA2RH_GPIO_INT_CND_B (0x110d60+REGBASE) -#define EMMA2RH_PBRD_INT_EN (0x100010+REGBASE) -#define EMMA2RH_PBRD_CLKSEL (0x100028+REGBASE) -#define EMMA2RH_PFUR0_BASE (0x101000+REGBASE) -#define EMMA2RH_PFUR1_BASE (0x102000+REGBASE) -#define EMMA2RH_PFUR2_BASE (0x103000+REGBASE) -#define EMMA2RH_PIIC0_BASE (0x107000+REGBASE) -#define EMMA2RH_PIIC1_BASE (0x108000+REGBASE) -#define EMMA2RH_PIIC2_BASE (0x109000+REGBASE) -#define EMMA2RH_PCI_CONTROL (0x200000+REGBASE) -#define EMMA2RH_PCI_ARBIT_CTR (0x200004+REGBASE) -#define EMMA2RH_PCI_IWIN0_CTR (0x200010+REGBASE) -#define EMMA2RH_PCI_IWIN1_CTR (0x200014+REGBASE) -#define EMMA2RH_PCI_INIT_ESWP (0x200018+REGBASE) -#define EMMA2RH_PCI_INT (0x200020+REGBASE) -#define EMMA2RH_PCI_INT_EN (0x200024+REGBASE) -#define EMMA2RH_PCI_TWIN_CTR (0x200030+REGBASE) -#define EMMA2RH_PCI_TWIN_BADR (0x200034+REGBASE) -#define EMMA2RH_PCI_TWIN0_DADR (0x200038+REGBASE) -#define EMMA2RH_PCI_TWIN1_DADR (0x20003c+REGBASE) - -/* - * Memory map (physical address) - * - * Note most of the following address must be properly aligned by the - * corresponding size. For example, if PCI_IO_SIZE is 16MB, then - * PCI_IO_BASE must be aligned along 16MB boundary. - */ - -/* the actual ram size is detected at run-time */ -#define EMMA2RH_RAM_BASE 0x00000000 -#define EMMA2RH_RAM_SIZE 0x10000000 /* less than 256MB */ - -#define EMMA2RH_IO_BASE 0x10000000 -#define EMMA2RH_IO_SIZE 0x01000000 /* 16 MB */ - -#define EMMA2RH_GENERALIO_BASE 0x11000000 -#define EMMA2RH_GENERALIO_SIZE 0x01000000 /* 16 MB */ - -#define EMMA2RH_PCI_IO_BASE 0x12000000 -#define EMMA2RH_PCI_IO_SIZE 0x02000000 /* 32 MB */ - -#define EMMA2RH_PCI_MEM_BASE 0x14000000 -#define EMMA2RH_PCI_MEM_SIZE 0x08000000 /* 128 MB */ - -#define EMMA2RH_ROM_BASE 0x1c000000 -#define EMMA2RH_ROM_SIZE 0x04000000 /* 64 MB */ - -#define EMMA2RH_PCI_CONFIG_BASE EMMA2RH_PCI_IO_BASE -#define EMMA2RH_PCI_CONFIG_SIZE EMMA2RH_PCI_IO_SIZE - -#define NUM_EMMA2RH_IRQ 96 - -#define EMMA2RH_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8) - -/* - * emma2rh irq defs - */ - -#define EMMA2RH_IRQ_INT(n) (EMMA2RH_IRQ_BASE + (n)) - -#define EMMA2RH_IRQ_PFUR0 EMMA2RH_IRQ_INT(49) -#define EMMA2RH_IRQ_PFUR1 EMMA2RH_IRQ_INT(50) -#define EMMA2RH_IRQ_PFUR2 EMMA2RH_IRQ_INT(51) -#define EMMA2RH_IRQ_PIIC0 EMMA2RH_IRQ_INT(56) -#define EMMA2RH_IRQ_PIIC1 EMMA2RH_IRQ_INT(57) -#define EMMA2RH_IRQ_PIIC2 EMMA2RH_IRQ_INT(58) - -/* - * EMMA2RH Register Access - */ - -#define EMMA2RH_BASE (0xa0000000) - -static inline void emma2rh_sync(void) -{ - volatile u32 *p = (volatile u32 *)0xbfc00000; - (void)(*p); -} - -static inline void emma2rh_out32(u32 offset, u32 val) -{ - *(volatile u32 *)(EMMA2RH_BASE | offset) = val; - emma2rh_sync(); -} - -static inline u32 emma2rh_in32(u32 offset) -{ - u32 val = *(volatile u32 *)(EMMA2RH_BASE | offset); - return val; -} - -static inline void emma2rh_out16(u32 offset, u16 val) -{ - *(volatile u16 *)(EMMA2RH_BASE | offset) = val; - emma2rh_sync(); -} - -static inline u16 emma2rh_in16(u32 offset) -{ - u16 val = *(volatile u16 *)(EMMA2RH_BASE | offset); - return val; -} - -static inline void emma2rh_out8(u32 offset, u8 val) -{ - *(volatile u8 *)(EMMA2RH_BASE | offset) = val; - emma2rh_sync(); -} - -static inline u8 emma2rh_in8(u32 offset) -{ - u8 val = *(volatile u8 *)(EMMA2RH_BASE | offset); - return val; -} - -/** - * IIC registers map - **/ - -/*---------------------------------------------------------------------------*/ -/* CNT - Control register (00H R/W) */ -/*---------------------------------------------------------------------------*/ -#define SPT 0x00000001 -#define STT 0x00000002 -#define ACKE 0x00000004 -#define WTIM 0x00000008 -#define SPIE 0x00000010 -#define WREL 0x00000020 -#define LREL 0x00000040 -#define IICE 0x00000080 -#define CNT_RESERVED 0x000000ff /* reserved bit 0 */ - -#define I2C_EMMA_START (IICE | STT) -#define I2C_EMMA_STOP (IICE | SPT) -#define I2C_EMMA_REPSTART I2C_EMMA_START - -/*---------------------------------------------------------------------------*/ -/* STA - Status register (10H Read) */ -/*---------------------------------------------------------------------------*/ -#define MSTS 0x00000080 -#define ALD 0x00000040 -#define EXC 0x00000020 -#define COI 0x00000010 -#define TRC 0x00000008 -#define ACKD 0x00000004 -#define STD 0x00000002 -#define SPD 0x00000001 - -/*---------------------------------------------------------------------------*/ -/* CSEL - Clock select register (20H R/W) */ -/*---------------------------------------------------------------------------*/ -#define FCL 0x00000080 -#define ND50 0x00000040 -#define CLD 0x00000020 -#define DAD 0x00000010 -#define SMC 0x00000008 -#define DFC 0x00000004 -#define CL 0x00000003 -#define CSEL_RESERVED 0x000000ff /* reserved bit 0 */ - -#define FAST397 0x0000008b -#define FAST297 0x0000008a -#define FAST347 0x0000000b -#define FAST260 0x0000000a -#define FAST130 0x00000008 -#define STANDARD108 0x00000083 -#define STANDARD83 0x00000082 -#define STANDARD95 0x00000003 -#define STANDARD73 0x00000002 -#define STANDARD36 0x00000001 -#define STANDARD71 0x00000000 - -/*---------------------------------------------------------------------------*/ -/* SVA - Slave address register (30H R/W) */ -/*---------------------------------------------------------------------------*/ -#define SVA 0x000000fe - -/*---------------------------------------------------------------------------*/ -/* SHR - Shift register (40H R/W) */ -/*---------------------------------------------------------------------------*/ -#define SR 0x000000ff - -/*---------------------------------------------------------------------------*/ -/* INT - Interrupt register (50H R/W) */ -/* INTM - Interrupt mask register (60H R/W) */ -/*---------------------------------------------------------------------------*/ -#define INTE0 0x00000001 - -/*********************************************************************** - * I2C registers - *********************************************************************** - */ -#define I2C_EMMA_CNT 0x00 -#define I2C_EMMA_STA 0x10 -#define I2C_EMMA_CSEL 0x20 -#define I2C_EMMA_SVA 0x30 -#define I2C_EMMA_SHR 0x40 -#define I2C_EMMA_INT 0x50 -#define I2C_EMMA_INTM 0x60 - -/* - * include the board dependent part - */ -#ifdef CONFIG_NEC_MARKEINS -#include <asm/emma/markeins.h> -#else -#error "Unknown EMMA2RH board!" -#endif - -#endif /* __ASM_EMMA_EMMA2RH_H */ diff --git a/arch/mips/include/asm/emma/markeins.h b/arch/mips/include/asm/emma/markeins.h deleted file mode 100644 index 2d7e1339d36f..000000000000 --- a/arch/mips/include/asm/emma/markeins.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) NEC Electronics Corporation 2005-2006 - * - * This file based on include/asm-mips/ddb5xxx/ddb5xxx.h - * Copyright 2001 MontaVista Software Inc. - */ - -#ifndef MARKEINS_H -#define MARKEINS_H - -#define NUM_EMMA2RH_IRQ_SW 32 -#define NUM_EMMA2RH_IRQ_GPIO 32 - -#define EMMA2RH_SW_CASCADE (EMMA2RH_IRQ_INT(7) - EMMA2RH_IRQ_INT(0)) -#define EMMA2RH_GPIO_CASCADE (EMMA2RH_IRQ_INT(46) - EMMA2RH_IRQ_INT(0)) - -#define EMMA2RH_SW_IRQ_BASE (EMMA2RH_IRQ_BASE + NUM_EMMA2RH_IRQ) -#define EMMA2RH_GPIO_IRQ_BASE (EMMA2RH_SW_IRQ_BASE + NUM_EMMA2RH_IRQ_SW) - -#define EMMA2RH_SW_IRQ_INT(n) (EMMA2RH_SW_IRQ_BASE + (n)) - -#define MARKEINS_PCI_IRQ_INTA EMMA2RH_GPIO_IRQ_BASE+15 -#define MARKEINS_PCI_IRQ_INTB EMMA2RH_GPIO_IRQ_BASE+16 -#define MARKEINS_PCI_IRQ_INTC EMMA2RH_GPIO_IRQ_BASE+17 -#define MARKEINS_PCI_IRQ_INTD EMMA2RH_GPIO_IRQ_BASE+18 - -#endif /* CONFIG_MARKEINS */ diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index 9476e0498d59..08f9dd6903b7 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -71,12 +71,12 @@ static inline int __enable_fpu(enum fpu_mode mode) goto fr_common; case FPU_64BIT: -#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) \ - || defined(CONFIG_64BIT)) +#if !(defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) || defined(CONFIG_64BIT)) /* we only have a 32-bit FPU */ return SIGFPE; #endif - /* fall through */ + fallthrough; case FPU_32BIT: if (cpu_has_fre) { /* clear FRE */ diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index bb7c71ffe5b7..f67759e81210 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -172,10 +172,6 @@ void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, struct task_struct *tsk); int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31); -int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, - unsigned long *contpc); -int mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, - unsigned long *contpc); /* * Mask the FCSR Cause bits according to the Enable bits, observing diff --git a/arch/mips/include/asm/hazards.h b/arch/mips/include/asm/hazards.h index a0b92205f933..f855478d12fa 100644 --- a/arch/mips/include/asm/hazards.h +++ b/arch/mips/include/asm/hazards.h @@ -22,8 +22,9 @@ /* * TLB hazards */ -#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6)) && \ - !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64) +#if (defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6)) && \ + !defined(CONFIG_CPU_CAVIUM_OCTEON) && !defined(CONFIG_CPU_LOONGSON64) /* * MIPSR2 defines ehb for hazard avoidance @@ -278,7 +279,8 @@ do { \ #define __disable_fpu_hazard -#elif defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) +#elif defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) #define __enable_fpu_hazard \ ___ehb diff --git a/arch/mips/include/asm/io.h b/arch/mips/include/asm/io.h index cf1f2a4a2418..346fffd9e972 100644 --- a/arch/mips/include/asm/io.h +++ b/arch/mips/include/asm/io.h @@ -30,8 +30,6 @@ #include <asm/pgtable-bits.h> #include <asm/processor.h> #include <asm/string.h> - -#include <ioremap.h> #include <mangle-port.h> /* @@ -153,66 +151,9 @@ static inline void *isa_bus_to_virt(unsigned long address) */ #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) -extern void __iomem * __ioremap(phys_addr_t offset, phys_addr_t size, unsigned long flags); -extern void __iounmap(const volatile void __iomem *addr); - -static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long size, - unsigned long flags) -{ - void __iomem *addr = plat_ioremap(offset, size, flags); - - if (addr) - return addr; - -#define __IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL)) - - if (cpu_has_64bit_addresses) { - u64 base = UNCAC_BASE; - - /* - * R10000 supports a 2 bit uncached attribute therefore - * UNCAC_BASE may not equal IO_BASE. - */ - if (flags == _CACHE_UNCACHED) - base = (u64) IO_BASE; - return (void __iomem *) (unsigned long) (base + offset); - } else if (__builtin_constant_p(offset) && - __builtin_constant_p(size) && __builtin_constant_p(flags)) { - phys_addr_t phys_addr, last_addr; - - phys_addr = fixup_bigphys_addr(offset, size); - - /* Don't allow wraparound or zero size. */ - last_addr = phys_addr + size - 1; - if (!size || last_addr < phys_addr) - return NULL; - - /* - * Map uncached objects in the low 512MB of address - * space using KSEG1. - */ - if (__IS_LOW512(phys_addr) && __IS_LOW512(last_addr) && - flags == _CACHE_UNCACHED) - return (void __iomem *) - (unsigned long)CKSEG1ADDR(phys_addr); - } - - return __ioremap(offset, size, flags); - -#undef __IS_LOW512 -} - -/* - * ioremap_prot - map bus memory into CPU space - * @offset: bus address of the memory - * @size: size of the resource to map - - * ioremap_prot gives the caller control over cache coherency attributes (CCA) - */ -static inline void __iomem *ioremap_prot(phys_addr_t offset, - unsigned long size, unsigned long prot_val) { - return __ioremap_mode(offset, size, prot_val & _CACHE_MASK); -} +void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long prot_val); +void iounmap(const volatile void __iomem *addr); /* * ioremap - map bus memory into CPU space @@ -226,7 +167,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, * address. */ #define ioremap(offset, size) \ - __ioremap_mode((offset), (size), _CACHE_UNCACHED) + ioremap_prot((offset), (size), _CACHE_UNCACHED) #define ioremap_uc ioremap /* @@ -245,7 +186,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, * memory-like regions on I/O busses. */ #define ioremap_cache(offset, size) \ - __ioremap_mode((offset), (size), _page_cachable_default) + ioremap_prot((offset), (size), _page_cachable_default) /* * ioremap_wc - map bus memory into CPU space @@ -266,23 +207,7 @@ static inline void __iomem *ioremap_prot(phys_addr_t offset, * _CACHE_UNCACHED option (see cpu_probe() method). */ #define ioremap_wc(offset, size) \ - __ioremap_mode((offset), (size), boot_cpu_data.writecombine) - -static inline void iounmap(const volatile void __iomem *addr) -{ - if (plat_iounmap(addr)) - return; - -#define __IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) - - if (cpu_has_64bit_addresses || - (__builtin_constant_p(addr) && __IS_KSEG1(addr))) - return; - - __iounmap(addr); - -#undef __IS_KSEG1 -} + ioremap_prot((offset), (size), boot_cpu_data.writecombine) #if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON64) #define war_io_reorder_wmb() wmb() diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 2c343c346b79..e28b5a946e26 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -174,6 +174,8 @@ struct kvm_vcpu_stat { #endif u64 halt_successful_poll; u64 halt_attempted_poll; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 halt_poll_invalid; u64 halt_wakeup; }; diff --git a/arch/mips/include/asm/lasat/ds1603.h b/arch/mips/include/asm/lasat/ds1603.h deleted file mode 100644 index ab833be9637d..000000000000 --- a/arch/mips/include/asm/lasat/ds1603.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <asm/addrspace.h> - -/* Lasat 100 */ -#define DS1603_REG_100 (KSEG1ADDR(0x1c810000)) -#define DS1603_RST_100 (1 << 2) -#define DS1603_CLK_100 (1 << 0) -#define DS1603_DATA_SHIFT_100 1 -#define DS1603_DATA_100 (1 << DS1603_DATA_SHIFT_100) - -/* Lasat 200 */ -#define DS1603_REG_200 (KSEG1ADDR(0x11000000)) -#define DS1603_RST_200 (1 << 3) -#define DS1603_CLK_200 (1 << 4) -#define DS1603_DATA_200 (1 << 5) - -#define DS1603_DATA_REG_200 (DS1603_REG_200 + 0x10000) -#define DS1603_DATA_READ_SHIFT_200 9 -#define DS1603_DATA_READ_200 (1 << DS1603_DATA_READ_SHIFT_200) diff --git a/arch/mips/include/asm/lasat/eeprom.h b/arch/mips/include/asm/lasat/eeprom.h deleted file mode 100644 index 24001a5cbb11..000000000000 --- a/arch/mips/include/asm/lasat/eeprom.h +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <asm/addrspace.h> - -/* lasat 100 */ -#define AT93C_REG_100 KSEG1ADDR(0x1c810000) -#define AT93C_RDATA_REG_100 AT93C_REG_100 -#define AT93C_RDATA_SHIFT_100 4 -#define AT93C_WDATA_SHIFT_100 4 -#define AT93C_CS_M_100 (1 << 5) -#define AT93C_CLK_M_100 (1 << 3) - -/* lasat 200 */ -#define AT93C_REG_200 KSEG1ADDR(0x11000000) -#define AT93C_RDATA_REG_200 (AT93C_REG_200+0x10000) -#define AT93C_RDATA_SHIFT_200 8 -#define AT93C_WDATA_SHIFT_200 2 -#define AT93C_CS_M_200 (1 << 0) -#define AT93C_CLK_M_200 (1 << 1) diff --git a/arch/mips/include/asm/lasat/head.h b/arch/mips/include/asm/lasat/head.h deleted file mode 100644 index 20b0ecedd4b5..000000000000 --- a/arch/mips/include/asm/lasat/head.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Image header stuff - */ -#ifndef _HEAD_H -#define _HEAD_H - -#define LASAT_K_MAGIC0_VAL 0xfedeabba -#define LASAT_K_MAGIC1_VAL 0x00bedead - -#ifndef _LANGUAGE_ASSEMBLY -#include <linux/types.h> -struct bootloader_header { - u32 magic[2]; - u32 version; - u32 image_start; - u32 image_size; - u32 kernel_start; - u32 kernel_entry; -}; -#endif - -#endif /* _HEAD_H */ diff --git a/arch/mips/include/asm/lasat/lasat.h b/arch/mips/include/asm/lasat/lasat.h deleted file mode 100644 index 483be606960d..000000000000 --- a/arch/mips/include/asm/lasat/lasat.h +++ /dev/null @@ -1,245 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * lasat.h - * - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. - * - * Configuration for LASAT boards, loads the appropriate include files. - */ -#ifndef _LASAT_H -#define _LASAT_H - -#ifndef _LANGUAGE_ASSEMBLY - -extern struct lasat_misc { - volatile u32 *reset_reg; - volatile u32 *flash_wp_reg; - u32 flash_wp_bit; -} *lasat_misc; - -enum lasat_mtdparts { - LASAT_MTD_BOOTLOADER, - LASAT_MTD_SERVICE, - LASAT_MTD_NORMAL, - LASAT_MTD_CONFIG, - LASAT_MTD_FS, - LASAT_MTD_LAST -}; - -/* - * The format of the data record in the EEPROM. - * See the LASAT Hardware Configuration field specification for a detailed - * description of the config field. - */ -#include <linux/types.h> - -#define LASAT_EEPROM_VERSION 7 -struct lasat_eeprom_struct { - unsigned int version; - unsigned int cfg[3]; - unsigned char hwaddr[6]; - unsigned char print_partno[12]; - unsigned char term0; - unsigned char print_serial[14]; - unsigned char term1; - unsigned char prod_partno[12]; - unsigned char term2; - unsigned char prod_serial[14]; - unsigned char term3; - unsigned char passwd_hash[16]; - unsigned char pwdnull; - unsigned char vendid; - unsigned char ts_ref; - unsigned char ts_signoff; - unsigned char reserved[11]; - unsigned char debugaccess; - unsigned short prid; - unsigned int serviceflag; - unsigned int ipaddr; - unsigned int netmask; - unsigned int crc32; -}; - -struct lasat_eeprom_struct_pre7 { - unsigned int version; - unsigned int flags[3]; - unsigned char hwaddr0[6]; - unsigned char hwaddr1[6]; - unsigned char print_partno[9]; - unsigned char term0; - unsigned char print_serial[14]; - unsigned char term1; - unsigned char prod_partno[9]; - unsigned char term2; - unsigned char prod_serial[14]; - unsigned char term3; - unsigned char passwd_hash[24]; - unsigned char pwdnull; - unsigned char vendor; - unsigned char ts_ref; - unsigned char ts_signoff; - unsigned char reserved[6]; - unsigned int writecount; - unsigned int ipaddr; - unsigned int netmask; - unsigned int crc32; -}; - -/* Configuration descriptor encoding - see the doc for details */ - -#define LASAT_W0_DSCTYPE(v) (((v)) & 0xf) -#define LASAT_W0_BMID(v) (((v) >> 0x04) & 0xf) -#define LASAT_W0_CPUTYPE(v) (((v) >> 0x08) & 0xf) -#define LASAT_W0_BUSSPEED(v) (((v) >> 0x0c) & 0xf) -#define LASAT_W0_CPUCLK(v) (((v) >> 0x10) & 0xf) -#define LASAT_W0_SDRAMBANKSZ(v) (((v) >> 0x14) & 0xf) -#define LASAT_W0_SDRAMBANKS(v) (((v) >> 0x18) & 0xf) -#define LASAT_W0_L2CACHE(v) (((v) >> 0x1c) & 0xf) - -#define LASAT_W1_EDHAC(v) (((v)) & 0xf) -#define LASAT_W1_HIFN(v) (((v) >> 0x04) & 0x1) -#define LASAT_W1_ISDN(v) (((v) >> 0x05) & 0x1) -#define LASAT_W1_IDE(v) (((v) >> 0x06) & 0x1) -#define LASAT_W1_HDLC(v) (((v) >> 0x07) & 0x1) -#define LASAT_W1_USVERSION(v) (((v) >> 0x08) & 0x1) -#define LASAT_W1_4MACS(v) (((v) >> 0x09) & 0x1) -#define LASAT_W1_EXTSERIAL(v) (((v) >> 0x0a) & 0x1) -#define LASAT_W1_FLASHSIZE(v) (((v) >> 0x0c) & 0xf) -#define LASAT_W1_PCISLOTS(v) (((v) >> 0x10) & 0xf) -#define LASAT_W1_PCI1OPT(v) (((v) >> 0x14) & 0xf) -#define LASAT_W1_PCI2OPT(v) (((v) >> 0x18) & 0xf) -#define LASAT_W1_PCI3OPT(v) (((v) >> 0x1c) & 0xf) - -/* Routines specific to LASAT boards */ - -#define LASAT_BMID_MASQUERADE2 0 -#define LASAT_BMID_MASQUERADEPRO 1 -#define LASAT_BMID_SAFEPIPE25 2 -#define LASAT_BMID_SAFEPIPE50 3 -#define LASAT_BMID_SAFEPIPE100 4 -#define LASAT_BMID_SAFEPIPE5000 5 -#define LASAT_BMID_SAFEPIPE7000 6 -#define LASAT_BMID_SAFEPIPE1000 7 -#if 0 -#define LASAT_BMID_SAFEPIPE30 7 -#define LASAT_BMID_SAFEPIPE5100 8 -#define LASAT_BMID_SAFEPIPE7100 9 -#endif -#define LASAT_BMID_UNKNOWN 0xf -#define LASAT_MAX_BMID_NAMES 9 /* no larger than 15! */ - -#define LASAT_HAS_EDHAC (1 << 0) -#define LASAT_EDHAC_FAST (1 << 1) -#define LASAT_HAS_EADI (1 << 2) -#define LASAT_HAS_HIFN (1 << 3) -#define LASAT_HAS_ISDN (1 << 4) -#define LASAT_HAS_LEASEDLINE_IF (1 << 5) -#define LASAT_HAS_HDC (1 << 6) - -#define LASAT_PRID_MASQUERADE2 0 -#define LASAT_PRID_MASQUERADEPRO 1 -#define LASAT_PRID_SAFEPIPE25 2 -#define LASAT_PRID_SAFEPIPE50 3 -#define LASAT_PRID_SAFEPIPE100 4 -#define LASAT_PRID_SAFEPIPE5000 5 -#define LASAT_PRID_SAFEPIPE7000 6 -#define LASAT_PRID_SAFEPIPE30 7 -#define LASAT_PRID_SAFEPIPE5100 8 -#define LASAT_PRID_SAFEPIPE7100 9 - -#define LASAT_PRID_SAFEPIPE1110 10 -#define LASAT_PRID_SAFEPIPE3020 11 -#define LASAT_PRID_SAFEPIPE3030 12 -#define LASAT_PRID_SAFEPIPE5020 13 -#define LASAT_PRID_SAFEPIPE5030 14 -#define LASAT_PRID_SAFEPIPE1120 15 -#define LASAT_PRID_SAFEPIPE1130 16 -#define LASAT_PRID_SAFEPIPE6010 17 -#define LASAT_PRID_SAFEPIPE6110 18 -#define LASAT_PRID_SAFEPIPE6210 19 -#define LASAT_PRID_SAFEPIPE1020 20 -#define LASAT_PRID_SAFEPIPE1040 21 -#define LASAT_PRID_SAFEPIPE1060 22 - -struct lasat_info { - unsigned int li_cpu_hz; - unsigned int li_bus_hz; - unsigned int li_bmid; - unsigned int li_memsize; - unsigned int li_flash_size; - unsigned int li_prid; - unsigned char li_bmstr[16]; - unsigned char li_namestr[32]; - unsigned char li_typestr[16]; - /* Info on the Flash layout */ - unsigned int li_flash_base; - unsigned long li_flashpart_base[LASAT_MTD_LAST]; - unsigned long li_flashpart_size[LASAT_MTD_LAST]; - struct lasat_eeprom_struct li_eeprom_info; - unsigned int li_eeprom_upgrade_version; - unsigned int li_debugaccess; -}; - -extern struct lasat_info lasat_board_info; - -static inline unsigned long lasat_flash_partition_start(int partno) -{ - if (partno < 0 || partno >= LASAT_MTD_LAST) - return 0; - - return lasat_board_info.li_flashpart_base[partno]; -} - -static inline unsigned long lasat_flash_partition_size(int partno) -{ - if (partno < 0 || partno >= LASAT_MTD_LAST) - return 0; - - return lasat_board_info.li_flashpart_size[partno]; -} - -/* Called from setup() to initialize the global board_info struct */ -extern int lasat_init_board_info(void); - -/* Write the modified EEPROM info struct */ -extern void lasat_write_eeprom_info(void); - -#define N_MACHTYPES 2 -/* for calibration of delays */ - -/* the lasat_ndelay function is necessary because it is used at an - * early stage of the boot process where ndelay is not calibrated. - * It is used for the bit-banging rtc and eeprom drivers */ - -#include <linux/delay.h> -#include <linux/smp.h> - -/* calculating with the slowest board with 100 MHz clock */ -#define LASAT_100_DIVIDER 20 -/* All 200's run at 250 MHz clock */ -#define LASAT_200_DIVIDER 8 - -extern unsigned int lasat_ndelay_divider; - -static inline void lasat_ndelay(unsigned int ns) -{ - __delay(ns / lasat_ndelay_divider); -} - -#define IS_LASAT_200() (current_cpu_data.cputype == CPU_R5000) - -#endif /* !defined (_LANGUAGE_ASSEMBLY) */ - -#define LASAT_SERVICEMODE_MAGIC_1 0xdeadbeef -#define LASAT_SERVICEMODE_MAGIC_2 0xfedeabba - -/* Lasat 100 boards */ -#define LASAT_GT_BASE (KSEG1ADDR(0x14000000)) - -/* Lasat 200 boards */ -#define Vrc5074_PHYS_BASE 0x1fa00000 -#define Vrc5074_BASE (KSEG1ADDR(Vrc5074_PHYS_BASE)) -#define PCI_WINDOW1 0x1a000000 - -#endif /* _LASAT_H */ diff --git a/arch/mips/include/asm/lasat/lasatint.h b/arch/mips/include/asm/lasat/lasatint.h deleted file mode 100644 index b2b346e0ca38..000000000000 --- a/arch/mips/include/asm/lasat/lasatint.h +++ /dev/null @@ -1,15 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_LASAT_LASATINT_H -#define __ASM_LASAT_LASATINT_H - -/* lasat 100 */ -#define LASAT_INT_STATUS_REG_100 (KSEG1ADDR(0x1c880000)) -#define LASAT_INT_MASK_REG_100 (KSEG1ADDR(0x1c890000)) -#define LASATINT_MASK_SHIFT_100 0 - -/* lasat 200 */ -#define LASAT_INT_STATUS_REG_200 (KSEG1ADDR(0x1104003c)) -#define LASAT_INT_MASK_REG_200 (KSEG1ADDR(0x1104003c)) -#define LASATINT_MASK_SHIFT_200 16 - -#endif /* __ASM_LASAT_LASATINT_H */ diff --git a/arch/mips/include/asm/lasat/picvue.h b/arch/mips/include/asm/lasat/picvue.h deleted file mode 100644 index 99987c5a4b83..000000000000 --- a/arch/mips/include/asm/lasat/picvue.h +++ /dev/null @@ -1,16 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* Lasat 100 */ -#define PVC_REG_100 KSEG1ADDR(0x1c820000) -#define PVC_DATA_SHIFT_100 0 -#define PVC_DATA_M_100 0xFF -#define PVC_E_100 (1 << 8) -#define PVC_RW_100 (1 << 9) -#define PVC_RS_100 (1 << 10) - -/* Lasat 200 */ -#define PVC_REG_200 KSEG1ADDR(0x11000000) -#define PVC_DATA_SHIFT_200 24 -#define PVC_DATA_M_200 (0xFF << PVC_DATA_SHIFT_200) -#define PVC_E_200 (1 << 16) -#define PVC_RW_200 (1 << 17) -#define PVC_RS_200 (1 << 18) diff --git a/arch/mips/include/asm/lasat/serial.h b/arch/mips/include/asm/lasat/serial.h deleted file mode 100644 index 7b43d74089d1..000000000000 --- a/arch/mips/include/asm/lasat/serial.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <asm/lasat/lasat.h> - -/* Lasat 100 boards serial configuration */ -#define LASAT_BASE_BAUD_100 (7372800 / 16) -#define LASAT_UART_REGS_BASE_100 0x1c8b0000 -#define LASAT_UART_REGS_SHIFT_100 2 -#define LASATINT_UART_100 16 - -/* * LASAT 200 boards serial configuration */ -#define LASAT_BASE_BAUD_200 (100000000 / 16 / 12) -#define LASAT_UART_REGS_BASE_200 (Vrc5074_PHYS_BASE + 0x0300) -#define LASAT_UART_REGS_SHIFT_200 3 -#define LASATINT_UART_200 21 diff --git a/arch/mips/include/asm/maar.h b/arch/mips/include/asm/maar.h index 6908b93c4ff9..99f1c3e4b11f 100644 --- a/arch/mips/include/asm/maar.h +++ b/arch/mips/include/asm/maar.h @@ -32,7 +32,7 @@ unsigned platform_maar_init(unsigned num_pairs); * @upper: The highest address that the MAAR pair will affect. Must be * aligned to one byte before a 2^16 byte boundary. * @attrs: The accessibility attributes to program, eg. MIPS_MAAR_S. The - * MIPS_MAAR_VL attribute will automatically be set. + * MIPS_MAAR_VL/MIPS_MAAR_VH attributes will automatically be set. * * Program the pair of MAAR registers specified by idx to apply the attributes * specified by attrs to the range of addresses from lower to higher. @@ -48,17 +48,30 @@ static inline void write_maar_pair(unsigned idx, phys_addr_t lower, /* Automatically set MIPS_MAAR_VL */ attrs |= MIPS_MAAR_VL; - /* Write the upper address & attributes (only MIPS_MAAR_VL matters) */ + /* + * Write the upper address & attributes (both MIPS_MAAR_VL and + * MIPS_MAAR_VH matter) + */ write_c0_maari(idx << 1); back_to_back_c0_hazard(); write_c0_maar(((upper >> 4) & MIPS_MAAR_ADDR) | attrs); back_to_back_c0_hazard(); +#ifdef CONFIG_XPA + upper >>= MIPS_MAARX_ADDR_SHIFT; + writex_c0_maar(((upper >> 4) & MIPS_MAARX_ADDR) | MIPS_MAARX_VH); + back_to_back_c0_hazard(); +#endif /* Write the lower address & attributes */ write_c0_maari((idx << 1) | 0x1); back_to_back_c0_hazard(); write_c0_maar((lower >> 4) | attrs); back_to_back_c0_hazard(); +#ifdef CONFIG_XPA + lower >>= MIPS_MAARX_ADDR_SHIFT; + writex_c0_maar(((lower >> 4) & MIPS_MAARX_ADDR) | MIPS_MAARX_VH); + back_to_back_c0_hazard(); +#endif } /** diff --git a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h index 95a0b580909d..a54f20d956a2 100644 --- a/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath25/cpu-feature-overrides.h @@ -56,6 +56,5 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #endif /* __ASM_MACH_ATH25_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h index e7c972fccd9f..79ab3ad9fee8 100644 --- a/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ath79/cpu-feature-overrides.h @@ -45,7 +45,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-au1x00/ioremap.h b/arch/mips/include/asm/mach-au1x00/ioremap.h deleted file mode 100644 index f6877ed8b8d0..000000000000 --- a/arch/mips/include/asm/mach-au1x00/ioremap.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * include/asm-mips/mach-au1x00/ioremap.h - */ -#ifndef __ASM_MACH_AU1X00_IOREMAP_H -#define __ASM_MACH_AU1X00_IOREMAP_H - -#include <linux/types.h> - -#if defined(CONFIG_PHYS_ADDR_T_64BIT) && defined(CONFIG_PCI) -extern phys_addr_t __fixup_bigphys_addr(phys_addr_t, phys_addr_t); -#else -static inline phys_addr_t __fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} -#endif - -/* - * Allow physical addresses to be fixed up to help 36-bit peripherals. - */ -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return __fixup_bigphys_addr(phys_addr, size); -} - -static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, - unsigned long flags) -{ - return NULL; -} - -static inline int plat_iounmap(const volatile void __iomem *addr) -{ - return 0; -} - -#endif /* __ASM_MACH_AU1X00_IOREMAP_H */ diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h index 8fe88c2251e4..9212429d5edd 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_gpio.h @@ -13,16 +13,16 @@ static inline unsigned long bcm63xx_gpio_count(void) case BCM6328_CPU_ID: return 32; case BCM3368_CPU_ID: - case BCM6358_CPU_ID: return 40; case BCM6338_CPU_ID: return 8; case BCM6345_CPU_ID: return 16; - case BCM6362_CPU_ID: - return 48; + case BCM6358_CPU_ID: case BCM6368_CPU_ID: return 38; + case BCM6362_CPU_ID: + return 48; case BCM6348_CPU_ID: default: return 37; diff --git a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h index bc3444cd4ef2..9ceb5e72889f 100644 --- a/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h +++ b/arch/mips/include/asm/mach-bcm63xx/bcm63xx_regs.h @@ -1367,8 +1367,8 @@ #define MISC_STRAPBUS_6328_REG 0x240 #define STRAPBUS_6328_FCVO_SHIFT 7 #define STRAPBUS_6328_FCVO_MASK (0x1f << STRAPBUS_6328_FCVO_SHIFT) -#define STRAPBUS_6328_BOOT_SEL_SERIAL (1 << 28) -#define STRAPBUS_6328_BOOT_SEL_NAND (0 << 28) +#define STRAPBUS_6328_BOOT_SEL_SERIAL (1 << 18) +#define STRAPBUS_6328_BOOT_SEL_NAND (0 << 18) /************************************************************************* * _REG relative to RSET_PCIE diff --git a/arch/mips/include/asm/mach-bcm63xx/ioremap.h b/arch/mips/include/asm/mach-bcm63xx/ioremap.h index 8cd261ec0a75..73f31825bbf3 100644 --- a/arch/mips/include/asm/mach-bcm63xx/ioremap.h +++ b/arch/mips/include/asm/mach-bcm63xx/ioremap.h @@ -4,11 +4,6 @@ #include <bcm63xx_cpu.h> -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline int is_bcm63xx_internal_registers(phys_addr_t offset) { switch (bcm63xx_get_cpu_id()) { diff --git a/arch/mips/include/asm/mach-bmips/ioremap.h b/arch/mips/include/asm/mach-bmips/ioremap.h index 52632ebc705f..63b4af9916b6 100644 --- a/arch/mips/include/asm/mach-bmips/ioremap.h +++ b/arch/mips/include/asm/mach-bmips/ioremap.h @@ -4,11 +4,6 @@ #include <linux/types.h> -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline int is_bmips_internal_registers(phys_addr_t offset) { if (offset >= 0xfff80000) diff --git a/arch/mips/include/asm/mach-emma2rh/irq.h b/arch/mips/include/asm/mach-emma2rh/irq.h deleted file mode 100644 index d32736736bb3..000000000000 --- a/arch/mips/include/asm/mach-emma2rh/irq.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2003 by Ralf Baechle - */ -#ifndef __ASM_MACH_EMMA2RH_IRQ_H -#define __ASM_MACH_EMMA2RH_IRQ_H - -#define NR_IRQS 256 - -#include <asm/mach-generic/irq.h> - -#endif /* __ASM_MACH_EMMA2RH_IRQ_H */ diff --git a/arch/mips/include/asm/mach-generic/floppy.h b/arch/mips/include/asm/mach-generic/floppy.h index 9ec2f6a5200b..e3f446d54827 100644 --- a/arch/mips/include/asm/mach-generic/floppy.h +++ b/arch/mips/include/asm/mach-generic/floppy.h @@ -26,14 +26,14 @@ /* * How to access the FDC's registers. */ -static inline unsigned char fd_inb(unsigned int port) +static inline unsigned char fd_inb(unsigned int base, unsigned int reg) { - return inb_p(port); + return inb_p(base + reg); } -static inline void fd_outb(unsigned char value, unsigned int port) +static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg) { - outb_p(value, port); + outb_p(value, base + reg); } /* diff --git a/arch/mips/include/asm/mach-generic/ioremap.h b/arch/mips/include/asm/mach-generic/ioremap.h index 4e36ea25ed33..f2442b84545c 100644 --- a/arch/mips/include/asm/mach-generic/ioremap.h +++ b/arch/mips/include/asm/mach-generic/ioremap.h @@ -7,15 +7,6 @@ #include <linux/types.h> -/* - * Allow physical addresses to be fixed up to help peripherals located - * outside the low 32-bit range -- generic pass-through version. - */ -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, unsigned long flags) { diff --git a/arch/mips/include/asm/mach-generic/irq.h b/arch/mips/include/asm/mach-generic/irq.h index be546a0f65fa..72ac2c202c55 100644 --- a/arch/mips/include/asm/mach-generic/irq.h +++ b/arch/mips/include/asm/mach-generic/irq.h @@ -36,10 +36,4 @@ #endif /* CONFIG_IRQ_MIPS_CPU */ -#ifdef CONFIG_MIPS_GIC -#ifndef MIPS_GIC_IRQ_BASE -#define MIPS_GIC_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8) -#endif -#endif /* CONFIG_MIPS_GIC */ - #endif /* __ASM_MACH_GENERIC_IRQ_H */ diff --git a/arch/mips/include/asm/mach-ip27/spaces.h b/arch/mips/include/asm/mach-ip27/spaces.h index 24d5e31bcfa6..66421e9a6aa6 100644 --- a/arch/mips/include/asm/mach-ip27/spaces.h +++ b/arch/mips/include/asm/mach-ip27/spaces.h @@ -10,17 +10,19 @@ #ifndef _ASM_MACH_IP27_SPACES_H #define _ASM_MACH_IP27_SPACES_H +#include <linux/const.h> + /* * IP27 uses the R10000's uncached attribute feature. Attribute 3 selects * uncached memory addressing. Hide the definitions on 32-bit compilation * of the compat-vdso code. */ #ifdef CONFIG_64BIT -#define HSPEC_BASE 0x9000000000000000 -#define IO_BASE 0x9200000000000000 -#define MSPEC_BASE 0x9400000000000000 -#define UNCAC_BASE 0x9600000000000000 -#define CAC_BASE 0xa800000000000000 +#define HSPEC_BASE _AC(0x9000000000000000, UL) +#define IO_BASE _AC(0x9200000000000000, UL) +#define MSPEC_BASE _AC(0x9400000000000000, UL) +#define UNCAC_BASE _AC(0x9600000000000000, UL) +#define CAC_BASE _AC(0xa800000000000000, UL) #endif #define TO_MSPEC(x) (MSPEC_BASE | ((x) & TO_PHYS_MASK)) diff --git a/arch/mips/include/asm/mach-ip30/war.h b/arch/mips/include/asm/mach-ip30/war.h index a98ba204f183..a1fa0c1f5300 100644 --- a/arch/mips/include/asm/mach-ip30/war.h +++ b/arch/mips/include/asm/mach-ip30/war.h @@ -8,19 +8,17 @@ #define R4600_V1_INDEX_ICACHEOP_WAR 0 #define R4600_V1_HIT_CACHEOP_WAR 0 #define R4600_V2_HIT_CACHEOP_WAR 0 -#define MIPS_CACHE_SYNC_WAR 0 #define BCM1250_M3_WAR 0 #define SIBYTE_1956_WAR 0 #define MIPS4K_ICACHE_REFILL_WAR 0 -#define MIPS34K_MISSED_ITLB_WAR 0 -#define R5432_CP0_INTERRUPT_WAR 0 +#define MIPS_CACHE_SYNC_WAR 0 #define TX49XX_ICACHE_INDEX_INV_WAR 0 #define ICACHE_REFILLS_WORKAROUND_WAR 0 - #ifdef CONFIG_CPU_R10000 #define R10000_LLSC_WAR 1 #else #define R10000_LLSC_WAR 0 #endif +#define MIPS34K_MISSED_ITLB_WAR 0 #endif /* __ASM_MIPS_MACH_IP30_WAR_H */ diff --git a/arch/mips/include/asm/mach-jazz/floppy.h b/arch/mips/include/asm/mach-jazz/floppy.h index 4b86c88a03b7..095000c290e5 100644 --- a/arch/mips/include/asm/mach-jazz/floppy.h +++ b/arch/mips/include/asm/mach-jazz/floppy.h @@ -17,19 +17,19 @@ #include <asm/jazzdma.h> #include <asm/pgtable.h> -static inline unsigned char fd_inb(unsigned int port) +static inline unsigned char fd_inb(unsigned int base, unsigned int reg) { unsigned char c; - c = *(volatile unsigned char *) port; + c = *(volatile unsigned char *) (base + reg); udelay(1); return c; } -static inline void fd_outb(unsigned char value, unsigned int port) +static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg) { - *(volatile unsigned char *) port = value; + *(volatile unsigned char *) (base + reg) = value; } /* diff --git a/arch/mips/include/asm/mach-jz4740/base.h b/arch/mips/include/asm/mach-jz4740/base.h deleted file mode 100644 index 96b2d6674cdb..000000000000 --- a/arch/mips/include/asm/mach-jz4740/base.h +++ /dev/null @@ -1,27 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __ASM_MACH_JZ4740_BASE_H__ -#define __ASM_MACH_JZ4740_BASE_H__ - -#define JZ4740_CPM_BASE_ADDR 0x10000000 -#define JZ4740_INTC_BASE_ADDR 0x10001000 -#define JZ4740_WDT_BASE_ADDR 0x10002000 -#define JZ4740_TCU_BASE_ADDR 0x10002010 -#define JZ4740_RTC_BASE_ADDR 0x10003000 -#define JZ4740_GPIO_BASE_ADDR 0x10010000 -#define JZ4740_AIC_BASE_ADDR 0x10020000 -#define JZ4740_MSC_BASE_ADDR 0x10021000 -#define JZ4740_UART0_BASE_ADDR 0x10030000 -#define JZ4740_UART1_BASE_ADDR 0x10031000 -#define JZ4740_I2C_BASE_ADDR 0x10042000 -#define JZ4740_SSI_BASE_ADDR 0x10043000 -#define JZ4740_SADC_BASE_ADDR 0x10070000 -#define JZ4740_EMC_BASE_ADDR 0x13010000 -#define JZ4740_DMAC_BASE_ADDR 0x13020000 -#define JZ4740_UHC_BASE_ADDR 0x13030000 -#define JZ4740_UDC_BASE_ADDR 0x13040000 -#define JZ4740_LCD_BASE_ADDR 0x13050000 -#define JZ4740_SLCD_BASE_ADDR 0x13050000 -#define JZ4740_CIM_BASE_ADDR 0x13060000 -#define JZ4740_IPU_BASE_ADDR 0x13080000 - -#endif diff --git a/arch/mips/include/asm/mach-jz4740/dma.h b/arch/mips/include/asm/mach-jz4740/dma.h deleted file mode 100644 index e5d2a5311a3a..000000000000 --- a/arch/mips/include/asm/mach-jz4740/dma.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ7420/JZ4740 DMA definitions - */ - -#ifndef __ASM_MACH_JZ4740_DMA_H__ -#define __ASM_MACH_JZ4740_DMA_H__ - -enum jz4740_dma_request_type { - JZ4740_DMA_TYPE_AUTO_REQUEST = 8, - JZ4740_DMA_TYPE_UART_TRANSMIT = 20, - JZ4740_DMA_TYPE_UART_RECEIVE = 21, - JZ4740_DMA_TYPE_SPI_TRANSMIT = 22, - JZ4740_DMA_TYPE_SPI_RECEIVE = 23, - JZ4740_DMA_TYPE_MMC_TRANSMIT = 26, - JZ4740_DMA_TYPE_MMC_RECEIVE = 27, - JZ4740_DMA_TYPE_TCU = 28, - JZ4740_DMA_TYPE_SADC = 29, - JZ4740_DMA_TYPE_SLCD = 30, -}; - -#endif /* __ASM_JZ4740_DMA_H__ */ diff --git a/arch/mips/include/asm/mach-jz4740/irq.h b/arch/mips/include/asm/mach-jz4740/irq.h index 09c38eac671a..27c543bd340f 100644 --- a/arch/mips/include/asm/mach-jz4740/irq.h +++ b/arch/mips/include/asm/mach-jz4740/irq.h @@ -8,49 +8,6 @@ #define __ASM_MACH_JZ4740_IRQ_H__ #define MIPS_CPU_IRQ_BASE 0 -#define JZ4740_IRQ_BASE 8 - -#ifdef CONFIG_MACH_JZ4740 -# define NR_INTC_IRQS 32 -#else -# define NR_INTC_IRQS 64 -#endif - -/* 1st-level interrupts */ -#define JZ4740_IRQ(x) (JZ4740_IRQ_BASE + (x)) -#define JZ4740_IRQ_I2C JZ4740_IRQ(1) -#define JZ4740_IRQ_UHC JZ4740_IRQ(3) -#define JZ4740_IRQ_UART1 JZ4740_IRQ(8) -#define JZ4740_IRQ_UART0 JZ4740_IRQ(9) -#define JZ4740_IRQ_SADC JZ4740_IRQ(12) -#define JZ4740_IRQ_MSC JZ4740_IRQ(14) -#define JZ4740_IRQ_RTC JZ4740_IRQ(15) -#define JZ4740_IRQ_SSI JZ4740_IRQ(16) -#define JZ4740_IRQ_CIM JZ4740_IRQ(17) -#define JZ4740_IRQ_AIC JZ4740_IRQ(18) -#define JZ4740_IRQ_ETH JZ4740_IRQ(19) -#define JZ4740_IRQ_DMAC JZ4740_IRQ(20) -#define JZ4740_IRQ_TCU2 JZ4740_IRQ(21) -#define JZ4740_IRQ_TCU1 JZ4740_IRQ(22) -#define JZ4740_IRQ_TCU0 JZ4740_IRQ(23) -#define JZ4740_IRQ_UDC JZ4740_IRQ(24) -#define JZ4740_IRQ_GPIO3 JZ4740_IRQ(25) -#define JZ4740_IRQ_GPIO2 JZ4740_IRQ(26) -#define JZ4740_IRQ_GPIO1 JZ4740_IRQ(27) -#define JZ4740_IRQ_GPIO0 JZ4740_IRQ(28) -#define JZ4740_IRQ_IPU JZ4740_IRQ(29) -#define JZ4740_IRQ_LCD JZ4740_IRQ(30) - -#define JZ4780_IRQ_TCU2 JZ4740_IRQ(25) - -/* 2nd-level interrupts */ -#define JZ4740_IRQ_DMA(x) (JZ4740_IRQ(NR_INTC_IRQS) + (x)) - -#define JZ4740_IRQ_INTC_GPIO(x) (JZ4740_IRQ_GPIO0 - (x)) -#define JZ4740_IRQ_GPIO(x) (JZ4740_IRQ(NR_INTC_IRQS + 16) + (x)) - -#define JZ4740_IRQ_ADC_BASE JZ4740_IRQ(NR_INTC_IRQS + 144) - -#define NR_IRQS (JZ4740_IRQ_ADC_BASE + 6) +#define NR_IRQS 256 #endif diff --git a/arch/mips/include/asm/mach-jz4740/timer.h b/arch/mips/include/asm/mach-jz4740/timer.h deleted file mode 100644 index 8a19cfe5bed7..000000000000 --- a/arch/mips/include/asm/mach-jz4740/timer.h +++ /dev/null @@ -1,126 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 platform timer support - */ - -#ifndef __ASM_MACH_JZ4740_TIMER -#define __ASM_MACH_JZ4740_TIMER - -#define JZ_REG_TIMER_STOP 0x0C -#define JZ_REG_TIMER_STOP_SET 0x1C -#define JZ_REG_TIMER_STOP_CLEAR 0x2C -#define JZ_REG_TIMER_ENABLE 0x00 -#define JZ_REG_TIMER_ENABLE_SET 0x04 -#define JZ_REG_TIMER_ENABLE_CLEAR 0x08 -#define JZ_REG_TIMER_FLAG 0x10 -#define JZ_REG_TIMER_FLAG_SET 0x14 -#define JZ_REG_TIMER_FLAG_CLEAR 0x18 -#define JZ_REG_TIMER_MASK 0x20 -#define JZ_REG_TIMER_MASK_SET 0x24 -#define JZ_REG_TIMER_MASK_CLEAR 0x28 - -#define JZ_REG_TIMER_DFR(x) (((x) * 0x10) + 0x30) -#define JZ_REG_TIMER_DHR(x) (((x) * 0x10) + 0x34) -#define JZ_REG_TIMER_CNT(x) (((x) * 0x10) + 0x38) -#define JZ_REG_TIMER_CTRL(x) (((x) * 0x10) + 0x3C) - -#define JZ_TIMER_IRQ_HALF(x) BIT((x) + 0x10) -#define JZ_TIMER_IRQ_FULL(x) BIT(x) - -#define JZ_TIMER_CTRL_PWM_ABBRUPT_SHUTDOWN BIT(9) -#define JZ_TIMER_CTRL_PWM_ACTIVE_LOW BIT(8) -#define JZ_TIMER_CTRL_PWM_ENABLE BIT(7) -#define JZ_TIMER_CTRL_PRESCALE_MASK 0x1c -#define JZ_TIMER_CTRL_PRESCALE_OFFSET 0x3 -#define JZ_TIMER_CTRL_PRESCALE_1 (0 << 3) -#define JZ_TIMER_CTRL_PRESCALE_4 (1 << 3) -#define JZ_TIMER_CTRL_PRESCALE_16 (2 << 3) -#define JZ_TIMER_CTRL_PRESCALE_64 (3 << 3) -#define JZ_TIMER_CTRL_PRESCALE_256 (4 << 3) -#define JZ_TIMER_CTRL_PRESCALE_1024 (5 << 3) - -#define JZ_TIMER_CTRL_PRESCALER(x) ((x) << JZ_TIMER_CTRL_PRESCALE_OFFSET) - -#define JZ_TIMER_CTRL_SRC_EXT BIT(2) -#define JZ_TIMER_CTRL_SRC_RTC BIT(1) -#define JZ_TIMER_CTRL_SRC_PCLK BIT(0) - -extern void __iomem *jz4740_timer_base; -void __init jz4740_timer_init(void); - -void jz4740_timer_enable_watchdog(void); -void jz4740_timer_disable_watchdog(void); - -static inline void jz4740_timer_stop(unsigned int timer) -{ - writel(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_STOP_SET); -} - -static inline void jz4740_timer_start(unsigned int timer) -{ - writel(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR); -} - -static inline bool jz4740_timer_is_enabled(unsigned int timer) -{ - return readb(jz4740_timer_base + JZ_REG_TIMER_ENABLE) & BIT(timer); -} - -static inline void jz4740_timer_enable(unsigned int timer) -{ - writeb(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_ENABLE_SET); -} - -static inline void jz4740_timer_disable(unsigned int timer) -{ - writeb(BIT(timer), jz4740_timer_base + JZ_REG_TIMER_ENABLE_CLEAR); -} - -static inline void jz4740_timer_set_period(unsigned int timer, uint16_t period) -{ - writew(period, jz4740_timer_base + JZ_REG_TIMER_DFR(timer)); -} - -static inline void jz4740_timer_set_duty(unsigned int timer, uint16_t duty) -{ - writew(duty, jz4740_timer_base + JZ_REG_TIMER_DHR(timer)); -} - -static inline void jz4740_timer_set_count(unsigned int timer, uint16_t count) -{ - writew(count, jz4740_timer_base + JZ_REG_TIMER_CNT(timer)); -} - -static inline uint16_t jz4740_timer_get_count(unsigned int timer) -{ - return readw(jz4740_timer_base + JZ_REG_TIMER_CNT(timer)); -} - -static inline void jz4740_timer_ack_full(unsigned int timer) -{ - writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_FLAG_CLEAR); -} - -static inline void jz4740_timer_irq_full_enable(unsigned int timer) -{ - writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_FLAG_CLEAR); - writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_MASK_CLEAR); -} - -static inline void jz4740_timer_irq_full_disable(unsigned int timer) -{ - writel(JZ_TIMER_IRQ_FULL(timer), jz4740_timer_base + JZ_REG_TIMER_MASK_SET); -} - -static inline void jz4740_timer_set_ctrl(unsigned int timer, uint16_t ctrl) -{ - writew(ctrl, jz4740_timer_base + JZ_REG_TIMER_CTRL(timer)); -} - -static inline uint16_t jz4740_timer_get_ctrl(unsigned int timer) -{ - return readw(jz4740_timer_base + JZ_REG_TIMER_CTRL(timer)); -} - -#endif diff --git a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h index f03c1c42dd90..10226976f7b7 100644 --- a/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-lantiq/falcon/cpu-feature-overrides.h @@ -46,7 +46,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-lasat/irq.h b/arch/mips/include/asm/mach-lasat/irq.h deleted file mode 100644 index e8994921779e..000000000000 --- a/arch/mips/include/asm/mach-lasat/irq.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_MACH_LASAT_IRQ_H -#define _ASM_MACH_LASAT_IRQ_H - -#define LASAT_CASCADE_IRQ (MIPS_CPU_IRQ_BASE + 2) - -#define LASAT_IRQ_BASE 8 -#define LASAT_IRQ_END 23 - -#define NR_IRQS 24 - -#include <asm/mach-generic/irq.h> - -#endif /* _ASM_MACH_LASAT_IRQ_H */ diff --git a/arch/mips/include/asm/mach-lasat/mach-gt64120.h b/arch/mips/include/asm/mach-lasat/mach-gt64120.h deleted file mode 100644 index 6666a8871a23..000000000000 --- a/arch/mips/include/asm/mach-lasat/mach-gt64120.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * This is a direct copy of the ev96100.h file, with a global - * search and replace. The numbers are the same. - * - * The reason I'm duplicating this is so that the 64120/96100 - * defines won't be confusing in the source code. - */ -#ifndef _ASM_GT64120_LASAT_GT64120_DEP_H -#define _ASM_GT64120_LASAT_GT64120_DEP_H - -/* - * GT64120 config space base address on Lasat 100 - */ -#define GT64120_BASE (KSEG1ADDR(0x14000000)) - -/* - * PCI Bus allocation - * - * (Guessing ...) - */ -#define GT_PCI_MEM_BASE 0x12000000UL -#define GT_PCI_MEM_SIZE 0x02000000UL -#define GT_PCI_IO_BASE 0x10000000UL -#define GT_PCI_IO_SIZE 0x02000000UL -#define GT_ISA_IO_BASE PCI_IO_BASE - -#endif /* _ASM_GT64120_LASAT_GT64120_DEP_H */ diff --git a/arch/mips/include/asm/mach-loongson2ef/loongson.h b/arch/mips/include/asm/mach-loongson2ef/loongson.h index 5008af0a1a19..57e571128489 100644 --- a/arch/mips/include/asm/mach-loongson2ef/loongson.h +++ b/arch/mips/include/asm/mach-loongson2ef/loongson.h @@ -244,6 +244,7 @@ static inline void do_perfcnt_IRQ(void) #ifdef CONFIG_CPU_SUPPORTS_CPUFREQ #include <linux/cpufreq.h> extern struct cpufreq_frequency_table loongson2_clockmod_table[]; +extern int loongson2_cpu_set_rate(unsigned long rate_khz); #endif /* diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 2ed483e32d8c..b35be709f9da 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -192,6 +192,11 @@ struct boot_params { struct efi_reset_system_t reset_system; }; +enum loongson_bridge_type { + LS7A = 1, + RS780E = 2 +}; + struct loongson_system_configuration { u32 nr_cpus; u32 nr_nodes; @@ -200,6 +205,7 @@ struct loongson_system_configuration { u16 boot_cpu_id; u16 reserved_cpus_mask; enum loongson_cpu_type cputype; + enum loongson_bridge_type bridgetype; u64 ht_control_base; u64 pci_mem_start_addr; u64 pci_mem_end_addr; @@ -215,9 +221,14 @@ struct loongson_system_configuration { u32 nr_sensors; struct sensor_device sensors[MAX_SENSORS]; u64 workarounds; + void (*early_config)(void); }; extern struct efi_memory_map_loongson *loongson_memmap; extern struct loongson_system_configuration loongson_sysconf; +extern u32 node_id_offset; +extern void ls7a_early_config(void); +extern void rs780e_early_config(void); + #endif diff --git a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h index 4fab38c743dd..b6e9c99b85a5 100644 --- a/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-loongson64/cpu-feature-overrides.h @@ -48,5 +48,6 @@ #define cpu_hwrena_impl_bits 0xc0000000 #define cpu_has_mac2008_only 1 #define cpu_has_mips_r2_exec_hazard 0 +#define cpu_has_perf_cntr_intr_bit 0 #endif /* __ASM_MACH_LOONGSON64_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h b/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h new file mode 100644 index 000000000000..d64af19c210d --- /dev/null +++ b/arch/mips/include/asm/mach-loongson64/cpucfg-emul.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_ +#define _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_ + +#include <asm/cpu-info.h> + +#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION + +#include <loongson_regs.h> + +#define LOONGSON_FPREV_MASK 0x7 + +void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c); + +static inline bool loongson3_cpucfg_emulation_enabled(struct cpuinfo_mips *c) +{ + /* All supported cores have non-zero LOONGSON_CFG1 data. */ + return c->loongson3_cpucfg_data[0] != 0; +} + +static inline u32 loongson3_cpucfg_read_synthesized(struct cpuinfo_mips *c, + __u64 sel) +{ + switch (sel) { + case LOONGSON_CFG0: + return c->processor_id; + case LOONGSON_CFG1: + case LOONGSON_CFG2: + case LOONGSON_CFG3: + return c->loongson3_cpucfg_data[sel - 1]; + case LOONGSON_CFG4: + case LOONGSON_CFG5: + /* CPUCFG selects 4 and 5 are related to the input clock + * signal. + * + * Unimplemented for now. + */ + return 0; + case LOONGSON_CFG6: + /* CPUCFG select 6 is for the undocumented Safe Extension. */ + return 0; + case LOONGSON_CFG7: + /* CPUCFG select 7 is for the virtualization extension. + * We don't know if the two currently known features are + * supported on older cores according to the public + * documentation, so leave this at zero. + */ + return 0; + } + + /* + * Return 0 for unrecognized CPUCFG selects, which is real hardware + * behavior observed on Loongson 3A R4. + */ + return 0; +} +#else +static inline void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c) +{ +} + +static inline bool loongson3_cpucfg_emulation_enabled(struct cpuinfo_mips *c) +{ + return false; +} + +static inline u32 loongson3_cpucfg_read_synthesized(struct cpuinfo_mips *c, + __u64 sel) +{ + return 0; +} +#endif + +#endif /* _ASM_MACH_LOONGSON64_CPUCFG_EMUL_H_ */ diff --git a/arch/mips/include/asm/mach-loongson64/loongson_regs.h b/arch/mips/include/asm/mach-loongson64/loongson_regs.h index 363a47a5d26e..83dbb9fdf9c2 100644 --- a/arch/mips/include/asm/mach-loongson64/loongson_regs.h +++ b/arch/mips/include/asm/mach-loongson64/loongson_regs.h @@ -67,6 +67,8 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG1_SFBP BIT(29) #define LOONGSON_CFG1_CDMAP BIT(30) +#define LOONGSON_CFG1_FPREV_OFFSET 1 + #define LOONGSON_CFG2 0x2 #define LOONGSON_CFG2_LEXT1 BIT(0) #define LOONGSON_CFG2_LEXT2 BIT(1) @@ -77,12 +79,12 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG2_LBT3 BIT(6) #define LOONGSON_CFG2_LBTMMU BIT(7) #define LOONGSON_CFG2_LPMP BIT(8) -#define LOONGSON_CFG2_LPMPREV GENMASK(11, 9) +#define LOONGSON_CFG2_LPMREV GENMASK(11, 9) #define LOONGSON_CFG2_LAMO BIT(12) #define LOONGSON_CFG2_LPIXU BIT(13) -#define LOONGSON_CFG2_LPIXUN BIT(14) -#define LOONGSON_CFG2_LZVP BIT(15) -#define LOONGSON_CFG2_LZVREV GENMASK(18, 16) +#define LOONGSON_CFG2_LPIXNU BIT(14) +#define LOONGSON_CFG2_LVZP BIT(15) +#define LOONGSON_CFG2_LVZREV GENMASK(18, 16) #define LOONGSON_CFG2_LGFTP BIT(19) #define LOONGSON_CFG2_LGFTPREV GENMASK(22, 20) #define LOONGSON_CFG2_LLFTP BIT(23) @@ -90,6 +92,13 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG2_LCSRP BIT(27) #define LOONGSON_CFG2_LDISBLIKELY BIT(28) +#define LOONGSON_CFG2_LPMREV_OFFSET 9 +#define LOONGSON_CFG2_LPM_REV1 (1 << LOONGSON_CFG2_LPMREV_OFFSET) +#define LOONGSON_CFG2_LPM_REV2 (2 << LOONGSON_CFG2_LPMREV_OFFSET) +#define LOONGSON_CFG2_LVZREV_OFFSET 16 +#define LOONGSON_CFG2_LVZ_REV1 (1 << LOONGSON_CFG2_LVZREV_OFFSET) +#define LOONGSON_CFG2_LVZ_REV2 (2 << LOONGSON_CFG2_LVZREV_OFFSET) + #define LOONGSON_CFG3 0x3 #define LOONGSON_CFG3_LCAMP BIT(0) #define LOONGSON_CFG3_LCAMREV GENMASK(3, 1) @@ -97,6 +106,16 @@ static inline u32 read_cpucfg(u32 reg) #define LOONGSON_CFG3_LCAMKW GENMASK(19, 12) #define LOONGSON_CFG3_LCAMVW GENMASK(27, 20) +#define LOONGSON_CFG3_LCAMREV_OFFSET 1 +#define LOONGSON_CFG3_LCAM_REV1 (1 << LOONGSON_CFG3_LCAMREV_OFFSET) +#define LOONGSON_CFG3_LCAM_REV2 (2 << LOONGSON_CFG3_LCAMREV_OFFSET) +#define LOONGSON_CFG3_LCAMNUM_OFFSET 4 +#define LOONGSON_CFG3_LCAMNUM_REV1 (0x3f << LOONGSON_CFG3_LCAMNUM_OFFSET) +#define LOONGSON_CFG3_LCAMKW_OFFSET 12 +#define LOONGSON_CFG3_LCAMKW_REV1 (0x27 << LOONGSON_CFG3_LCAMKW_OFFSET) +#define LOONGSON_CFG3_LCAMVW_OFFSET 20 +#define LOONGSON_CFG3_LCAMVW_REV1 (0x3f << LOONGSON_CFG3_LCAMVW_OFFSET) + #define LOONGSON_CFG4 0x4 #define LOONGSON_CFG4_CCFREQ GENMASK(31, 0) @@ -139,7 +158,7 @@ static inline u64 csr_readq(u32 reg) { u64 __res; - /* DWRCSR reg, val */ + /* DRDCSR reg, val */ __asm__ __volatile__( "parse_r __res,%0\n\t" "parse_r reg,%1\n\t" diff --git a/arch/mips/include/asm/mach-loongson64/mc146818rtc.h b/arch/mips/include/asm/mach-loongson64/mc146818rtc.h deleted file mode 100644 index ebdccfee50be..000000000000 --- a/arch/mips/include/asm/mach-loongson64/mc146818rtc.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998, 2001, 03, 07 by Ralf Baechle (ralf@linux-mips.org) - * - * RTC routines for PC style attached Dallas chip. - */ -#ifndef __ASM_MACH_LOONGSON64_MC146818RTC_H -#define __ASM_MACH_LOONGSON64_MC146818RTC_H - -#include <linux/io.h> - -#define RTC_PORT(x) (0x70 + (x)) -#define RTC_IRQ 8 - -static inline unsigned char CMOS_READ(unsigned long addr) -{ - outb_p(addr, RTC_PORT(0)); - return inb_p(RTC_PORT(1)); -} - -static inline void CMOS_WRITE(unsigned char data, unsigned long addr) -{ - outb_p(addr, RTC_PORT(0)); - outb_p(data, RTC_PORT(1)); -} - -#define RTC_ALWAYS_BCD 0 - -#ifndef mc146818_decode_year -#define mc146818_decode_year(year) ((year) < 70 ? (year) + 2000 : (year) + 1970) -#endif - -#endif /* __ASM_MACH_LOONGSON64_MC146818RTC_H */ diff --git a/arch/mips/include/asm/mach-loongson64/spaces.h b/arch/mips/include/asm/mach-loongson64/spaces.h index e85bc1d9c4f2..3de0ac9d8829 100644 --- a/arch/mips/include/asm/mach-loongson64/spaces.h +++ b/arch/mips/include/asm/mach-loongson64/spaces.h @@ -6,5 +6,13 @@ #define CAC_BASE _AC(0x9800000000000000, UL) #endif /* CONFIG_64BIT */ +/* Skip 128k to trap NULL pointer dereferences */ +#define PCI_IOBASE _AC(0xc000000000000000 + SZ_128K, UL) +#define PCI_IOSIZE SZ_16M +#define MAP_BASE (PCI_IOBASE + PCI_IOSIZE) + +/* Reserved at the start of PCI_IOBASE for legacy drivers */ +#define MMIO_LOWER_RESERVED 0x10000 + #include <asm/mach-generic/spaces.h> #endif diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h b/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h deleted file mode 100644 index 016fa9446ba9..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/cpu-feature-overrides.h +++ /dev/null @@ -1,22 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2003, 04, 07 Ralf Baechle (ralf@linux-mips.org) - */ -#ifndef __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H -#define __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H - -#define cpu_has_mips16 1 -#define cpu_has_dsp 1 -/* #define cpu_has_dsp2 ??? - do runtime detection */ -#define cpu_has_mipsmt 1 -#define cpu_has_fpu 0 - -#define cpu_has_mips32r1 0 -#define cpu_has_mips32r2 1 -#define cpu_has_mips64r1 0 -#define cpu_has_mips64r2 0 - -#endif /* __ASM_MACH_MSP71XX_CPU_FEATURE_OVERRIDES_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h deleted file mode 100644 index 50de6876e1c9..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_cic_int.h +++ /dev/null @@ -1,139 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Defines for the MSP interrupt controller. - * - * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved. - * Author: Carsten Langgaard, carstenl@mips.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#ifndef _MSP_CIC_INT_H -#define _MSP_CIC_INT_H - -/* - * The PMC-Sierra CIC interrupts are all centrally managed by the - * CIC sub-system. - * We attempt to keep the interrupt numbers as consistent as possible - * across all of the MSP devices, but some differences will creep in ... - * The interrupts which are directly forwarded to the MIPS core interrupts - * are assigned interrupts in the range 0-7, interrupts cascaded through - * the CIC are assigned interrupts 8-39. The cascade occurs on C_IRQ4 - * (MSP_INT_CIC). Currently we don't really distinguish between VPE1 - * and VPE0 (or thread contexts for that matter). Will have to fix. - * The PER interrupts are assigned interrupts in the range 40-71. -*/ - - -/* - * IRQs directly forwarded to the CPU - */ -#define MSP_MIPS_INTBASE 0 -#define MSP_INT_SW0 0 /* IRQ for swint0, C_SW0 */ -#define MSP_INT_SW1 1 /* IRQ for swint1, C_SW1 */ -#define MSP_INT_MAC0 2 /* IRQ for MAC 0, C_IRQ0 */ -#define MSP_INT_MAC1 3 /* IRQ for MAC 1, C_IRQ1 */ -#define MSP_INT_USB 4 /* IRQ for USB, C_IRQ2 */ -#define MSP_INT_SAR 5 /* IRQ for ADSL2+ SAR, C_IRQ3 */ -#define MSP_INT_CIC 6 /* IRQ for CIC block, C_IRQ4 */ -#define MSP_INT_SEC 7 /* IRQ for Sec engine, C_IRQ5 */ - -/* - * IRQs cascaded on CPU interrupt 4 (CAUSE bit 12, C_IRQ4) - * These defines should be tied to the register definitions for the CIC - * interrupt routine. For now, just use hard-coded values. - */ -#define MSP_CIC_INTBASE (MSP_MIPS_INTBASE + 8) -#define MSP_INT_EXT0 (MSP_CIC_INTBASE + 0) - /* External interrupt 0 */ -#define MSP_INT_EXT1 (MSP_CIC_INTBASE + 1) - /* External interrupt 1 */ -#define MSP_INT_EXT2 (MSP_CIC_INTBASE + 2) - /* External interrupt 2 */ -#define MSP_INT_EXT3 (MSP_CIC_INTBASE + 3) - /* External interrupt 3 */ -#define MSP_INT_CPUIF (MSP_CIC_INTBASE + 4) - /* CPU interface interrupt */ -#define MSP_INT_EXT4 (MSP_CIC_INTBASE + 5) - /* External interrupt 4 */ -#define MSP_INT_CIC_USB (MSP_CIC_INTBASE + 6) - /* Cascaded IRQ for USB */ -#define MSP_INT_MBOX (MSP_CIC_INTBASE + 7) - /* Sec engine mailbox IRQ */ -#define MSP_INT_EXT5 (MSP_CIC_INTBASE + 8) - /* External interrupt 5 */ -#define MSP_INT_TDM (MSP_CIC_INTBASE + 9) - /* TDM interrupt */ -#define MSP_INT_CIC_MAC0 (MSP_CIC_INTBASE + 10) - /* Cascaded IRQ for MAC 0 */ -#define MSP_INT_CIC_MAC1 (MSP_CIC_INTBASE + 11) - /* Cascaded IRQ for MAC 1 */ -#define MSP_INT_CIC_SEC (MSP_CIC_INTBASE + 12) - /* Cascaded IRQ for sec engine */ -#define MSP_INT_PER (MSP_CIC_INTBASE + 13) - /* Peripheral interrupt */ -#define MSP_INT_TIMER0 (MSP_CIC_INTBASE + 14) - /* SLP timer 0 */ -#define MSP_INT_TIMER1 (MSP_CIC_INTBASE + 15) - /* SLP timer 1 */ -#define MSP_INT_TIMER2 (MSP_CIC_INTBASE + 16) - /* SLP timer 2 */ -#define MSP_INT_VPE0_TIMER (MSP_CIC_INTBASE + 17) - /* VPE0 MIPS timer */ -#define MSP_INT_BLKCP (MSP_CIC_INTBASE + 18) - /* Block Copy */ -#define MSP_INT_UART0 (MSP_CIC_INTBASE + 19) - /* UART 0 */ -#define MSP_INT_PCI (MSP_CIC_INTBASE + 20) - /* PCI subsystem */ -#define MSP_INT_EXT6 (MSP_CIC_INTBASE + 21) - /* External interrupt 5 */ -#define MSP_INT_PCI_MSI (MSP_CIC_INTBASE + 22) - /* PCI Message Signal */ -#define MSP_INT_CIC_SAR (MSP_CIC_INTBASE + 23) - /* Cascaded ADSL2+ SAR IRQ */ -#define MSP_INT_DSL (MSP_CIC_INTBASE + 24) - /* ADSL2+ IRQ */ -#define MSP_INT_CIC_ERR (MSP_CIC_INTBASE + 25) - /* SLP error condition */ -#define MSP_INT_VPE1_TIMER (MSP_CIC_INTBASE + 26) - /* VPE1 MIPS timer */ -#define MSP_INT_VPE0_PC (MSP_CIC_INTBASE + 27) - /* VPE0 Performance counter */ -#define MSP_INT_VPE1_PC (MSP_CIC_INTBASE + 28) - /* VPE1 Performance counter */ -#define MSP_INT_EXT7 (MSP_CIC_INTBASE + 29) - /* External interrupt 5 */ -#define MSP_INT_VPE0_SW (MSP_CIC_INTBASE + 30) - /* VPE0 Software interrupt */ -#define MSP_INT_VPE1_SW (MSP_CIC_INTBASE + 31) - /* VPE0 Software interrupt */ - -/* - * IRQs cascaded on CIC PER interrupt (MSP_INT_PER) - */ -#define MSP_PER_INTBASE (MSP_CIC_INTBASE + 32) -/* Reserved 0-1 */ -#define MSP_INT_UART1 (MSP_PER_INTBASE + 2) - /* UART 1 */ -/* Reserved 3-5 */ -#define MSP_INT_2WIRE (MSP_PER_INTBASE + 6) - /* 2-wire */ -#define MSP_INT_TM0 (MSP_PER_INTBASE + 7) - /* Peripheral timer block out 0 */ -#define MSP_INT_TM1 (MSP_PER_INTBASE + 8) - /* Peripheral timer block out 1 */ -/* Reserved 9 */ -#define MSP_INT_SPRX (MSP_PER_INTBASE + 10) - /* SPI RX complete */ -#define MSP_INT_SPTX (MSP_PER_INTBASE + 11) - /* SPI TX complete */ -#define MSP_INT_GPIO (MSP_PER_INTBASE + 12) - /* GPIO */ -#define MSP_INT_PER_ERR (MSP_PER_INTBASE + 13) - /* Peripheral error */ -/* Reserved 14-31 */ - -#endif /* !_MSP_CIC_INT_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h deleted file mode 100644 index daacebb047c2..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_gpio_macros.h +++ /dev/null @@ -1,343 +0,0 @@ -/* - * - * Macros for external SMP-safe access to the PMC MSP71xx reference - * board GPIO pins - * - * Copyright 2010 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __MSP_GPIO_MACROS_H__ -#define __MSP_GPIO_MACROS_H__ - -#include <msp_regops.h> -#include <msp_regs.h> - -#ifdef CONFIG_PMC_MSP7120_GW -#define MSP_NUM_GPIOS 20 -#else -#define MSP_NUM_GPIOS 28 -#endif - -/* -- GPIO Enumerations -- */ -enum msp_gpio_data { - MSP_GPIO_LO = 0, - MSP_GPIO_HI = 1, - MSP_GPIO_NONE, /* Special - Means pin is out of range */ - MSP_GPIO_TOGGLE, /* Special - Sets pin to opposite */ -}; - -enum msp_gpio_mode { - MSP_GPIO_INPUT = 0x0, - /* MSP_GPIO_ INTERRUPT = 0x1, Not supported yet */ - MSP_GPIO_UART_INPUT = 0x2, /* Only GPIO 4 or 5 */ - MSP_GPIO_OUTPUT = 0x8, - MSP_GPIO_UART_OUTPUT = 0x9, /* Only GPIO 2 or 3 */ - MSP_GPIO_PERIF_TIMERA = 0x9, /* Only GPIO 0 or 1 */ - MSP_GPIO_PERIF_TIMERB = 0xa, /* Only GPIO 0 or 1 */ - MSP_GPIO_UNKNOWN = 0xb, /* No such GPIO or mode */ -}; - -/* -- Static Tables -- */ - -/* Maps pins to data register */ -static volatile u32 * const MSP_GPIO_DATA_REGISTER[] = { - /* GPIO 0 and 1 on the first register */ - GPIO_DATA1_REG, GPIO_DATA1_REG, - /* GPIO 2, 3, 4, and 5 on the second register */ - GPIO_DATA2_REG, GPIO_DATA2_REG, GPIO_DATA2_REG, GPIO_DATA2_REG, - /* GPIO 6, 7, 8, and 9 on the third register */ - GPIO_DATA3_REG, GPIO_DATA3_REG, GPIO_DATA3_REG, GPIO_DATA3_REG, - /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */ - GPIO_DATA4_REG, GPIO_DATA4_REG, GPIO_DATA4_REG, GPIO_DATA4_REG, - GPIO_DATA4_REG, GPIO_DATA4_REG, - /* GPIO 16 - 23 on the first strange EXTENDED register */ - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - /* GPIO 24 - 27 on the second strange EXTENDED register */ - EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, - EXTENDED_GPIO2_REG, -}; - -/* Maps pins to mode register */ -static volatile u32 * const MSP_GPIO_MODE_REGISTER[] = { - /* GPIO 0 and 1 on the first register */ - GPIO_CFG1_REG, GPIO_CFG1_REG, - /* GPIO 2, 3, 4, and 5 on the second register */ - GPIO_CFG2_REG, GPIO_CFG2_REG, GPIO_CFG2_REG, GPIO_CFG2_REG, - /* GPIO 6, 7, 8, and 9 on the third register */ - GPIO_CFG3_REG, GPIO_CFG3_REG, GPIO_CFG3_REG, GPIO_CFG3_REG, - /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */ - GPIO_CFG4_REG, GPIO_CFG4_REG, GPIO_CFG4_REG, GPIO_CFG4_REG, - GPIO_CFG4_REG, GPIO_CFG4_REG, - /* GPIO 16 - 23 on the first strange EXTENDED register */ - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - EXTENDED_GPIO1_REG, EXTENDED_GPIO1_REG, - /* GPIO 24 - 27 on the second strange EXTENDED register */ - EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, EXTENDED_GPIO2_REG, - EXTENDED_GPIO2_REG, -}; - -/* Maps 'basic' pins to relative offset from 0 per register */ -static int MSP_GPIO_OFFSET[] = { - /* GPIO 0 and 1 on the first register */ - 0, 0, - /* GPIO 2, 3, 4, and 5 on the second register */ - 2, 2, 2, 2, - /* GPIO 6, 7, 8, and 9 on the third register */ - 6, 6, 6, 6, - /* GPIO 10, 11, 12, 13, 14, and 15 on the fourth register */ - 10, 10, 10, 10, 10, 10, -}; - -/* Maps MODE to allowed pin mask */ -static unsigned int MSP_GPIO_MODE_ALLOWED[] = { - 0xffffffff, /* Mode 0 - INPUT */ - 0x00000, /* Mode 1 - INTERRUPT */ - 0x00030, /* Mode 2 - UART_INPUT (GPIO 4, 5)*/ - 0, 0, 0, 0, 0, /* Modes 3, 4, 5, 6, and 7 are reserved */ - 0xffffffff, /* Mode 8 - OUTPUT */ - 0x0000f, /* Mode 9 - UART_OUTPUT/ - PERF_TIMERA (GPIO 0, 1, 2, 3) */ - 0x00003, /* Mode a - PERF_TIMERB (GPIO 0, 1) */ - 0x00000, /* Mode b - Not really a mode! */ -}; - -/* -- Bit masks -- */ - -/* This gives you the 'register relative offset gpio' number */ -#define OFFSET_GPIO_NUMBER(gpio) (gpio - MSP_GPIO_OFFSET[gpio]) - -/* These take the 'register relative offset gpio' number */ -#define BASIC_DATA_REG_MASK(ogpio) (1 << ogpio) -#define BASIC_MODE_REG_VALUE(mode, ogpio) \ - (mode << BASIC_MODE_REG_SHIFT(ogpio)) -#define BASIC_MODE_REG_MASK(ogpio) \ - BASIC_MODE_REG_VALUE(0xf, ogpio) -#define BASIC_MODE_REG_SHIFT(ogpio) (ogpio * 4) -#define BASIC_MODE_REG_FROM_REG(data, ogpio) \ - ((data & BASIC_MODE_REG_MASK(ogpio)) >> BASIC_MODE_REG_SHIFT(ogpio)) - -/* These take the actual GPIO number (0 through 15) */ -#define BASIC_DATA_MASK(gpio) \ - BASIC_DATA_REG_MASK(OFFSET_GPIO_NUMBER(gpio)) -#define BASIC_MODE_MASK(gpio) \ - BASIC_MODE_REG_MASK(OFFSET_GPIO_NUMBER(gpio)) -#define BASIC_MODE(mode, gpio) \ - BASIC_MODE_REG_VALUE(mode, OFFSET_GPIO_NUMBER(gpio)) -#define BASIC_MODE_SHIFT(gpio) \ - BASIC_MODE_REG_SHIFT(OFFSET_GPIO_NUMBER(gpio)) -#define BASIC_MODE_FROM_REG(data, gpio) \ - BASIC_MODE_REG_FROM_REG(data, OFFSET_GPIO_NUMBER(gpio)) - -/* - * Each extended GPIO register is 32 bits long and is responsible for up to - * eight GPIOs. The least significant 16 bits contain the set and clear bit - * pair for each of the GPIOs. The most significant 16 bits contain the - * disable and enable bit pair for each of the GPIOs. For example, the - * extended GPIO reg for GPIOs 16-23 is as follows: - * - * 31: GPIO23_DISABLE - * ... - * 19: GPIO17_DISABLE - * 18: GPIO17_ENABLE - * 17: GPIO16_DISABLE - * 16: GPIO16_ENABLE - * ... - * 3: GPIO17_SET - * 2: GPIO17_CLEAR - * 1: GPIO16_SET - * 0: GPIO16_CLEAR - */ - -/* This gives the 'register relative offset gpio' number */ -#define EXTENDED_OFFSET_GPIO(gpio) (gpio < 24 ? gpio - 16 : gpio - 24) - -/* These take the 'register relative offset gpio' number */ -#define EXTENDED_REG_DISABLE(ogpio) (0x2 << ((ogpio * 2) + 16)) -#define EXTENDED_REG_ENABLE(ogpio) (0x1 << ((ogpio * 2) + 16)) -#define EXTENDED_REG_SET(ogpio) (0x2 << (ogpio * 2)) -#define EXTENDED_REG_CLR(ogpio) (0x1 << (ogpio * 2)) - -/* These take the actual GPIO number (16 through 27) */ -#define EXTENDED_DISABLE(gpio) \ - EXTENDED_REG_DISABLE(EXTENDED_OFFSET_GPIO(gpio)) -#define EXTENDED_ENABLE(gpio) \ - EXTENDED_REG_ENABLE(EXTENDED_OFFSET_GPIO(gpio)) -#define EXTENDED_SET(gpio) \ - EXTENDED_REG_SET(EXTENDED_OFFSET_GPIO(gpio)) -#define EXTENDED_CLR(gpio) \ - EXTENDED_REG_CLR(EXTENDED_OFFSET_GPIO(gpio)) - -#define EXTENDED_FULL_MASK (0xffffffff) - -/* -- API inline-functions -- */ - -/* - * Gets the current value of the specified pin - */ -static inline enum msp_gpio_data msp_gpio_pin_get(unsigned int gpio) -{ - u32 pinhi_mask = 0, pinhi_mask2 = 0; - - if (gpio >= MSP_NUM_GPIOS) - return MSP_GPIO_NONE; - - if (gpio < 16) { - pinhi_mask = BASIC_DATA_MASK(gpio); - } else { - /* - * Two cases are possible with the EXTENDED register: - * - In output mode (ENABLED flag set), check the CLR bit - * - In input mode (ENABLED flag not set), check the SET bit - */ - pinhi_mask = EXTENDED_ENABLE(gpio) | EXTENDED_CLR(gpio); - pinhi_mask2 = EXTENDED_SET(gpio); - } - if (((*MSP_GPIO_DATA_REGISTER[gpio] & pinhi_mask) == pinhi_mask) || - (*MSP_GPIO_DATA_REGISTER[gpio] & pinhi_mask2)) - return MSP_GPIO_HI; - else - return MSP_GPIO_LO; -} - -/* Sets the specified pin to the specified value */ -static inline void msp_gpio_pin_set(enum msp_gpio_data data, unsigned int gpio) -{ - if (gpio >= MSP_NUM_GPIOS) - return; - - if (gpio < 16) { - if (data == MSP_GPIO_TOGGLE) - toggle_reg32(MSP_GPIO_DATA_REGISTER[gpio], - BASIC_DATA_MASK(gpio)); - else if (data == MSP_GPIO_HI) - set_reg32(MSP_GPIO_DATA_REGISTER[gpio], - BASIC_DATA_MASK(gpio)); - else - clear_reg32(MSP_GPIO_DATA_REGISTER[gpio], - BASIC_DATA_MASK(gpio)); - } else { - if (data == MSP_GPIO_TOGGLE) { - /* Special ugly case: - * We have to read the CLR bit. - * If set, we write the CLR bit. - * If not, we write the SET bit. - */ - u32 tmpdata; - - custom_read_reg32(MSP_GPIO_DATA_REGISTER[gpio], - tmpdata); - if (tmpdata & EXTENDED_CLR(gpio)) - tmpdata = EXTENDED_CLR(gpio); - else - tmpdata = EXTENDED_SET(gpio); - custom_write_reg32(MSP_GPIO_DATA_REGISTER[gpio], - tmpdata); - } else { - u32 newdata; - - if (data == MSP_GPIO_HI) - newdata = EXTENDED_SET(gpio); - else - newdata = EXTENDED_CLR(gpio); - set_value_reg32(MSP_GPIO_DATA_REGISTER[gpio], - EXTENDED_FULL_MASK, newdata); - } - } -} - -/* Sets the specified pin to the specified value */ -static inline void msp_gpio_pin_hi(unsigned int gpio) -{ - msp_gpio_pin_set(MSP_GPIO_HI, gpio); -} - -/* Sets the specified pin to the specified value */ -static inline void msp_gpio_pin_lo(unsigned int gpio) -{ - msp_gpio_pin_set(MSP_GPIO_LO, gpio); -} - -/* Sets the specified pin to the opposite value */ -static inline void msp_gpio_pin_toggle(unsigned int gpio) -{ - msp_gpio_pin_set(MSP_GPIO_TOGGLE, gpio); -} - -/* Gets the mode of the specified pin */ -static inline enum msp_gpio_mode msp_gpio_pin_get_mode(unsigned int gpio) -{ - enum msp_gpio_mode retval = MSP_GPIO_UNKNOWN; - uint32_t data; - - if (gpio >= MSP_NUM_GPIOS) - return retval; - - data = *MSP_GPIO_MODE_REGISTER[gpio]; - - if (gpio < 16) { - retval = BASIC_MODE_FROM_REG(data, gpio); - } else { - /* Extended pins can only be either INPUT or OUTPUT */ - if (data & EXTENDED_ENABLE(gpio)) - retval = MSP_GPIO_OUTPUT; - else - retval = MSP_GPIO_INPUT; - } - - return retval; -} - -/* - * Sets the specified mode on the requested pin - * Returns 0 on success, or -1 if that mode is not allowed on this pin - */ -static inline int msp_gpio_pin_mode(enum msp_gpio_mode mode, unsigned int gpio) -{ - u32 modemask, newmode; - - if ((1 << gpio) & ~MSP_GPIO_MODE_ALLOWED[mode]) - return -1; - - if (gpio >= MSP_NUM_GPIOS) - return -1; - - if (gpio < 16) { - modemask = BASIC_MODE_MASK(gpio); - newmode = BASIC_MODE(mode, gpio); - } else { - modemask = EXTENDED_FULL_MASK; - if (mode == MSP_GPIO_INPUT) - newmode = EXTENDED_DISABLE(gpio); - else - newmode = EXTENDED_ENABLE(gpio); - } - /* Do the set atomically */ - set_value_reg32(MSP_GPIO_MODE_REGISTER[gpio], modemask, newmode); - - return 0; -} - -#endif /* __MSP_GPIO_MACROS_H__ */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h deleted file mode 100644 index 55078b40f5b5..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_int.h +++ /dev/null @@ -1,31 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Defines for the MSP interrupt handlers. - * - * Copyright (C) 2005, PMC-Sierra, Inc. All rights reserved. - * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#ifndef _MSP_INT_H -#define _MSP_INT_H - -/* - * The PMC-Sierra MSP product line has at least two different interrupt - * controllers, the SLP register based scheme and the CIC interrupt - * controller block mechanism. This file distinguishes between them - * so that devices see a uniform interface. - */ - -#if defined(CONFIG_IRQ_MSP_SLP) - #include "msp_slp_int.h" -#elif defined(CONFIG_IRQ_MSP_CIC) - #include "msp_cic_int.h" -#else - #error "What sort of interrupt controller does *your* MSP have?" -#endif - -#endif /* !_MSP_INT_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h deleted file mode 100644 index 5b2535efceb2..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_pci.h +++ /dev/null @@ -1,189 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (c) 2000-2006 PMC-Sierra INC. - * - * PMC-SIERRA INC. DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS - * SOFTWARE. - */ - -#ifndef _MSP_PCI_H_ -#define _MSP_PCI_H_ - -#define MSP_HAS_PCI(ID) (((u32)(ID) <= 0x4236) && ((u32)(ID) >= 0x4220)) - -/* - * It is convenient to program the OATRAN register so that - * Athena virtual address space and PCI address space are - * the same. This is not a requirement, just a convenience. - * - * The only hard restrictions on the value of OATRAN is that - * OATRAN must not be programmed to allow translated memory - * addresses to fall within the lowest 512MB of - * PCI address space. This region is hardcoded - * for use as Athena PCI Host Controller target - * access memory space to the Athena's SDRAM. - * - * Note that OATRAN applies only to memory accesses, not - * to I/O accesses. - * - * To program OATRAN to make Athena virtual address space - * and PCI address space have the same values, OATRAN - * is to be programmed to 0xB8000000. The top seven - * bits of the value mimic the seven bits clipped off - * by the PCI Host controller. - * - * With OATRAN at the said value, when the CPU does - * an access to its virtual address at, say 0xB900_5000, - * the address appearing on the PCI bus will be - * 0xB900_5000. - * - Michael Penner - */ -#define MSP_PCI_OATRAN 0xB8000000UL - -#define MSP_PCI_SPACE_BASE (MSP_PCI_OATRAN + 0x1002000UL) -#define MSP_PCI_SPACE_SIZE (0x3000000UL - 0x2000) -#define MSP_PCI_SPACE_END \ - (MSP_PCI_SPACE_BASE + MSP_PCI_SPACE_SIZE - 1) -#define MSP_PCI_IOSPACE_BASE (MSP_PCI_OATRAN + 0x1001000UL) -#define MSP_PCI_IOSPACE_SIZE 0x1000 -#define MSP_PCI_IOSPACE_END \ - (MSP_PCI_IOSPACE_BASE + MSP_PCI_IOSPACE_SIZE - 1) - -/* IRQ for PCI status interrupts */ -#define PCI_STAT_IRQ 20 - -#define QFLUSH_REG_1 0xB7F40000 - -typedef volatile unsigned int pcireg; -typedef void * volatile ppcireg; - -struct pci_block_copy -{ - pcireg unused1; /* +0x00 */ - pcireg unused2; /* +0x04 */ - ppcireg unused3; /* +0x08 */ - ppcireg unused4; /* +0x0C */ - pcireg unused5; /* +0x10 */ - pcireg unused6; /* +0x14 */ - pcireg unused7; /* +0x18 */ - ppcireg unused8; /* +0x1C */ - ppcireg unused9; /* +0x20 */ - pcireg unusedA; /* +0x24 */ - ppcireg unusedB; /* +0x28 */ - ppcireg unusedC; /* +0x2C */ -}; - -enum -{ - config_device_vendor, /* 0 */ - config_status_command, /* 1 */ - config_class_revision, /* 2 */ - config_BIST_header_latency_cache, /* 3 */ - config_BAR0, /* 4 */ - config_BAR1, /* 5 */ - config_BAR2, /* 6 */ - config_not_used7, /* 7 */ - config_not_used8, /* 8 */ - config_not_used9, /* 9 */ - config_CIS, /* 10 */ - config_subsystem, /* 11 */ - config_not_used12, /* 12 */ - config_capabilities, /* 13 */ - config_not_used14, /* 14 */ - config_lat_grant_irq, /* 15 */ - config_message_control,/* 16 */ - config_message_addr, /* 17 */ - config_message_data, /* 18 */ - config_VPD_addr, /* 19 */ - config_VPD_data, /* 20 */ - config_maxregs /* 21 - number of registers */ -}; - -struct msp_pci_regs -{ - pcireg hop_unused_00; /* +0x00 */ - pcireg hop_unused_04; /* +0x04 */ - pcireg hop_unused_08; /* +0x08 */ - pcireg hop_unused_0C; /* +0x0C */ - pcireg hop_unused_10; /* +0x10 */ - pcireg hop_unused_14; /* +0x14 */ - pcireg hop_unused_18; /* +0x18 */ - pcireg hop_unused_1C; /* +0x1C */ - pcireg hop_unused_20; /* +0x20 */ - pcireg hop_unused_24; /* +0x24 */ - pcireg hop_unused_28; /* +0x28 */ - pcireg hop_unused_2C; /* +0x2C */ - pcireg hop_unused_30; /* +0x30 */ - pcireg hop_unused_34; /* +0x34 */ - pcireg if_control; /* +0x38 */ - pcireg oatran; /* +0x3C */ - pcireg reset_ctl; /* +0x40 */ - pcireg config_addr; /* +0x44 */ - pcireg hop_unused_48; /* +0x48 */ - pcireg msg_signaled_int_status; /* +0x4C */ - pcireg msg_signaled_int_mask; /* +0x50 */ - pcireg if_status; /* +0x54 */ - pcireg if_mask; /* +0x58 */ - pcireg hop_unused_5C; /* +0x5C */ - pcireg hop_unused_60; /* +0x60 */ - pcireg hop_unused_64; /* +0x64 */ - pcireg hop_unused_68; /* +0x68 */ - pcireg hop_unused_6C; /* +0x6C */ - pcireg hop_unused_70; /* +0x70 */ - - struct pci_block_copy pci_bc[2] __attribute__((aligned(64))); - - pcireg error_hdr1; /* +0xE0 */ - pcireg error_hdr2; /* +0xE4 */ - - pcireg config[config_maxregs] __attribute__((aligned(256))); - -}; - -#define BPCI_CFGADDR_BUSNUM_SHF 16 -#define BPCI_CFGADDR_FUNCTNUM_SHF 8 -#define BPCI_CFGADDR_REGNUM_SHF 2 -#define BPCI_CFGADDR_ENABLE (1<<31) - -#define BPCI_IFCONTROL_RTO (1<<20) /* Retry timeout */ -#define BPCI_IFCONTROL_HCE (1<<16) /* Host configuration enable */ -#define BPCI_IFCONTROL_CTO_SHF 12 /* Shift count for CTO bits */ -#define BPCI_IFCONTROL_SE (1<<5) /* Enable exceptions on errors */ -#define BPCI_IFCONTROL_BIST (1<<4) /* Use BIST in per. mode */ -#define BPCI_IFCONTROL_CAP (1<<3) /* Enable capabilities */ -#define BPCI_IFCONTROL_MMC_SHF 0 /* Shift count for MMC bits */ - -#define BPCI_IFSTATUS_MGT (1<<8) /* Master Grant timeout */ -#define BPCI_IFSTATUS_MTT (1<<9) /* Master TRDY timeout */ -#define BPCI_IFSTATUS_MRT (1<<10) /* Master retry timeout */ -#define BPCI_IFSTATUS_BC0F (1<<13) /* Block copy 0 fault */ -#define BPCI_IFSTATUS_BC1F (1<<14) /* Block copy 1 fault */ -#define BPCI_IFSTATUS_PCIU (1<<15) /* PCI unable to respond */ -#define BPCI_IFSTATUS_BSIZ (1<<16) /* PCI access with illegal size */ -#define BPCI_IFSTATUS_BADD (1<<17) /* PCI access with illegal addr */ -#define BPCI_IFSTATUS_RTO (1<<18) /* Retry time out */ -#define BPCI_IFSTATUS_SER (1<<19) /* System error */ -#define BPCI_IFSTATUS_PER (1<<20) /* Parity error */ -#define BPCI_IFSTATUS_LCA (1<<21) /* Local CPU abort */ -#define BPCI_IFSTATUS_MEM (1<<22) /* Memory prot. violation */ -#define BPCI_IFSTATUS_ARB (1<<23) /* Arbiter timed out */ -#define BPCI_IFSTATUS_STA (1<<27) /* Signaled target abort */ -#define BPCI_IFSTATUS_TA (1<<28) /* Target abort */ -#define BPCI_IFSTATUS_MA (1<<29) /* Master abort */ -#define BPCI_IFSTATUS_PEI (1<<30) /* Parity error as initiator */ -#define BPCI_IFSTATUS_PET (1<<31) /* Parity error as target */ - -#define BPCI_RESETCTL_PR (1<<0) /* True if reset asserted */ -#define BPCI_RESETCTL_RT (1<<4) /* Release time */ -#define BPCI_RESETCTL_CT (1<<8) /* Config time */ -#define BPCI_RESETCTL_PE (1<<12) /* PCI enabled */ -#define BPCI_RESETCTL_HM (1<<13) /* PCI host mode */ -#define BPCI_RESETCTL_RI (1<<14) /* PCI reset in */ - -extern struct msp_pci_regs msp_pci_regs - __attribute__((section(".register"))); -extern unsigned long msp_pci_config_space - __attribute__((section(".register"))); - -#endif /* !_MSP_PCI_H_ */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h deleted file mode 100644 index 4120a01c30a9..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_prom.h +++ /dev/null @@ -1,159 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * MIPS boards bootprom interface for the Linux kernel. - * - * Copyright (C) 2000 MIPS Technologies, Inc. All rights reserved. - * Author: Carsten Langgaard, carstenl@mips.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#ifndef _ASM_MSP_PROM_H -#define _ASM_MSP_PROM_H - -#include <linux/types.h> - -#define DEVICEID "deviceid" -#define FEATURES "features" -#define PROM_ENV "prom_env" -#define PROM_ENV_FILE "/proc/"PROM_ENV -#define PROM_ENV_SIZE 256 - -#define CPU_DEVID_FAMILY 0x0000ff00 -#define CPU_DEVID_REVISION 0x000000ff - -#define FPGA_IS_POLO(revision) \ - (((revision >= 0xb0) && (revision < 0xd0))) -#define FPGA_IS_5000(revision) \ - ((revision >= 0x80) && (revision <= 0x90)) -#define FPGA_IS_ZEUS(revision) ((revision < 0x7f)) -#define FPGA_IS_DUET(revision) \ - (((revision >= 0xa0) && (revision < 0xb0))) -#define FPGA_IS_MSP4200(revision) ((revision >= 0xd0)) -#define FPGA_IS_MSP7100(revision) ((revision >= 0xd0)) - -#define MACHINE_TYPE_POLO "POLO" -#define MACHINE_TYPE_DUET "DUET" -#define MACHINE_TYPE_ZEUS "ZEUS" -#define MACHINE_TYPE_MSP2000REVB "MSP2000REVB" -#define MACHINE_TYPE_MSP5000 "MSP5000" -#define MACHINE_TYPE_MSP4200 "MSP4200" -#define MACHINE_TYPE_MSP7120 "MSP7120" -#define MACHINE_TYPE_MSP7130 "MSP7130" -#define MACHINE_TYPE_OTHER "OTHER" - -#define MACHINE_TYPE_POLO_FPGA "POLO-FPGA" -#define MACHINE_TYPE_DUET_FPGA "DUET-FPGA" -#define MACHINE_TYPE_ZEUS_FPGA "ZEUS_FPGA" -#define MACHINE_TYPE_MSP2000REVB_FPGA "MSP2000REVB-FPGA" -#define MACHINE_TYPE_MSP5000_FPGA "MSP5000-FPGA" -#define MACHINE_TYPE_MSP4200_FPGA "MSP4200-FPGA" -#define MACHINE_TYPE_MSP7100_FPGA "MSP7100-FPGA" -#define MACHINE_TYPE_OTHER_FPGA "OTHER-FPGA" - -/* Device Family definitions */ -#define FAMILY_FPGA 0x0000 -#define FAMILY_ZEUS 0x1000 -#define FAMILY_POLO 0x2000 -#define FAMILY_DUET 0x4000 -#define FAMILY_TRIAD 0x5000 -#define FAMILY_MSP4200 0x4200 -#define FAMILY_MSP4200_FPGA 0x4f00 -#define FAMILY_MSP7100 0x7100 -#define FAMILY_MSP7100_FPGA 0x7f00 - -/* Device Type definitions */ -#define TYPE_MSP7120 0x7120 -#define TYPE_MSP7130 0x7130 - -#define ENET_KEY 'E' -#define ENETTXD_KEY 'e' -#define PCI_KEY 'P' -#define PCIMUX_KEY 'p' -#define SEC_KEY 'S' -#define SPAD_KEY 'D' -#define TDM_KEY 'T' -#define ZSP_KEY 'Z' - -#define FEATURE_NOEXIST '-' -#define FEATURE_EXIST '+' - -#define ENET_MII 'M' -#define ENET_RMII 'R' - -#define ENETTXD_FALLING 'F' -#define ENETTXD_RISING 'R' - -#define PCI_HOST 'H' -#define PCI_PERIPHERAL 'P' - -#define PCIMUX_FULL 'F' -#define PCIMUX_SINGLE 'S' - -#define SEC_DUET 'D' -#define SEC_POLO 'P' -#define SEC_SLOW 'S' -#define SEC_TRIAD 'T' - -#define SPAD_POLO 'P' - -#define TDM_DUET 'D' /* DUET TDMs might exist */ -#define TDM_POLO 'P' /* POLO TDMs might exist */ -#define TDM_TRIAD 'T' /* TRIAD TDMs might exist */ - -#define ZSP_DUET 'D' /* one DUET zsp engine */ -#define ZSP_TRIAD 'T' /* two TRIAD zsp engines */ - -extern char *prom_getenv(char *name); -extern void prom_init_cmdline(void); -extern void prom_meminit(void); -extern void prom_fixup_mem_map(unsigned long start_mem, - unsigned long end_mem); - -extern int get_ethernet_addr(char *ethaddr_name, char *ethernet_addr); -extern unsigned long get_deviceid(void); -extern char identify_enet(unsigned long interface_num); -extern char identify_enetTxD(unsigned long interface_num); -extern char identify_pci(void); -extern char identify_sec(void); -extern char identify_spad(void); -extern char identify_sec(void); -extern char identify_tdm(void); -extern char identify_zsp(void); -extern unsigned long identify_family(void); -extern unsigned long identify_revision(void); - -/* - * The following macro calls prom_printf and puts the format string - * into an init section so it can be reclaimed. - */ -#define ppfinit(f, x...) \ - do { \ - static char _f[] __initdata = KERN_INFO f; \ - printk(_f, ## x); \ - } while (0) - -/* Memory descriptor management. */ -#define PROM_MAX_PMEMBLOCKS 7 /* 6 used */ - -enum yamon_memtypes { - yamon_dontuse, - yamon_prom, - yamon_free, -}; - -struct prom_pmemblock { - unsigned long base; /* Within KSEG0. */ - unsigned int size; /* In bytes. */ - unsigned int type; /* free or prom memory */ -}; - -extern int prom_argc; -extern char **prom_argv; -extern char **prom_envp; -extern int *prom_vec; -extern struct prom_pmemblock *prom_getmdesc(void); - -#endif /* !_ASM_MSP_PROM_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h deleted file mode 100644 index 90dbe43c8d27..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regops.h +++ /dev/null @@ -1,237 +0,0 @@ -/* - * SMP/VPE-safe functions to access "registers" (see note). - * - * NOTES: -* - These macros use ll/sc instructions, so it is your responsibility to - * ensure these are available on your platform before including this file. - * - The MIPS32 spec states that ll/sc results are undefined for uncached - * accesses. This means they can't be used on HW registers accessed - * through kseg1. Code which requires these macros for this purpose must - * front-end the registers with cached memory "registers" and have a single - * thread update the actual HW registers. - * - A maximum of 2k of code can be inserted between ll and sc. Every - * memory accesses between the instructions will increase the chance of - * sc failing and having to loop. - * - When using custom_read_reg32/custom_write_reg32 only perform the - * necessary logical operations on the register value in between these - * two calls. All other logic should be performed before the first call. - * - There is a bug on the R10000 chips which has a workaround. If you - * are affected by this bug, make sure to define the symbol 'R10000_LLSC_WAR' - * to be non-zero. If you are using this header from within linux, you may - * include <asm/war.h> before including this file to have this defined - * appropriately for you. - * - * Copyright 2005-2007 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., 675 - * Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifndef __ASM_REGOPS_H__ -#define __ASM_REGOPS_H__ - -#include <linux/types.h> - -#include <asm/compiler.h> -#include <asm/war.h> - -#ifndef R10000_LLSC_WAR -#define R10000_LLSC_WAR 0 -#endif - -#if R10000_LLSC_WAR == 1 -#define __beqz "beqzl " -#else -#define __beqz "beqz " -#endif - -#ifndef _LINUX_TYPES_H -typedef unsigned int u32; -#endif - -/* - * Sets all the masked bits to the corresponding value bits - */ -static inline void set_value_reg32(volatile u32 *const addr, - u32 const mask, - u32 const value) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: ll %0, %1 # set_value_reg32 \n" - " and %0, %2 \n" - " or %0, %3 \n" - " sc %0, %1 \n" - " "__beqz"%0, 1b \n" - " nop \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr) - : "ir" (~mask), "ir" (value), GCC_OFF_SMALL_ASM() (*addr)); -} - -/* - * Sets all the masked bits to '1' - */ -static inline void set_reg32(volatile u32 *const addr, - u32 const mask) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: ll %0, %1 # set_reg32 \n" - " or %0, %2 \n" - " sc %0, %1 \n" - " "__beqz"%0, 1b \n" - " nop \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr) - : "ir" (mask), GCC_OFF_SMALL_ASM() (*addr)); -} - -/* - * Sets all the masked bits to '0' - */ -static inline void clear_reg32(volatile u32 *const addr, - u32 const mask) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: ll %0, %1 # clear_reg32 \n" - " and %0, %2 \n" - " sc %0, %1 \n" - " "__beqz"%0, 1b \n" - " nop \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr) - : "ir" (~mask), GCC_OFF_SMALL_ASM() (*addr)); -} - -/* - * Toggles all masked bits from '0' to '1' and '1' to '0' - */ -static inline void toggle_reg32(volatile u32 *const addr, - u32 const mask) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set arch=r4000 \n" - "1: ll %0, %1 # toggle_reg32 \n" - " xor %0, %2 \n" - " sc %0, %1 \n" - " "__beqz"%0, 1b \n" - " nop \n" - " .set pop \n" - : "=&r" (temp), "=" GCC_OFF_SMALL_ASM() (*addr) - : "ir" (mask), GCC_OFF_SMALL_ASM() (*addr)); -} - -/* - * Read all masked bits others are returned as '0' - */ -static inline u32 read_reg32(volatile u32 *const addr, - u32 const mask) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " lw %0, %1 # read \n" - " and %0, %2 # mask \n" - " .set pop \n" - : "=&r" (temp) - : "m" (*addr), "ir" (mask)); - - return temp; -} - -/* - * blocking_read_reg32 - Read address with blocking load - * - * Uncached writes need to be read back to ensure they reach RAM. - * The returned value must be 'used' to prevent from becoming a - * non-blocking load. - */ -static inline u32 blocking_read_reg32(volatile u32 *const addr) -{ - u32 temp; - - __asm__ __volatile__( - " .set push \n" - " .set noreorder \n" - " lw %0, %1 # read \n" - " move %0, %0 # block \n" - " .set pop \n" - : "=&r" (temp) - : "m" (*addr)); - - return temp; -} - -/* - * For special strange cases only: - * - * If you need custom processing within a ll/sc loop, use the following macros - * VERY CAREFULLY: - * - * u32 tmp; <-- Define a variable to hold the data - * - * custom_read_reg32(address, tmp); <-- Reads the address and put the value - * in the 'tmp' variable given - * - * From here on out, you are (basically) atomic, so don't do anything too - * fancy! - * Also, this code may loop if the end of this block fails to write - * everything back safely due do the other CPU, so do NOT do anything - * with side-effects! - * - * custom_write_reg32(address, tmp); <-- Writes back 'tmp' safely. - */ -#define custom_read_reg32(address, tmp) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set arch=r4000 \n" \ - "1: ll %0, %1 #custom_read_reg32 \n" \ - " .set pop \n" \ - : "=r" (tmp), "=" GCC_OFF_SMALL_ASM() (*address) \ - : GCC_OFF_SMALL_ASM() (*address)) - -#define custom_write_reg32(address, tmp) \ - __asm__ __volatile__( \ - " .set push \n" \ - " .set arch=r4000 \n" \ - " sc %0, %1 #custom_write_reg32 \n" \ - " "__beqz"%0, 1b \n" \ - " nop \n" \ - " .set pop \n" \ - : "=&r" (tmp), "=" GCC_OFF_SMALL_ASM() (*address) \ - : "0" (tmp), GCC_OFF_SMALL_ASM() (*address)) - -#endif /* __ASM_REGOPS_H__ */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h deleted file mode 100644 index e2ce9be51f3f..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_regs.h +++ /dev/null @@ -1,652 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Defines for the address space, registers and register configuration - * (bit masks, access macros etc) for the PMC-Sierra line of MSP products. - * This file contains addess maps for all the devices in the line of - * products but only has register definitions and configuration masks for - * registers which aren't definitely associated with any device. Things - * like clock settings, reset access, the ELB etc. Individual device - * drivers will reference the appropriate XXX_BASE value defined here - * and have individual registers offset from that. - * - * Copyright (C) 2005-2007 PMC-Sierra, Inc. All rights reserved. - * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#include <asm/addrspace.h> -#include <linux/types.h> - -#ifndef _ASM_MSP_REGS_H -#define _ASM_MSP_REGS_H - -/* - ######################################################################## - # Address space and device base definitions # - ######################################################################## - */ - -/* - *************************************************************************** - * System Logic and Peripherals (ELB, UART0, etc) device address space * - *************************************************************************** - */ -#define MSP_SLP_BASE 0x1c000000 - /* System Logic and Peripherals */ -#define MSP_RST_BASE (MSP_SLP_BASE + 0x10) - /* System reset register base */ -#define MSP_RST_SIZE 0x0C /* System reset register space */ - -#define MSP_WTIMER_BASE (MSP_SLP_BASE + 0x04C) - /* watchdog timer base */ -#define MSP_ITIMER_BASE (MSP_SLP_BASE + 0x054) - /* internal timer base */ -#define MSP_UART0_BASE (MSP_SLP_BASE + 0x100) - /* UART0 controller base */ -#define MSP_BCPY_CTRL_BASE (MSP_SLP_BASE + 0x120) - /* Block Copy controller base */ -#define MSP_BCPY_DESC_BASE (MSP_SLP_BASE + 0x160) - /* Block Copy descriptor base */ - -/* - *************************************************************************** - * PCI address space * - *************************************************************************** - */ -#define MSP_PCI_BASE 0x19000000 - -/* - *************************************************************************** - * MSbus device address space * - *************************************************************************** - */ -#define MSP_MSB_BASE 0x18000000 - /* MSbus address start */ -#define MSP_PER_BASE (MSP_MSB_BASE + 0x400000) - /* Peripheral device registers */ -#define MSP_MAC0_BASE (MSP_MSB_BASE + 0x600000) - /* MAC A device registers */ -#define MSP_MAC1_BASE (MSP_MSB_BASE + 0x700000) - /* MAC B device registers */ -#define MSP_MAC_SIZE 0xE0 /* MAC register space */ - -#define MSP_SEC_BASE (MSP_MSB_BASE + 0x800000) - /* Security Engine registers */ -#define MSP_MAC2_BASE (MSP_MSB_BASE + 0x900000) - /* MAC C device registers */ -#define MSP_ADSL2_BASE (MSP_MSB_BASE + 0xA80000) - /* ADSL2 device registers */ -#define MSP_USB0_BASE (MSP_MSB_BASE + 0xB00000) - /* USB0 device registers */ -#define MSP_USB1_BASE (MSP_MSB_BASE + 0x300000) - /* USB1 device registers */ -#define MSP_CPUIF_BASE (MSP_MSB_BASE + 0xC00000) - /* CPU interface registers */ - -/* Devices within the MSbus peripheral block */ -#define MSP_UART1_BASE (MSP_PER_BASE + 0x030) - /* UART1 controller base */ -#define MSP_SPI_BASE (MSP_PER_BASE + 0x058) - /* SPI/MPI control registers */ -#define MSP_TWI_BASE (MSP_PER_BASE + 0x090) - /* Two-wire control registers */ -#define MSP_PTIMER_BASE (MSP_PER_BASE + 0x0F0) - /* Programmable timer control */ - -/* - *************************************************************************** - * Physical Memory configuration address space * - *************************************************************************** - */ -#define MSP_MEM_CFG_BASE 0x17f00000 - -#define MSP_MEM_INDIRECT_CTL_10 0x10 - -/* - * Notes: - * 1) The SPI registers are split into two blocks, one offset from the - * MSP_SPI_BASE by 0x00 and the other offset from the MSP_SPI_BASE by - * 0x68. The SPI driver definitions for the register must be aware - * of this. - * 2) The block copy engine register are divided into two regions, one - * for the control/configuration of the engine proper and one for the - * values of the descriptors used in the copy process. These have - * different base defines (CTRL_BASE vs DESC_BASE) - * 3) These constants are for physical addresses which means that they - * work correctly with "ioremap" and friends. This means that device - * drivers will need to remap these addresses using ioremap and perhaps - * the readw/writew macros. Or they could use the regptr() macro - * defined below, but the readw/writew calls are the correct thing. - * 4) The UARTs have an additional status register offset from the base - * address. This register isn't used in the standard 8250 driver but - * may be used in other software. Consult the hardware datasheet for - * offset details. - * 5) For some unknown reason the security engine (MSP_SEC_BASE) registers - * start at an offset of 0x84 from the base address but the block of - * registers before this is reserved for the security engine. The - * driver will have to be aware of this but it makes the register - * definitions line up better with the documentation. - */ - -/* - ######################################################################## - # System register definitions. Not associated with a specific device # - ######################################################################## - */ - -/* - * This macro maps the physical register number into uncached space - * and (for C code) casts it into a u32 pointer so it can be dereferenced - * Normally these would be accessed with ioremap and readX/writeX, but - * these are convenient for a lot of internal kernel code. - */ -#ifdef __ASSEMBLER__ - #define regptr(addr) (KSEG1ADDR(addr)) -#else - #define regptr(addr) ((volatile u32 *const)(KSEG1ADDR(addr))) -#endif - -/* - *************************************************************************** - * System Logic and Peripherals (RESET, ELB, etc) registers * - *************************************************************************** - */ - -/* System Control register definitions */ -#define DEV_ID_REG regptr(MSP_SLP_BASE + 0x00) - /* Device-ID RO */ -#define FWR_ID_REG regptr(MSP_SLP_BASE + 0x04) - /* Firmware-ID Register RW */ -#define SYS_ID_REG0 regptr(MSP_SLP_BASE + 0x08) - /* System-ID Register-0 RW */ -#define SYS_ID_REG1 regptr(MSP_SLP_BASE + 0x0C) - /* System-ID Register-1 RW */ - -/* System Reset register definitions */ -#define RST_STS_REG regptr(MSP_SLP_BASE + 0x10) - /* System Reset Status RO */ -#define RST_SET_REG regptr(MSP_SLP_BASE + 0x14) - /* System Set Reset WO */ -#define RST_CLR_REG regptr(MSP_SLP_BASE + 0x18) - /* System Clear Reset WO */ - -/* System Clock Registers */ -#define PCI_SLP_REG regptr(MSP_SLP_BASE + 0x1C) - /* PCI clock generator RW */ -#define URT_SLP_REG regptr(MSP_SLP_BASE + 0x20) - /* UART clock generator RW */ -/* reserved (MSP_SLP_BASE + 0x24) */ -/* reserved (MSP_SLP_BASE + 0x28) */ -#define PLL1_SLP_REG regptr(MSP_SLP_BASE + 0x2C) - /* PLL1 clock generator RW */ -#define PLL0_SLP_REG regptr(MSP_SLP_BASE + 0x30) - /* PLL0 clock generator RW */ -#define MIPS_SLP_REG regptr(MSP_SLP_BASE + 0x34) - /* MIPS clock generator RW */ -#define VE_SLP_REG regptr(MSP_SLP_BASE + 0x38) - /* Voice Eng clock generator RW */ -/* reserved (MSP_SLP_BASE + 0x3C) */ -#define MSB_SLP_REG regptr(MSP_SLP_BASE + 0x40) - /* MS-Bus clock generator RW */ -#define SMAC_SLP_REG regptr(MSP_SLP_BASE + 0x44) - /* Sec & MAC clock generator RW */ -#define PERF_SLP_REG regptr(MSP_SLP_BASE + 0x48) - /* Per & TDM clock generator RW */ - -/* Interrupt Controller Registers */ -#define SLP_INT_STS_REG regptr(MSP_SLP_BASE + 0x70) - /* Interrupt status register RW */ -#define SLP_INT_MSK_REG regptr(MSP_SLP_BASE + 0x74) - /* Interrupt enable/mask RW */ -#define SE_MBOX_REG regptr(MSP_SLP_BASE + 0x78) - /* Security Engine mailbox RW */ -#define VE_MBOX_REG regptr(MSP_SLP_BASE + 0x7C) - /* Voice Engine mailbox RW */ - -/* ELB Controller Registers */ -#define CS0_CNFG_REG regptr(MSP_SLP_BASE + 0x80) - /* ELB CS0 Configuration Reg */ -#define CS0_ADDR_REG regptr(MSP_SLP_BASE + 0x84) - /* ELB CS0 Base Address Reg */ -#define CS0_MASK_REG regptr(MSP_SLP_BASE + 0x88) - /* ELB CS0 Mask Register */ -#define CS0_ACCESS_REG regptr(MSP_SLP_BASE + 0x8C) - /* ELB CS0 access register */ - -#define CS1_CNFG_REG regptr(MSP_SLP_BASE + 0x90) - /* ELB CS1 Configuration Reg */ -#define CS1_ADDR_REG regptr(MSP_SLP_BASE + 0x94) - /* ELB CS1 Base Address Reg */ -#define CS1_MASK_REG regptr(MSP_SLP_BASE + 0x98) - /* ELB CS1 Mask Register */ -#define CS1_ACCESS_REG regptr(MSP_SLP_BASE + 0x9C) - /* ELB CS1 access register */ - -#define CS2_CNFG_REG regptr(MSP_SLP_BASE + 0xA0) - /* ELB CS2 Configuration Reg */ -#define CS2_ADDR_REG regptr(MSP_SLP_BASE + 0xA4) - /* ELB CS2 Base Address Reg */ -#define CS2_MASK_REG regptr(MSP_SLP_BASE + 0xA8) - /* ELB CS2 Mask Register */ -#define CS2_ACCESS_REG regptr(MSP_SLP_BASE + 0xAC) - /* ELB CS2 access register */ - -#define CS3_CNFG_REG regptr(MSP_SLP_BASE + 0xB0) - /* ELB CS3 Configuration Reg */ -#define CS3_ADDR_REG regptr(MSP_SLP_BASE + 0xB4) - /* ELB CS3 Base Address Reg */ -#define CS3_MASK_REG regptr(MSP_SLP_BASE + 0xB8) - /* ELB CS3 Mask Register */ -#define CS3_ACCESS_REG regptr(MSP_SLP_BASE + 0xBC) - /* ELB CS3 access register */ - -#define CS4_CNFG_REG regptr(MSP_SLP_BASE + 0xC0) - /* ELB CS4 Configuration Reg */ -#define CS4_ADDR_REG regptr(MSP_SLP_BASE + 0xC4) - /* ELB CS4 Base Address Reg */ -#define CS4_MASK_REG regptr(MSP_SLP_BASE + 0xC8) - /* ELB CS4 Mask Register */ -#define CS4_ACCESS_REG regptr(MSP_SLP_BASE + 0xCC) - /* ELB CS4 access register */ - -#define CS5_CNFG_REG regptr(MSP_SLP_BASE + 0xD0) - /* ELB CS5 Configuration Reg */ -#define CS5_ADDR_REG regptr(MSP_SLP_BASE + 0xD4) - /* ELB CS5 Base Address Reg */ -#define CS5_MASK_REG regptr(MSP_SLP_BASE + 0xD8) - /* ELB CS5 Mask Register */ -#define CS5_ACCESS_REG regptr(MSP_SLP_BASE + 0xDC) - /* ELB CS5 access register */ - -/* reserved 0xE0 - 0xE8 */ -#define ELB_1PC_EN_REG regptr(MSP_SLP_BASE + 0xEC) - /* ELB single PC card detect */ - -/* reserved 0xF0 - 0xF8 */ -#define ELB_CLK_CFG_REG regptr(MSP_SLP_BASE + 0xFC) - /* SDRAM read/ELB timing Reg */ - -/* Extended UART status registers */ -#define UART0_STATUS_REG regptr(MSP_UART0_BASE + 0x0c0) - /* UART Status Register 0 */ -#define UART1_STATUS_REG regptr(MSP_UART1_BASE + 0x170) - /* UART Status Register 1 */ - -/* Performance monitoring registers */ -#define PERF_MON_CTRL_REG regptr(MSP_SLP_BASE + 0x140) - /* Performance monitor control */ -#define PERF_MON_CLR_REG regptr(MSP_SLP_BASE + 0x144) - /* Performance monitor clear */ -#define PERF_MON_CNTH_REG regptr(MSP_SLP_BASE + 0x148) - /* Perf monitor counter high */ -#define PERF_MON_CNTL_REG regptr(MSP_SLP_BASE + 0x14C) - /* Perf monitor counter low */ - -/* System control registers */ -#define SYS_CTRL_REG regptr(MSP_SLP_BASE + 0x150) - /* System control register */ -#define SYS_ERR1_REG regptr(MSP_SLP_BASE + 0x154) - /* System Error status 1 */ -#define SYS_ERR2_REG regptr(MSP_SLP_BASE + 0x158) - /* System Error status 2 */ -#define SYS_INT_CFG_REG regptr(MSP_SLP_BASE + 0x15C) - /* System Interrupt config */ - -/* Voice Engine Memory configuration */ -#define VE_MEM_REG regptr(MSP_SLP_BASE + 0x17C) - /* Voice engine memory config */ - -/* CPU/SLP Error Status registers */ -#define CPU_ERR1_REG regptr(MSP_SLP_BASE + 0x180) - /* CPU/SLP Error status 1 */ -#define CPU_ERR2_REG regptr(MSP_SLP_BASE + 0x184) - /* CPU/SLP Error status 1 */ - -/* Extended GPIO registers */ -#define EXTENDED_GPIO1_REG regptr(MSP_SLP_BASE + 0x188) -#define EXTENDED_GPIO2_REG regptr(MSP_SLP_BASE + 0x18c) -#define EXTENDED_GPIO_REG EXTENDED_GPIO1_REG - /* Backward-compatibility */ - -/* System Error registers */ -#define SLP_ERR_STS_REG regptr(MSP_SLP_BASE + 0x190) - /* Int status for SLP errors */ -#define SLP_ERR_MSK_REG regptr(MSP_SLP_BASE + 0x194) - /* Int mask for SLP errors */ -#define SLP_ELB_ERST_REG regptr(MSP_SLP_BASE + 0x198) - /* External ELB reset */ -#define SLP_BOOT_STS_REG regptr(MSP_SLP_BASE + 0x19C) - /* Boot Status */ - -/* Extended ELB addressing */ -#define CS0_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A0) - /* CS0 Extended address */ -#define CS1_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A4) - /* CS1 Extended address */ -#define CS2_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1A8) - /* CS2 Extended address */ -#define CS3_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1AC) - /* CS3 Extended address */ -/* reserved 0x1B0 */ -#define CS5_EXT_ADDR_REG regptr(MSP_SLP_BASE + 0x1B4) - /* CS5 Extended address */ - -/* PLL Adjustment registers */ -#define PLL_LOCK_REG regptr(MSP_SLP_BASE + 0x200) - /* PLL0 lock status */ -#define PLL_ARST_REG regptr(MSP_SLP_BASE + 0x204) - /* PLL Analog reset status */ -#define PLL0_ADJ_REG regptr(MSP_SLP_BASE + 0x208) - /* PLL0 Adjustment value */ -#define PLL1_ADJ_REG regptr(MSP_SLP_BASE + 0x20C) - /* PLL1 Adjustment value */ - -/* - *************************************************************************** - * Peripheral Register definitions * - *************************************************************************** - */ - -/* Peripheral status */ -#define PER_CTRL_REG regptr(MSP_PER_BASE + 0x50) - /* Peripheral control register */ -#define PER_STS_REG regptr(MSP_PER_BASE + 0x54) - /* Peripheral status register */ - -/* SPI/MPI Registers */ -#define SMPI_TX_SZ_REG regptr(MSP_PER_BASE + 0x58) - /* SPI/MPI Tx Size register */ -#define SMPI_RX_SZ_REG regptr(MSP_PER_BASE + 0x5C) - /* SPI/MPI Rx Size register */ -#define SMPI_CTL_REG regptr(MSP_PER_BASE + 0x60) - /* SPI/MPI Control register */ -#define SMPI_MS_REG regptr(MSP_PER_BASE + 0x64) - /* SPI/MPI Chip Select reg */ -#define SMPI_CORE_DATA_REG regptr(MSP_PER_BASE + 0xC0) - /* SPI/MPI Core Data reg */ -#define SMPI_CORE_CTRL_REG regptr(MSP_PER_BASE + 0xC4) - /* SPI/MPI Core Control reg */ -#define SMPI_CORE_STAT_REG regptr(MSP_PER_BASE + 0xC8) - /* SPI/MPI Core Status reg */ -#define SMPI_CORE_SSEL_REG regptr(MSP_PER_BASE + 0xCC) - /* SPI/MPI Core Ssel reg */ -#define SMPI_FIFO_REG regptr(MSP_PER_BASE + 0xD0) - /* SPI/MPI Data FIFO reg */ - -/* Peripheral Block Error Registers */ -#define PER_ERR_STS_REG regptr(MSP_PER_BASE + 0x70) - /* Error Bit Status Register */ -#define PER_ERR_MSK_REG regptr(MSP_PER_BASE + 0x74) - /* Error Bit Mask Register */ -#define PER_HDR1_REG regptr(MSP_PER_BASE + 0x78) - /* Error Header 1 Register */ -#define PER_HDR2_REG regptr(MSP_PER_BASE + 0x7C) - /* Error Header 2 Register */ - -/* Peripheral Block Interrupt Registers */ -#define PER_INT_STS_REG regptr(MSP_PER_BASE + 0x80) - /* Interrupt status register */ -#define PER_INT_MSK_REG regptr(MSP_PER_BASE + 0x84) - /* Interrupt Mask Register */ -#define GPIO_INT_STS_REG regptr(MSP_PER_BASE + 0x88) - /* GPIO interrupt status reg */ -#define GPIO_INT_MSK_REG regptr(MSP_PER_BASE + 0x8C) - /* GPIO interrupt MASK Reg */ - -/* POLO GPIO registers */ -#define POLO_GPIO_DAT1_REG regptr(MSP_PER_BASE + 0x0E0) - /* Polo GPIO[8:0] data reg */ -#define POLO_GPIO_CFG1_REG regptr(MSP_PER_BASE + 0x0E4) - /* Polo GPIO[7:0] config reg */ -#define POLO_GPIO_CFG2_REG regptr(MSP_PER_BASE + 0x0E8) - /* Polo GPIO[15:8] config reg */ -#define POLO_GPIO_OD1_REG regptr(MSP_PER_BASE + 0x0EC) - /* Polo GPIO[31:0] output drive */ -#define POLO_GPIO_CFG3_REG regptr(MSP_PER_BASE + 0x170) - /* Polo GPIO[23:16] config reg */ -#define POLO_GPIO_DAT2_REG regptr(MSP_PER_BASE + 0x174) - /* Polo GPIO[15:9] data reg */ -#define POLO_GPIO_DAT3_REG regptr(MSP_PER_BASE + 0x178) - /* Polo GPIO[23:16] data reg */ -#define POLO_GPIO_DAT4_REG regptr(MSP_PER_BASE + 0x17C) - /* Polo GPIO[31:24] data reg */ -#define POLO_GPIO_DAT5_REG regptr(MSP_PER_BASE + 0x180) - /* Polo GPIO[39:32] data reg */ -#define POLO_GPIO_DAT6_REG regptr(MSP_PER_BASE + 0x184) - /* Polo GPIO[47:40] data reg */ -#define POLO_GPIO_DAT7_REG regptr(MSP_PER_BASE + 0x188) - /* Polo GPIO[54:48] data reg */ -#define POLO_GPIO_CFG4_REG regptr(MSP_PER_BASE + 0x18C) - /* Polo GPIO[31:24] config reg */ -#define POLO_GPIO_CFG5_REG regptr(MSP_PER_BASE + 0x190) - /* Polo GPIO[39:32] config reg */ -#define POLO_GPIO_CFG6_REG regptr(MSP_PER_BASE + 0x194) - /* Polo GPIO[47:40] config reg */ -#define POLO_GPIO_CFG7_REG regptr(MSP_PER_BASE + 0x198) - /* Polo GPIO[54:48] config reg */ -#define POLO_GPIO_OD2_REG regptr(MSP_PER_BASE + 0x19C) - /* Polo GPIO[54:32] output drive */ - -/* Generic GPIO registers */ -#define GPIO_DATA1_REG regptr(MSP_PER_BASE + 0x170) - /* GPIO[1:0] data register */ -#define GPIO_DATA2_REG regptr(MSP_PER_BASE + 0x174) - /* GPIO[5:2] data register */ -#define GPIO_DATA3_REG regptr(MSP_PER_BASE + 0x178) - /* GPIO[9:6] data register */ -#define GPIO_DATA4_REG regptr(MSP_PER_BASE + 0x17C) - /* GPIO[15:10] data register */ -#define GPIO_CFG1_REG regptr(MSP_PER_BASE + 0x180) - /* GPIO[1:0] config register */ -#define GPIO_CFG2_REG regptr(MSP_PER_BASE + 0x184) - /* GPIO[5:2] config register */ -#define GPIO_CFG3_REG regptr(MSP_PER_BASE + 0x188) - /* GPIO[9:6] config register */ -#define GPIO_CFG4_REG regptr(MSP_PER_BASE + 0x18C) - /* GPIO[15:10] config register */ -#define GPIO_OD_REG regptr(MSP_PER_BASE + 0x190) - /* GPIO[15:0] output drive */ - -/* - *************************************************************************** - * CPU Interface register definitions * - *************************************************************************** - */ -#define PCI_FLUSH_REG regptr(MSP_CPUIF_BASE + 0x00) - /* PCI-SDRAM queue flush trigger */ -#define OCP_ERR1_REG regptr(MSP_CPUIF_BASE + 0x04) - /* OCP Error Attribute 1 */ -#define OCP_ERR2_REG regptr(MSP_CPUIF_BASE + 0x08) - /* OCP Error Attribute 2 */ -#define OCP_STS_REG regptr(MSP_CPUIF_BASE + 0x0C) - /* OCP Error Status */ -#define CPUIF_PM_REG regptr(MSP_CPUIF_BASE + 0x10) - /* CPU policy configuration */ -#define CPUIF_CFG_REG regptr(MSP_CPUIF_BASE + 0x10) - /* Misc configuration options */ - -/* Central Interrupt Controller Registers */ -#define MSP_CIC_BASE (MSP_CPUIF_BASE + 0x8000) - /* Central Interrupt registers */ -#define CIC_EXT_CFG_REG regptr(MSP_CIC_BASE + 0x00) - /* External interrupt config */ -#define CIC_STS_REG regptr(MSP_CIC_BASE + 0x04) - /* CIC Interrupt Status */ -#define CIC_VPE0_MSK_REG regptr(MSP_CIC_BASE + 0x08) - /* VPE0 Interrupt Mask */ -#define CIC_VPE1_MSK_REG regptr(MSP_CIC_BASE + 0x0C) - /* VPE1 Interrupt Mask */ -#define CIC_TC0_MSK_REG regptr(MSP_CIC_BASE + 0x10) - /* Thread Context 0 Int Mask */ -#define CIC_TC1_MSK_REG regptr(MSP_CIC_BASE + 0x14) - /* Thread Context 1 Int Mask */ -#define CIC_TC2_MSK_REG regptr(MSP_CIC_BASE + 0x18) - /* Thread Context 2 Int Mask */ -#define CIC_TC3_MSK_REG regptr(MSP_CIC_BASE + 0x18) - /* Thread Context 3 Int Mask */ -#define CIC_TC4_MSK_REG regptr(MSP_CIC_BASE + 0x18) - /* Thread Context 4 Int Mask */ -#define CIC_PCIMSI_STS_REG regptr(MSP_CIC_BASE + 0x18) -#define CIC_PCIMSI_MSK_REG regptr(MSP_CIC_BASE + 0x18) -#define CIC_PCIFLSH_REG regptr(MSP_CIC_BASE + 0x18) -#define CIC_VPE0_SWINT_REG regptr(MSP_CIC_BASE + 0x08) - - -/* - *************************************************************************** - * Memory controller registers * - *************************************************************************** - */ -#define MEM_CFG1_REG regptr(MSP_MEM_CFG_BASE + 0x00) -#define MEM_SS_ADDR regptr(MSP_MEM_CFG_BASE + 0x00) -#define MEM_SS_DATA regptr(MSP_MEM_CFG_BASE + 0x04) -#define MEM_SS_WRITE regptr(MSP_MEM_CFG_BASE + 0x08) - -/* - *************************************************************************** - * PCI controller registers * - *************************************************************************** - */ -#define PCI_BASE_REG regptr(MSP_PCI_BASE + 0x00) -#define PCI_CONFIG_SPACE_REG regptr(MSP_PCI_BASE + 0x800) -#define PCI_JTAG_DEVID_REG regptr(MSP_SLP_BASE + 0x13c) - -/* - ######################################################################## - # Register content & macro definitions # - ######################################################################## - */ - -/* - *************************************************************************** - * DEV_ID defines * - *************************************************************************** - */ -#define DEV_ID_PCI_DIS (1 << 26) /* Set if PCI disabled */ -#define DEV_ID_PCI_HOST (1 << 20) /* Set if PCI host */ -#define DEV_ID_SINGLE_PC (1 << 19) /* Set if single PC Card */ -#define DEV_ID_FAMILY (0xff << 8) /* family ID code */ -#define POLO_ZEUS_SUB_FAMILY (0x7 << 16) /* sub family for Polo/Zeus */ - -#define MSPFPGA_ID (0x00 << 8) /* you are on your own here */ -#define MSP5000_ID (0x50 << 8) -#define MSP4F00_ID (0x4f << 8) /* FPGA version of MSP4200 */ -#define MSP4E00_ID (0x4f << 8) /* FPGA version of MSP7120 */ -#define MSP4200_ID (0x42 << 8) -#define MSP4000_ID (0x40 << 8) -#define MSP2XXX_ID (0x20 << 8) -#define MSPZEUS_ID (0x10 << 8) - -#define MSP2004_SUB_ID (0x0 << 16) -#define MSP2005_SUB_ID (0x1 << 16) -#define MSP2006_SUB_ID (0x1 << 16) -#define MSP2007_SUB_ID (0x2 << 16) -#define MSP2010_SUB_ID (0x3 << 16) -#define MSP2015_SUB_ID (0x4 << 16) -#define MSP2020_SUB_ID (0x5 << 16) -#define MSP2100_SUB_ID (0x6 << 16) - -/* - *************************************************************************** - * RESET defines * - *************************************************************************** - */ -#define MSP_GR_RST (0x01 << 0) /* Global reset bit */ -#define MSP_MR_RST (0x01 << 1) /* MIPS reset bit */ -#define MSP_PD_RST (0x01 << 2) /* PVC DMA reset bit */ -#define MSP_PP_RST (0x01 << 3) /* PVC reset bit */ -/* reserved */ -#define MSP_EA_RST (0x01 << 6) /* Mac A reset bit */ -#define MSP_EB_RST (0x01 << 7) /* Mac B reset bit */ -#define MSP_SE_RST (0x01 << 8) /* Security Eng reset bit */ -#define MSP_PB_RST (0x01 << 9) /* Per block reset bit */ -#define MSP_EC_RST (0x01 << 10) /* Mac C reset bit */ -#define MSP_TW_RST (0x01 << 11) /* TWI reset bit */ -#define MSP_SPI_RST (0x01 << 12) /* SPI/MPI reset bit */ -#define MSP_U1_RST (0x01 << 13) /* UART1 reset bit */ -#define MSP_U0_RST (0x01 << 14) /* UART0 reset bit */ - -/* - *************************************************************************** - * UART defines * - *************************************************************************** - */ -#define MSP_BASE_BAUD 25000000 -#define MSP_UART_REG_LEN 0x20 - -/* - *************************************************************************** - * ELB defines * - *************************************************************************** - */ -#define PCCARD_32 0x02 /* Set if is PCCARD 32 (Cardbus) */ -#define SINGLE_PCCARD 0x01 /* Set to enable single PC card */ - -/* - *************************************************************************** - * CIC defines * - *************************************************************************** - */ - -/* CIC_EXT_CFG_REG */ -#define EXT_INT_POL(eirq) (1 << (eirq + 8)) -#define EXT_INT_EDGE(eirq) (1 << eirq) - -#define CIC_EXT_SET_TRIGGER_LEVEL(reg, eirq) (reg &= ~EXT_INT_EDGE(eirq)) -#define CIC_EXT_SET_TRIGGER_EDGE(reg, eirq) (reg |= EXT_INT_EDGE(eirq)) -#define CIC_EXT_SET_ACTIVE_HI(reg, eirq) (reg |= EXT_INT_POL(eirq)) -#define CIC_EXT_SET_ACTIVE_LO(reg, eirq) (reg &= ~EXT_INT_POL(eirq)) -#define CIC_EXT_SET_ACTIVE_RISING CIC_EXT_SET_ACTIVE_HI -#define CIC_EXT_SET_ACTIVE_FALLING CIC_EXT_SET_ACTIVE_LO - -#define CIC_EXT_IS_TRIGGER_LEVEL(reg, eirq) \ - ((reg & EXT_INT_EDGE(eirq)) == 0) -#define CIC_EXT_IS_TRIGGER_EDGE(reg, eirq) (reg & EXT_INT_EDGE(eirq)) -#define CIC_EXT_IS_ACTIVE_HI(reg, eirq) (reg & EXT_INT_POL(eirq)) -#define CIC_EXT_IS_ACTIVE_LO(reg, eirq) \ - ((reg & EXT_INT_POL(eirq)) == 0) -#define CIC_EXT_IS_ACTIVE_RISING CIC_EXT_IS_ACTIVE_HI -#define CIC_EXT_IS_ACTIVE_FALLING CIC_EXT_IS_ACTIVE_LO - -/* - *************************************************************************** - * Memory Controller defines * - *************************************************************************** - */ - -/* Indirect memory controller registers */ -#define DDRC_CFG(n) (n) -#define DDRC_DEBUG(n) (0x04 + n) -#define DDRC_CTL(n) (0x40 + n) - -/* Macro to perform DDRC indirect write */ -#define DDRC_INDIRECT_WRITE(reg, mask, value) \ -({ \ - *MEM_SS_ADDR = (((mask) & 0xf) << 8) | ((reg) & 0xff); \ - *MEM_SS_DATA = (value); \ - *MEM_SS_WRITE = 1; \ -}) - -/* - *************************************************************************** - * SPI/MPI Mode * - *************************************************************************** - */ -#define SPI_MPI_RX_BUSY 0x00008000 /* SPI/MPI Receive Busy */ -#define SPI_MPI_FIFO_EMPTY 0x00004000 /* SPI/MPI Fifo Empty */ -#define SPI_MPI_TX_BUSY 0x00002000 /* SPI/MPI Transmit Busy */ -#define SPI_MPI_FIFO_FULL 0x00001000 /* SPI/MPU FIFO full */ - -/* - *************************************************************************** - * SPI/MPI Control Register * - *************************************************************************** - */ -#define SPI_MPI_RX_START 0x00000004 /* Start receive command */ -#define SPI_MPI_FLUSH_Q 0x00000002 /* Flush SPI/MPI Queue */ -#define SPI_MPI_TX_START 0x00000001 /* Start Transmit Command */ - -#endif /* !_ASM_MSP_REGS_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h deleted file mode 100644 index 9a763eb5e5f5..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_slp_int.h +++ /dev/null @@ -1,129 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Defines for the MSP interrupt controller. - * - * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved. - * Author: Carsten Langgaard, carstenl@mips.com - * - * ######################################################################## - * - * ######################################################################## - */ - -#ifndef _MSP_SLP_INT_H -#define _MSP_SLP_INT_H - -/* - * The PMC-Sierra SLP interrupts are arranged in a 3 level cascaded - * hierarchical system. The first level are the direct MIPS interrupts - * and are assigned the interrupt range 0-7. The second level is the SLM - * interrupt controller and is assigned the range 8-39. The third level - * comprises the Peripherial block, the PCI block, the PCI MSI block and - * the SLP. The PCI interrupts and the SLP errors are handled by the - * relevant subsystems so the core interrupt code needs only concern - * itself with the Peripheral block. These are assigned interrupts in - * the range 40-71. - */ - -/* - * IRQs directly connected to CPU - */ -#define MSP_MIPS_INTBASE 0 -#define MSP_INT_SW0 0 /* IRQ for swint0, C_SW0 */ -#define MSP_INT_SW1 1 /* IRQ for swint1, C_SW1 */ -#define MSP_INT_MAC0 2 /* IRQ for MAC 0, C_IRQ0 */ -#define MSP_INT_MAC1 3 /* IRQ for MAC 1, C_IRQ1 */ -#define MSP_INT_C_IRQ2 4 /* Wired off, C_IRQ2 */ -#define MSP_INT_VE 5 /* IRQ for Voice Engine, C_IRQ3 */ -#define MSP_INT_SLP 6 /* IRQ for SLM block, C_IRQ4 */ -#define MSP_INT_TIMER 7 /* IRQ for the MIPS timer, C_IRQ5 */ - -/* - * IRQs cascaded on CPU interrupt 4 (CAUSE bit 12, C_IRQ4) - * These defines should be tied to the register definition for the SLM - * interrupt routine. For now, just use hard-coded values. - */ -#define MSP_SLP_INTBASE (MSP_MIPS_INTBASE + 8) -#define MSP_INT_EXT0 (MSP_SLP_INTBASE + 0) - /* External interrupt 0 */ -#define MSP_INT_EXT1 (MSP_SLP_INTBASE + 1) - /* External interrupt 1 */ -#define MSP_INT_EXT2 (MSP_SLP_INTBASE + 2) - /* External interrupt 2 */ -#define MSP_INT_EXT3 (MSP_SLP_INTBASE + 3) - /* External interrupt 3 */ -/* Reserved 4-7 */ - -/* - ************************************************************************* - * DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER/DANGER * - * Some MSP produces have this interrupt labelled as Voice and some are * - * SEC mbox ... * - ************************************************************************* - */ -#define MSP_INT_SLP_VE (MSP_SLP_INTBASE + 8) - /* Cascaded IRQ for Voice Engine*/ -#define MSP_INT_SLP_TDM (MSP_SLP_INTBASE + 9) - /* TDM interrupt */ -#define MSP_INT_SLP_MAC0 (MSP_SLP_INTBASE + 10) - /* Cascaded IRQ for MAC 0 */ -#define MSP_INT_SLP_MAC1 (MSP_SLP_INTBASE + 11) - /* Cascaded IRQ for MAC 1 */ -#define MSP_INT_SEC (MSP_SLP_INTBASE + 12) - /* IRQ for security engine */ -#define MSP_INT_PER (MSP_SLP_INTBASE + 13) - /* Peripheral interrupt */ -#define MSP_INT_TIMER0 (MSP_SLP_INTBASE + 14) - /* SLP timer 0 */ -#define MSP_INT_TIMER1 (MSP_SLP_INTBASE + 15) - /* SLP timer 1 */ -#define MSP_INT_TIMER2 (MSP_SLP_INTBASE + 16) - /* SLP timer 2 */ -#define MSP_INT_SLP_TIMER (MSP_SLP_INTBASE + 17) - /* Cascaded MIPS timer */ -#define MSP_INT_BLKCP (MSP_SLP_INTBASE + 18) - /* Block Copy */ -#define MSP_INT_UART0 (MSP_SLP_INTBASE + 19) - /* UART 0 */ -#define MSP_INT_PCI (MSP_SLP_INTBASE + 20) - /* PCI subsystem */ -#define MSP_INT_PCI_DBELL (MSP_SLP_INTBASE + 21) - /* PCI doorbell */ -#define MSP_INT_PCI_MSI (MSP_SLP_INTBASE + 22) - /* PCI Message Signal */ -#define MSP_INT_PCI_BC0 (MSP_SLP_INTBASE + 23) - /* PCI Block Copy 0 */ -#define MSP_INT_PCI_BC1 (MSP_SLP_INTBASE + 24) - /* PCI Block Copy 1 */ -#define MSP_INT_SLP_ERR (MSP_SLP_INTBASE + 25) - /* SLP error condition */ -#define MSP_INT_MAC2 (MSP_SLP_INTBASE + 26) - /* IRQ for MAC2 */ -/* Reserved 26-31 */ - -/* - * IRQs cascaded on SLP PER interrupt (MSP_INT_PER) - */ -#define MSP_PER_INTBASE (MSP_SLP_INTBASE + 32) -/* Reserved 0-1 */ -#define MSP_INT_UART1 (MSP_PER_INTBASE + 2) - /* UART 1 */ -/* Reserved 3-5 */ -#define MSP_INT_2WIRE (MSP_PER_INTBASE + 6) - /* 2-wire */ -#define MSP_INT_TM0 (MSP_PER_INTBASE + 7) - /* Peripheral timer block out 0 */ -#define MSP_INT_TM1 (MSP_PER_INTBASE + 8) - /* Peripheral timer block out 1 */ -/* Reserved 9 */ -#define MSP_INT_SPRX (MSP_PER_INTBASE + 10) - /* SPI RX complete */ -#define MSP_INT_SPTX (MSP_PER_INTBASE + 11) - /* SPI TX complete */ -#define MSP_INT_GPIO (MSP_PER_INTBASE + 12) - /* GPIO */ -#define MSP_INT_PER_ERR (MSP_PER_INTBASE + 13) - /* Peripheral error */ -/* Reserved 14-31 */ - -#endif /* !_MSP_SLP_INT_H */ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h b/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h deleted file mode 100644 index 3cc3edb336b6..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/msp_usb.h +++ /dev/null @@ -1,124 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/****************************************************************** - * Copyright (c) 2000-2007 PMC-Sierra INC. - * - * PMC-SIERRA INC. DISCLAIMS ANY LIABILITY OF ANY KIND - * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS - * SOFTWARE. - */ -#ifndef MSP_USB_H_ -#define MSP_USB_H_ - -#define NUM_USB_DEVS 1 - -/* Register spaces for USB host 0 */ -#define MSP_USB0_MAB_START (MSP_USB0_BASE + 0x0) -#define MSP_USB0_MAB_END (MSP_USB0_BASE + 0x17) -#define MSP_USB0_ID_START (MSP_USB0_BASE + 0x40000) -#define MSP_USB0_ID_END (MSP_USB0_BASE + 0x4008f) -#define MSP_USB0_HS_START (MSP_USB0_BASE + 0x40100) -#define MSP_USB0_HS_END (MSP_USB0_BASE + 0x401FF) - -/* Register spaces for USB host 1 */ -#define MSP_USB1_MAB_START (MSP_USB1_BASE + 0x0) -#define MSP_USB1_MAB_END (MSP_USB1_BASE + 0x17) -#define MSP_USB1_ID_START (MSP_USB1_BASE + 0x40000) -#define MSP_USB1_ID_END (MSP_USB1_BASE + 0x4008f) -#define MSP_USB1_HS_START (MSP_USB1_BASE + 0x40100) -#define MSP_USB1_HS_END (MSP_USB1_BASE + 0x401ff) - -/* USB Identification registers */ -struct msp_usbid_regs { - u32 id; /* 0x0: Identification register */ - u32 hwgen; /* 0x4: General HW params */ - u32 hwhost; /* 0x8: Host HW params */ - u32 hwdev; /* 0xc: Device HW params */ - u32 hwtxbuf; /* 0x10: Tx buffer HW params */ - u32 hwrxbuf; /* 0x14: Rx buffer HW params */ - u32 reserved[26]; - u32 timer0_load; /* 0x80: General-purpose timer 0 load*/ - u32 timer0_ctrl; /* 0x84: General-purpose timer 0 control */ - u32 timer1_load; /* 0x88: General-purpose timer 1 load*/ - u32 timer1_ctrl; /* 0x8c: General-purpose timer 1 control */ -}; - -/* MSBus to AMBA registers */ -struct msp_mab_regs { - u32 isr; /* 0x0: Interrupt status */ - u32 imr; /* 0x4: Interrupt mask */ - u32 thcr0; /* 0x8: Transaction header capture 0 */ - u32 thcr1; /* 0xc: Transaction header capture 1 */ - u32 int_stat; /* 0x10: Interrupt status summary */ - u32 phy_cfg; /* 0x14: USB phy config */ -}; - -/* EHCI registers */ -struct msp_usbhs_regs { - u32 hciver; /* 0x0: Version and offset to operational regs */ - u32 hcsparams; /* 0x4: Host control structural parameters */ - u32 hccparams; /* 0x8: Host control capability parameters */ - u32 reserved0[5]; - u32 dciver; /* 0x20: Device interface version */ - u32 dccparams; /* 0x24: Device control capability parameters */ - u32 reserved1[6]; - u32 cmd; /* 0x40: USB command */ - u32 sts; /* 0x44: USB status */ - u32 int_ena; /* 0x48: USB interrupt enable */ - u32 frindex; /* 0x4c: Frame index */ - u32 reserved3; - union { - struct { - u32 flb_addr; /* 0x54: Frame list base address */ - u32 next_async_addr; /* 0x58: next asynchronous addr */ - u32 ttctrl; /* 0x5c: embedded transaction translator - async buffer status */ - u32 burst_size; /* 0x60: Controller burst size */ - u32 tx_fifo_ctrl; /* 0x64: Tx latency FIFO tuning */ - u32 reserved0[4]; - u32 endpt_nak; /* 0x78: Endpoint NAK */ - u32 endpt_nak_ena; /* 0x7c: Endpoint NAK enable */ - u32 cfg_flag; /* 0x80: Config flag */ - u32 port_sc1; /* 0x84: Port status & control 1 */ - u32 reserved1[7]; - u32 otgsc; /* 0xa4: OTG status & control */ - u32 mode; /* 0xa8: USB controller mode */ - } host; - - struct { - u32 dev_addr; /* 0x54: Device address */ - u32 endpt_list_addr; /* 0x58: Endpoint list address */ - u32 reserved0[7]; - u32 endpt_nak; /* 0x74 */ - u32 endpt_nak_ctrl; /* 0x78 */ - u32 cfg_flag; /* 0x80 */ - u32 port_sc1; /* 0x84: Port status & control 1 */ - u32 reserved[7]; - u32 otgsc; /* 0xa4: OTG status & control */ - u32 mode; /* 0xa8: USB controller mode */ - u32 endpt_setup_stat; /* 0xac */ - u32 endpt_prime; /* 0xb0 */ - u32 endpt_flush; /* 0xb4 */ - u32 endpt_stat; /* 0xb8 */ - u32 endpt_complete; /* 0xbc */ - u32 endpt_ctrl0; /* 0xc0 */ - u32 endpt_ctrl1; /* 0xc4 */ - u32 endpt_ctrl2; /* 0xc8 */ - u32 endpt_ctrl3; /* 0xcc */ - } device; - } u; -}; -/* - * Container for the more-generic platform_device. - * This exists mainly as a way to map the non-standard register - * spaces and make them accessible to the USB ISR. - */ -struct mspusb_device { - struct msp_mab_regs __iomem *mab_regs; - struct msp_usbid_regs __iomem *usbid_regs; - struct msp_usbhs_regs __iomem *usbhs_regs; - struct platform_device dev; -}; - -#define to_mspusb_device(x) container_of((x), struct mspusb_device, dev) -#define TO_HOST_ID(x) ((x) & 0x3) -#endif /*MSP_USB_H_*/ diff --git a/arch/mips/include/asm/mach-pmcs-msp71xx/war.h b/arch/mips/include/asm/mach-pmcs-msp71xx/war.h deleted file mode 100644 index 31c546f58bb5..000000000000 --- a/arch/mips/include/asm/mach-pmcs-msp71xx/war.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2002, 2004, 2007 by Ralf Baechle <ralf@linux-mips.org> - */ -#ifndef __ASM_MIPS_PMC_SIERRA_WAR_H -#define __ASM_MIPS_PMC_SIERRA_WAR_H - -#define R4600_V1_INDEX_ICACHEOP_WAR 0 -#define R4600_V1_HIT_CACHEOP_WAR 0 -#define R4600_V2_HIT_CACHEOP_WAR 0 -#define BCM1250_M3_WAR 0 -#define SIBYTE_1956_WAR 0 -#define MIPS4K_ICACHE_REFILL_WAR 0 -#define MIPS_CACHE_SYNC_WAR 0 -#define TX49XX_ICACHE_INDEX_INV_WAR 0 -#define ICACHE_REFILLS_WORKAROUND_WAR 0 -#define R10000_LLSC_WAR 0 -#if defined(CONFIG_PMC_MSP7120_EVAL) || defined(CONFIG_PMC_MSP7120_GW) || \ - defined(CONFIG_PMC_MSP7120_FPGA) -#define MIPS34K_MISSED_ITLB_WAR 1 -#else -#define MIPS34K_MISSED_ITLB_WAR 0 -#endif - -#endif /* __ASM_MIPS_PMC_SIERRA_WAR_H */ diff --git a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h index 6ea5908f0c11..c4579f1705c2 100644 --- a/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/mt7620/cpu-feature-overrides.h @@ -45,7 +45,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-ralink/mt7621.h b/arch/mips/include/asm/mach-ralink/mt7621.h index 65483a4681ab..e1af1ba50bd8 100644 --- a/arch/mips/include/asm/mach-ralink/mt7621.h +++ b/arch/mips/include/asm/mach-ralink/mt7621.h @@ -31,6 +31,4 @@ #define MT7621_CHIP_NAME0 0x3637544D #define MT7621_CHIP_NAME1 0x20203132 -#define MIPS_GIC_IRQ_BASE (MIPS_CPU_IRQ_BASE + 8) - #endif diff --git a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h index e06f517b2588..168359a0a58d 100644 --- a/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/mt7621/cpu-feature-overrides.h @@ -46,7 +46,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h index 9c069646d0bd..fdaf8c9182bc 100644 --- a/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt288x/cpu-feature-overrides.h @@ -44,7 +44,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 16 #define cpu_icache_line_size() 16 diff --git a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h index 2e423fd15384..7a385fe784a6 100644 --- a/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt305x/cpu-feature-overrides.h @@ -44,7 +44,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h index 7cee0e232580..0a61910f6521 100644 --- a/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-ralink/rt3883/cpu-feature-overrides.h @@ -43,7 +43,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_dcache_line_size() 32 #define cpu_icache_line_size() 32 diff --git a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h index bc46179fdf40..8539ccfb69b7 100644 --- a/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-rc32434/cpu-feature-overrides.h @@ -54,7 +54,6 @@ #define cpu_has_64bits 0 #define cpu_has_64bit_zero_reg 0 #define cpu_has_64bit_gp_regs 0 -#define cpu_has_64bit_addresses 0 #define cpu_has_inclusive_pcaches 0 diff --git a/arch/mips/include/asm/mach-rc32434/pci.h b/arch/mips/include/asm/mach-rc32434/pci.h index 6f40d1515580..9a6eefd12757 100644 --- a/arch/mips/include/asm/mach-rc32434/pci.h +++ b/arch/mips/include/asm/mach-rc32434/pci.h @@ -319,9 +319,6 @@ struct pci_msu { #define PCIM_H_EA 0x3 #define PCIM_H_IA_FIX 0x4 #define PCIM_H_IA_RR 0x5 -#if 0 -#define PCI_ADDR_START 0x13000000 -#endif #define PCI_ADDR_START 0x50000000 diff --git a/arch/mips/include/asm/mach-tx39xx/ioremap.h b/arch/mips/include/asm/mach-tx39xx/ioremap.h index 077b3c9971f7..157a7292397e 100644 --- a/arch/mips/include/asm/mach-tx39xx/ioremap.h +++ b/arch/mips/include/asm/mach-tx39xx/ioremap.h @@ -7,15 +7,6 @@ #include <linux/types.h> -/* - * Allow physical addresses to be fixed up to help peripherals located - * outside the low 32-bit range -- generic pass-through version. - */ -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, unsigned long flags) { diff --git a/arch/mips/include/asm/mach-tx49xx/ioremap.h b/arch/mips/include/asm/mach-tx49xx/ioremap.h index c6b9e05f44c4..b1f3710acf8e 100644 --- a/arch/mips/include/asm/mach-tx49xx/ioremap.h +++ b/arch/mips/include/asm/mach-tx49xx/ioremap.h @@ -7,15 +7,6 @@ #include <linux/types.h> -/* - * Allow physical addresses to be fixed up to help peripherals located - * outside the low 32-bit range -- generic pass-through version. - */ -static inline phys_addr_t fixup_bigphys_addr(phys_addr_t phys_addr, phys_addr_t size) -{ - return phys_addr; -} - static inline void __iomem *plat_ioremap(phys_addr_t offset, unsigned long size, unsigned long flags) { diff --git a/arch/mips/include/asm/mach-xilfpga/irq.h b/arch/mips/include/asm/mach-xilfpga/irq.h deleted file mode 100644 index 15ad29ec1dee..000000000000 --- a/arch/mips/include/asm/mach-xilfpga/irq.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-or-later */ -/* - * Copyright (C) 2015 Imagination Technologies - * Author: Zubair Lutfullah Kakakhel <Zubair.Kakakhel@imgtec.com> - */ - -#ifndef __MIPS_ASM_MACH_XILFPGA_IRQ_H__ -#define __MIPS_ASM_MACH_XILFPGA_IRQ_H__ - -#define NR_IRQS 32 - -#include <asm/mach-generic/irq.h> - -#endif /* __MIPS_ASM_MACH_XILFPGA_IRQ_H__ */ diff --git a/arch/mips/include/asm/mipsregs.h b/arch/mips/include/asm/mipsregs.h index 796fe47cfd17..796dbb86575b 100644 --- a/arch/mips/include/asm/mipsregs.h +++ b/arch/mips/include/asm/mipsregs.h @@ -468,6 +468,7 @@ #define EXCCODE_THREAD 25 /* Thread exceptions (MT) */ #define EXCCODE_DSPDIS 26 /* DSP disabled exception */ #define EXCCODE_GE 27 /* Virtualized guest exception (VZ) */ +#define EXCCODE_CACHEERR 30 /* Parity/ECC occured on a core */ /* Implementation specific trap codes used by MIPS cores */ #define MIPS_EXCCODE_TLBPAR 16 /* TLB parity error exception */ @@ -563,6 +564,17 @@ #define MIPS_CONF_MT_FTLB (_ULCAST_(4) << 7) #define MIPS_CONF_AR (_ULCAST_(7) << 10) #define MIPS_CONF_AT (_ULCAST_(3) << 13) +#define MIPS_CONF_BE (_ULCAST_(1) << 15) +#define MIPS_CONF_BM (_ULCAST_(1) << 16) +#define MIPS_CONF_MM (_ULCAST_(3) << 17) +#define MIPS_CONF_MM_SYSAD (_ULCAST_(1) << 17) +#define MIPS_CONF_MM_FULL (_ULCAST_(2) << 17) +#define MIPS_CONF_SB (_ULCAST_(1) << 21) +#define MIPS_CONF_UDI (_ULCAST_(1) << 22) +#define MIPS_CONF_DSP (_ULCAST_(1) << 23) +#define MIPS_CONF_ISP (_ULCAST_(1) << 24) +#define MIPS_CONF_KU (_ULCAST_(3) << 25) +#define MIPS_CONF_K23 (_ULCAST_(3) << 28) #define MIPS_CONF_M (_ULCAST_(1) << 31) /* @@ -674,13 +686,38 @@ #define MIPS_CONF5_CV (_ULCAST_(1) << 29) #define MIPS_CONF5_K (_ULCAST_(1) << 30) -#define MIPS_CONF6_SYND (_ULCAST_(1) << 13) +/* Config6 feature bits for proAptiv/P5600 */ + +/* Jump register cache prediction disable */ +#define MIPS_CONF6_MTI_JRCD (_ULCAST_(1) << 0) +/* MIPSr6 extensions enable */ +#define MIPS_CONF6_MTI_R6 (_ULCAST_(1) << 2) +/* IFU Performance Control */ +#define MIPS_CONF6_MTI_IFUPERFCTL (_ULCAST_(3) << 10) +#define MIPS_CONF6_MTI_SYND (_ULCAST_(1) << 13) +/* Sleep state performance counter disable */ +#define MIPS_CONF6_MTI_SPCD (_ULCAST_(1) << 14) /* proAptiv FTLB on/off bit */ -#define MIPS_CONF6_FTLBEN (_ULCAST_(1) << 15) -/* Loongson-3 FTLB on/off bit */ -#define MIPS_CONF6_FTLBDIS (_ULCAST_(1) << 22) +#define MIPS_CONF6_MTI_FTLBEN (_ULCAST_(1) << 15) +/* Disable load/store bonding */ +#define MIPS_CONF6_MTI_DLSB (_ULCAST_(1) << 21) /* FTLB probability bits */ -#define MIPS_CONF6_FTLBP_SHIFT (16) +#define MIPS_CONF6_MTI_FTLBP_SHIFT (16) + +/* Config6 feature bits for Loongson-3 */ + +/* Loongson-3 internal timer bit */ +#define MIPS_CONF6_LOONGSON_INTIMER (_ULCAST_(1) << 6) +/* Loongson-3 external timer bit */ +#define MIPS_CONF6_LOONGSON_EXTIMER (_ULCAST_(1) << 7) +/* Loongson-3 SFB on/off bit, STFill in manual */ +#define MIPS_CONF6_LOONGSON_SFBEN (_ULCAST_(1) << 8) +/* Loongson-3's LL on exclusive cacheline */ +#define MIPS_CONF6_LOONGSON_LLEXC (_ULCAST_(1) << 16) +/* Loongson-3's SC has a random delay */ +#define MIPS_CONF6_LOONGSON_SCRAND (_ULCAST_(1) << 17) +/* Loongson-3 FTLB on/off bit, VTLBOnly in manual */ +#define MIPS_CONF6_LOONGSON_FTLBDIS (_ULCAST_(1) << 22) #define MIPS_CONF7_WII (_ULCAST_(1) << 31) @@ -753,10 +790,18 @@ /* MAAR bit definitions */ #define MIPS_MAAR_VH (_U64CAST_(1) << 63) -#define MIPS_MAAR_ADDR ((BIT_ULL(BITS_PER_LONG - 12) - 1) << 12) +#define MIPS_MAAR_ADDR GENMASK_ULL(55, 12) #define MIPS_MAAR_ADDR_SHIFT 12 #define MIPS_MAAR_S (_ULCAST_(1) << 1) #define MIPS_MAAR_VL (_ULCAST_(1) << 0) +#ifdef CONFIG_XPA +#define MIPS_MAAR_V (MIPS_MAAR_VH | MIPS_MAAR_VL) +#else +#define MIPS_MAAR_V MIPS_MAAR_VL +#endif +#define MIPS_MAARX_VH (_ULCAST_(1) << 31) +#define MIPS_MAARX_ADDR 0xF +#define MIPS_MAARX_ADDR_SHIFT 32 /* MAARI bit definitions */ #define MIPS_MAARI_INDEX (_ULCAST_(0x3f) << 0) @@ -997,6 +1042,8 @@ #define LOONGSON_DIAG_ITLB (_ULCAST_(1) << 2) /* Flush DTLB */ #define LOONGSON_DIAG_DTLB (_ULCAST_(1) << 3) +/* Allow some CACHE instructions (CACHE0, 1, 3, 21 and 23) in user mode */ +#define LOONGSON_DIAG_UCAC (_ULCAST_(1) << 8) /* Flush VTLB */ #define LOONGSON_DIAG_VTLB (_ULCAST_(1) << 12) /* Flush FTLB */ @@ -1717,6 +1764,8 @@ do { \ #define write_c0_lladdr(val) __write_ulong_c0_register($17, 0, val) #define read_c0_maar() __read_ulong_c0_register($17, 1) #define write_c0_maar(val) __write_ulong_c0_register($17, 1, val) +#define readx_c0_maar() __readx_32bit_c0_register($17, 1) +#define writex_c0_maar(val) __writex_32bit_c0_register($17, 1, val) #define read_c0_maari() __read_32bit_c0_register($17, 2) #define write_c0_maari(val) __write_32bit_c0_register($17, 2, val) diff --git a/arch/mips/include/asm/nile4.h b/arch/mips/include/asm/nile4.h deleted file mode 100644 index 9d36b7823603..000000000000 --- a/arch/mips/include/asm/nile4.h +++ /dev/null @@ -1,310 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * asm-mips/nile4.h -- NEC Vrc-5074 Nile 4 definitions - * - * Copyright (C) 2000 Geert Uytterhoeven <geert@linux-m68k.org> - * Sony Software Development Center Europe (SDCE), Brussels - * - * This file is based on the following documentation: - * - * NEC Vrc 5074 System Controller Data Sheet, June 1998 - */ - -#ifndef _ASM_NILE4_H -#define _ASM_NILE4_H - -#define NILE4_BASE 0xbfa00000 -#define NILE4_SIZE 0x00200000 /* 2 MB */ - - - /* - * Physical Device Address Registers (PDARs) - */ - -#define NILE4_SDRAM0 0x0000 /* SDRAM Bank 0 [R/W] */ -#define NILE4_SDRAM1 0x0008 /* SDRAM Bank 1 [R/W] */ -#define NILE4_DCS2 0x0010 /* Device Chip-Select 2 [R/W] */ -#define NILE4_DCS3 0x0018 /* Device Chip-Select 3 [R/W] */ -#define NILE4_DCS4 0x0020 /* Device Chip-Select 4 [R/W] */ -#define NILE4_DCS5 0x0028 /* Device Chip-Select 5 [R/W] */ -#define NILE4_DCS6 0x0030 /* Device Chip-Select 6 [R/W] */ -#define NILE4_DCS7 0x0038 /* Device Chip-Select 7 [R/W] */ -#define NILE4_DCS8 0x0040 /* Device Chip-Select 8 [R/W] */ -#define NILE4_PCIW0 0x0060 /* PCI Address Window 0 [R/W] */ -#define NILE4_PCIW1 0x0068 /* PCI Address Window 1 [R/W] */ -#define NILE4_INTCS 0x0070 /* Controller Internal Registers and Devices */ - /* [R/W] */ -#define NILE4_BOOTCS 0x0078 /* Boot ROM Chip-Select [R/W] */ - - - /* - * CPU Interface Registers - */ - -#define NILE4_CPUSTAT 0x0080 /* CPU Status [R/W] */ -#define NILE4_INTCTRL 0x0088 /* Interrupt Control [R/W] */ -#define NILE4_INTSTAT0 0x0090 /* Interrupt Status 0 [R] */ -#define NILE4_INTSTAT1 0x0098 /* Interrupt Status 1 and CPU Interrupt */ - /* Enable [R/W] */ -#define NILE4_INTCLR 0x00A0 /* Interrupt Clear [R/W] */ -#define NILE4_INTPPES 0x00A8 /* PCI Interrupt Control [R/W] */ - - - /* - * Memory-Interface Registers - */ - -#define NILE4_MEMCTRL 0x00C0 /* Memory Control */ -#define NILE4_ACSTIME 0x00C8 /* Memory Access Timing [R/W] */ -#define NILE4_CHKERR 0x00D0 /* Memory Check Error Status [R] */ - - - /* - * PCI-Bus Registers - */ - -#define NILE4_PCICTRL 0x00E0 /* PCI Control [R/W] */ -#define NILE4_PCIARB 0x00E8 /* PCI Arbiter [R/W] */ -#define NILE4_PCIINIT0 0x00F0 /* PCI Master (Initiator) 0 [R/W] */ -#define NILE4_PCIINIT1 0x00F8 /* PCI Master (Initiator) 1 [R/W] */ -#define NILE4_PCIERR 0x00B8 /* PCI Error [R/W] */ - - - /* - * Local-Bus Registers - */ - -#define NILE4_LCNFG 0x0100 /* Local Bus Configuration [R/W] */ -#define NILE4_LCST2 0x0110 /* Local Bus Chip-Select Timing 2 [R/W] */ -#define NILE4_LCST3 0x0118 /* Local Bus Chip-Select Timing 3 [R/W] */ -#define NILE4_LCST4 0x0120 /* Local Bus Chip-Select Timing 4 [R/W] */ -#define NILE4_LCST5 0x0128 /* Local Bus Chip-Select Timing 5 [R/W] */ -#define NILE4_LCST6 0x0130 /* Local Bus Chip-Select Timing 6 [R/W] */ -#define NILE4_LCST7 0x0138 /* Local Bus Chip-Select Timing 7 [R/W] */ -#define NILE4_LCST8 0x0140 /* Local Bus Chip-Select Timing 8 [R/W] */ -#define NILE4_DCSFN 0x0150 /* Device Chip-Select Muxing and Output */ - /* Enables [R/W] */ -#define NILE4_DCSIO 0x0158 /* Device Chip-Selects As I/O Bits [R/W] */ -#define NILE4_BCST 0x0178 /* Local Boot Chip-Select Timing [R/W] */ - - - /* - * DMA Registers - */ - -#define NILE4_DMACTRL0 0x0180 /* DMA Control 0 [R/W] */ -#define NILE4_DMASRCA0 0x0188 /* DMA Source Address 0 [R/W] */ -#define NILE4_DMADESA0 0x0190 /* DMA Destination Address 0 [R/W] */ -#define NILE4_DMACTRL1 0x0198 /* DMA Control 1 [R/W] */ -#define NILE4_DMASRCA1 0x01A0 /* DMA Source Address 1 [R/W] */ -#define NILE4_DMADESA1 0x01A8 /* DMA Destination Address 1 [R/W] */ - - - /* - * Timer Registers - */ - -#define NILE4_T0CTRL 0x01C0 /* SDRAM Refresh Control [R/W] */ -#define NILE4_T0CNTR 0x01C8 /* SDRAM Refresh Counter [R/W] */ -#define NILE4_T1CTRL 0x01D0 /* CPU-Bus Read Time-Out Control [R/W] */ -#define NILE4_T1CNTR 0x01D8 /* CPU-Bus Read Time-Out Counter [R/W] */ -#define NILE4_T2CTRL 0x01E0 /* General-Purpose Timer Control [R/W] */ -#define NILE4_T2CNTR 0x01E8 /* General-Purpose Timer Counter [R/W] */ -#define NILE4_T3CTRL 0x01F0 /* Watchdog Timer Control [R/W] */ -#define NILE4_T3CNTR 0x01F8 /* Watchdog Timer Counter [R/W] */ - - - /* - * PCI Configuration Space Registers - */ - -#define NILE4_PCI_BASE 0x0200 - -#define NILE4_VID 0x0200 /* PCI Vendor ID [R] */ -#define NILE4_DID 0x0202 /* PCI Device ID [R] */ -#define NILE4_PCICMD 0x0204 /* PCI Command [R/W] */ -#define NILE4_PCISTS 0x0206 /* PCI Status [R/W] */ -#define NILE4_REVID 0x0208 /* PCI Revision ID [R] */ -#define NILE4_CLASS 0x0209 /* PCI Class Code [R] */ -#define NILE4_CLSIZ 0x020C /* PCI Cache Line Size [R/W] */ -#define NILE4_MLTIM 0x020D /* PCI Latency Timer [R/W] */ -#define NILE4_HTYPE 0x020E /* PCI Header Type [R] */ -#define NILE4_BIST 0x020F /* BIST [R] (unimplemented) */ -#define NILE4_BARC 0x0210 /* PCI Base Address Register Control [R/W] */ -#define NILE4_BAR0 0x0218 /* PCI Base Address Register 0 [R/W] */ -#define NILE4_BAR1 0x0220 /* PCI Base Address Register 1 [R/W] */ -#define NILE4_CIS 0x0228 /* PCI Cardbus CIS Pointer [R] */ - /* (unimplemented) */ -#define NILE4_SSVID 0x022C /* PCI Sub-System Vendor ID [R/W] */ -#define NILE4_SSID 0x022E /* PCI Sub-System ID [R/W] */ -#define NILE4_ROM 0x0230 /* Expansion ROM Base Address [R] */ - /* (unimplemented) */ -#define NILE4_INTLIN 0x023C /* PCI Interrupt Line [R/W] */ -#define NILE4_INTPIN 0x023D /* PCI Interrupt Pin [R] */ -#define NILE4_MINGNT 0x023E /* PCI Min_Gnt [R] (unimplemented) */ -#define NILE4_MAXLAT 0x023F /* PCI Max_Lat [R] (unimplemented) */ -#define NILE4_BAR2 0x0240 /* PCI Base Address Register 2 [R/W] */ -#define NILE4_BAR3 0x0248 /* PCI Base Address Register 3 [R/W] */ -#define NILE4_BAR4 0x0250 /* PCI Base Address Register 4 [R/W] */ -#define NILE4_BAR5 0x0258 /* PCI Base Address Register 5 [R/W] */ -#define NILE4_BAR6 0x0260 /* PCI Base Address Register 6 [R/W] */ -#define NILE4_BAR7 0x0268 /* PCI Base Address Register 7 [R/W] */ -#define NILE4_BAR8 0x0270 /* PCI Base Address Register 8 [R/W] */ -#define NILE4_BARB 0x0278 /* PCI Base Address Register BOOT [R/W] */ - - - /* - * Serial-Port Registers - */ - -#define NILE4_UART_BASE 0x0300 - -#define NILE4_UARTRBR 0x0300 /* UART Receiver Data Buffer [R] */ -#define NILE4_UARTTHR 0x0300 /* UART Transmitter Data Holding [W] */ -#define NILE4_UARTIER 0x0308 /* UART Interrupt Enable [R/W] */ -#define NILE4_UARTDLL 0x0300 /* UART Divisor Latch LSB [R/W] */ -#define NILE4_UARTDLM 0x0308 /* UART Divisor Latch MSB [R/W] */ -#define NILE4_UARTIIR 0x0310 /* UART Interrupt ID [R] */ -#define NILE4_UARTFCR 0x0310 /* UART FIFO Control [W] */ -#define NILE4_UARTLCR 0x0318 /* UART Line Control [R/W] */ -#define NILE4_UARTMCR 0x0320 /* UART Modem Control [R/W] */ -#define NILE4_UARTLSR 0x0328 /* UART Line Status [R/W] */ -#define NILE4_UARTMSR 0x0330 /* UART Modem Status [R/W] */ -#define NILE4_UARTSCR 0x0338 /* UART Scratch [R/W] */ - -#define NILE4_UART_BASE_BAUD 520833 /* 100 MHz / 12 / 16 */ - - - /* - * Interrupt Lines - */ - -#define NILE4_INT_CPCE 0 /* CPU-Interface Parity-Error Interrupt */ -#define NILE4_INT_CNTD 1 /* CPU No-Target Decode Interrupt */ -#define NILE4_INT_MCE 2 /* Memory-Check Error Interrupt */ -#define NILE4_INT_DMA 3 /* DMA Controller Interrupt */ -#define NILE4_INT_UART 4 /* UART Interrupt */ -#define NILE4_INT_WDOG 5 /* Watchdog Timer Interrupt */ -#define NILE4_INT_GPT 6 /* General-Purpose Timer Interrupt */ -#define NILE4_INT_LBRTD 7 /* Local-Bus Ready Timer Interrupt */ -#define NILE4_INT_INTA 8 /* PCI Interrupt Signal INTA# */ -#define NILE4_INT_INTB 9 /* PCI Interrupt Signal INTB# */ -#define NILE4_INT_INTC 10 /* PCI Interrupt Signal INTC# */ -#define NILE4_INT_INTD 11 /* PCI Interrupt Signal INTD# */ -#define NILE4_INT_INTE 12 /* PCI Interrupt Signal INTE# (ISA cascade) */ -#define NILE4_INT_RESV 13 /* Reserved */ -#define NILE4_INT_PCIS 14 /* PCI SERR# Interrupt */ -#define NILE4_INT_PCIE 15 /* PCI Internal Error Interrupt */ - - - /* - * Nile 4 Register Access - */ - -static inline void nile4_sync(void) -{ - volatile u32 *p = (volatile u32 *)0xbfc00000; - (void)(*p); -} - -static inline void nile4_out32(u32 offset, u32 val) -{ - *(volatile u32 *)(NILE4_BASE+offset) = val; - nile4_sync(); -} - -static inline u32 nile4_in32(u32 offset) -{ - u32 val = *(volatile u32 *)(NILE4_BASE+offset); - nile4_sync(); - return val; -} - -static inline void nile4_out16(u32 offset, u16 val) -{ - *(volatile u16 *)(NILE4_BASE+offset) = val; - nile4_sync(); -} - -static inline u16 nile4_in16(u32 offset) -{ - u16 val = *(volatile u16 *)(NILE4_BASE+offset); - nile4_sync(); - return val; -} - -static inline void nile4_out8(u32 offset, u8 val) -{ - *(volatile u8 *)(NILE4_BASE+offset) = val; - nile4_sync(); -} - -static inline u8 nile4_in8(u32 offset) -{ - u8 val = *(volatile u8 *)(NILE4_BASE+offset); - nile4_sync(); - return val; -} - - - /* - * Physical Device Address Registers - */ - -extern void nile4_set_pdar(u32 pdar, u32 phys, u32 size, int width, - int on_memory_bus, int visible); - - - /* - * PCI Master Registers - */ - -#define NILE4_PCICMD_IACK 0 /* PCI Interrupt Acknowledge */ -#define NILE4_PCICMD_IO 1 /* PCI I/O Space */ -#define NILE4_PCICMD_MEM 3 /* PCI Memory Space */ -#define NILE4_PCICMD_CFG 5 /* PCI Configuration Space */ - - - /* - * PCI Address Spaces - * - * Note that these are multiplexed using PCIINIT[01]! - */ - -#define NILE4_PCI_IO_BASE 0xa6000000 -#define NILE4_PCI_MEM_BASE 0xa8000000 -#define NILE4_PCI_CFG_BASE NILE4_PCI_MEM_BASE -#define NILE4_PCI_IACK_BASE NILE4_PCI_IO_BASE - - -extern void nile4_set_pmr(u32 pmr, u32 type, u32 addr); - - - /* - * Interrupt Programming - */ - -#define NUM_I8259_INTERRUPTS 16 -#define NUM_NILE4_INTERRUPTS 16 - -#define IRQ_I8259_CASCADE NILE4_INT_INTE -#define is_i8259_irq(irq) ((irq) < NUM_I8259_INTERRUPTS) -#define nile4_to_irq(n) ((n)+NUM_I8259_INTERRUPTS) -#define irq_to_nile4(n) ((n)-NUM_I8259_INTERRUPTS) - -extern void nile4_map_irq(int nile4_irq, int cpu_irq); -extern void nile4_map_irq_all(int cpu_irq); -extern void nile4_enable_irq(unsigned int nile4_irq); -extern void nile4_disable_irq(unsigned int nile4_irq); -extern void nile4_disable_irq_all(void); -extern u16 nile4_get_irq_stat(int cpu_irq); -extern void nile4_enable_irq_output(int cpu_irq); -extern void nile4_disable_irq_output(int cpu_irq); -extern void nile4_set_pci_irq_polarity(int pci_irq, int high); -extern void nile4_set_pci_irq_level_or_edge(int pci_irq, int level); -extern void nile4_clear_irq(int nile4_irq); -extern void nile4_clear_irq_mask(u32 mask); -extern u8 nile4_i8259_iack(void); -extern void nile4_dump_irq_status(void); /* Debug */ - -#endif diff --git a/arch/mips/include/asm/octeon/cvmx-sli-defs.h b/arch/mips/include/asm/octeon/cvmx-sli-defs.h index cbc7cdae1c6a..5ef6c38150f5 100644 --- a/arch/mips/include/asm/octeon/cvmx-sli-defs.h +++ b/arch/mips/include/asm/octeon/cvmx-sli-defs.h @@ -46,7 +46,7 @@ static inline uint64_t CVMX_SLI_PCIE_MSI_RCV_FUNC(void) case OCTEON_CN78XX & OCTEON_FAMILY_MASK: if (OCTEON_IS_MODEL(OCTEON_CN78XX_PASS1_X)) return 0x0000000000003CB0ull; - /* Else, fall through */ + fallthrough; default: return 0x0000000000023CB0ull; } diff --git a/arch/mips/include/asm/page.h b/arch/mips/include/asm/page.h index e2f503fc7a84..6a77bc4a6eec 100644 --- a/arch/mips/include/asm/page.h +++ b/arch/mips/include/asm/page.h @@ -49,7 +49,7 @@ static inline unsigned int page_size_ftlb(unsigned int mmuextdef) return 6; if (PAGE_SIZE > (256 << 10)) return 7; /* reserved */ - /* fall through */ + fallthrough; case MIPS_CONF4_MMUEXTDEF_VTLBSIZEEXT: return (PAGE_SHIFT - 10) / 2; default: diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index f92716cfa4f4..ee5dc0c145b9 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -172,6 +172,8 @@ extern pte_t invalid_pte_table[PTRS_PER_PTE]; +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + #ifndef __PAGETABLE_PUD_FOLDED /* * For 4-level pagetables we defines these ourselves, for 3-level the @@ -210,8 +212,6 @@ static inline void p4d_clear(p4d_t *p4dp) p4d_val(*p4dp) = (unsigned long)invalid_pud_table; } -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) - static inline unsigned long p4d_page_vaddr(p4d_t p4d) { return p4d_val(p4d); diff --git a/arch/mips/include/asm/pgtable-bits.h b/arch/mips/include/asm/pgtable-bits.h index 4da79b85c179..e26dc41a8a68 100644 --- a/arch/mips/include/asm/pgtable-bits.h +++ b/arch/mips/include/asm/pgtable-bits.h @@ -55,6 +55,9 @@ enum pgtable_bits { #if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) _PAGE_SPECIAL_SHIFT, #endif +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) + _PAGE_SOFT_DIRTY_SHIFT, +#endif }; /* @@ -84,6 +87,9 @@ enum pgtable_bits { #if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) _PAGE_SPECIAL_SHIFT, #endif +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) + _PAGE_SOFT_DIRTY_SHIFT, +#endif }; #elif defined(CONFIG_CPU_R3K_TLB) @@ -99,6 +105,9 @@ enum pgtable_bits { #if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) _PAGE_SPECIAL_SHIFT, #endif +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) + _PAGE_SOFT_DIRTY_SHIFT, +#endif /* Used by TLB hardware (placed in EntryLo) */ _PAGE_GLOBAL_SHIFT = 8, @@ -125,7 +134,9 @@ enum pgtable_bits { #if defined(CONFIG_ARCH_HAS_PTE_SPECIAL) _PAGE_SPECIAL_SHIFT, #endif - +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) + _PAGE_SOFT_DIRTY_SHIFT, +#endif /* Used by TLB hardware (placed in EntryLo*) */ #if defined(CONFIG_CPU_HAS_RIXI) _PAGE_NO_EXEC_SHIFT, @@ -152,6 +163,11 @@ enum pgtable_bits { #else # define _PAGE_SPECIAL 0 #endif +#if defined(CONFIG_HAVE_ARCH_SOFT_DIRTY) +# define _PAGE_SOFT_DIRTY (1 << _PAGE_SOFT_DIRTY_SHIFT) +#else +# define _PAGE_SOFT_DIRTY 0 +#endif /* Used by TLB hardware (placed in EntryLo*) */ #if defined(CONFIG_XPA) @@ -269,6 +285,6 @@ static inline uint64_t pte_to_entrylo(unsigned long pte_val) #define __WRITEABLE (_PAGE_SILENT_WRITE | _PAGE_WRITE | _PAGE_MODIFIED) #define _PAGE_CHG_MASK (_PAGE_ACCESSED | _PAGE_MODIFIED | \ - _PFN_MASK | _CACHE_MASK) + _PAGE_SOFT_DIRTY | _PFN_MASK | _CACHE_MASK) #endif /* _ASM_PGTABLE_BITS_H */ diff --git a/arch/mips/include/asm/pgtable.h b/arch/mips/include/asm/pgtable.h index f1801e7a4b15..85b39c9fd09e 100644 --- a/arch/mips/include/asm/pgtable.h +++ b/arch/mips/include/asm/pgtable.h @@ -400,7 +400,7 @@ static inline pte_t pte_mkwrite(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - pte_val(pte) |= _PAGE_MODIFIED; + pte_val(pte) |= _PAGE_MODIFIED | _PAGE_SOFT_DIRTY; if (pte_val(pte) & _PAGE_WRITE) pte_val(pte) |= _PAGE_SILENT_WRITE; return pte; @@ -414,6 +414,8 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } +#define pte_sw_mkyoung pte_mkyoung + #ifdef CONFIG_MIPS_HUGE_TLB_SUPPORT static inline int pte_huge(pte_t pte) { return pte_val(pte) & _PAGE_HUGE; } @@ -423,6 +425,30 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } #endif /* CONFIG_MIPS_HUGE_TLB_SUPPORT */ + +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline bool pte_soft_dirty(pte_t pte) +{ + return pte_val(pte) & _PAGE_SOFT_DIRTY; +} +#define pte_swp_soft_dirty pte_soft_dirty + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + pte_val(pte) |= _PAGE_SOFT_DIRTY; + return pte; +} +#define pte_swp_mksoft_dirty pte_mksoft_dirty + +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + pte_val(pte) &= ~(_PAGE_SOFT_DIRTY); + return pte; +} +#define pte_swp_clear_soft_dirty pte_clear_soft_dirty + +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + #endif /* @@ -454,6 +480,31 @@ static inline pgprot_t pgprot_writecombine(pgprot_t _prot) return __pgprot(prot); } +static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma, + unsigned long address) +{ +} + +#define __HAVE_ARCH_PTE_SAME +static inline int pte_same(pte_t pte_a, pte_t pte_b) +{ + return pte_val(pte_a) == pte_val(pte_b); +} + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +static inline int ptep_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pte_t *ptep, + pte_t entry, int dirty) +{ + if (!pte_same(*ptep, entry)) + set_pte_at(vma->vm_mm, address, ptep, entry); + /* + * update_mmu_cache will unconditionally execute, handling both + * the case that the PTE changed and the spurious fault case. + */ + return true; +} + /* * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. @@ -481,8 +532,11 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #else static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | - (pgprot_val(newprot) & ~_PAGE_CHG_MASK)); + pte_val(pte) &= _PAGE_CHG_MASK; + pte_val(pte) |= pgprot_val(newprot) & ~_PAGE_CHG_MASK; + if ((pte_val(pte) & _PAGE_ACCESSED) && !(pte_val(pte) & _PAGE_NO_READ)) + pte_val(pte) |= _PAGE_SILENT_READ; + return pte; } #endif @@ -497,6 +551,9 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, __update_tlb(vma, address, pte); } +#define __HAVE_ARCH_UPDATE_MMU_TLB +#define update_mmu_tlb update_mmu_cache + static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { @@ -507,20 +564,17 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) -#ifdef CONFIG_PHYS_ADDR_T_64BIT -extern int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot); - -static inline int io_remap_pfn_range(struct vm_area_struct *vma, - unsigned long vaddr, - unsigned long pfn, - unsigned long size, - pgprot_t prot) -{ - phys_addr_t phys_addr_high = fixup_bigphys_addr(pfn << PAGE_SHIFT, size); - return remap_pfn_range(vma, vaddr, phys_addr_high >> PAGE_SHIFT, size, prot); -} +/* + * Allow physical addresses to be fixed up to help 36-bit peripherals. + */ +#ifdef CONFIG_MIPS_FIXUP_BIGPHYS_ADDR +phys_addr_t fixup_bigphys_addr(phys_addr_t addr, phys_addr_t size); +int io_remap_pfn_range(struct vm_area_struct *vma, unsigned long vaddr, + unsigned long pfn, unsigned long size, pgprot_t prot); #define io_remap_pfn_range io_remap_pfn_range -#endif +#else +#define fixup_bigphys_addr(addr, size) (addr) +#endif /* CONFIG_MIPS_FIXUP_BIGPHYS_ADDR */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -579,7 +633,7 @@ static inline pmd_t pmd_mkclean(pmd_t pmd) static inline pmd_t pmd_mkdirty(pmd_t pmd) { - pmd_val(pmd) |= _PAGE_MODIFIED; + pmd_val(pmd) |= _PAGE_MODIFIED | _PAGE_SOFT_DIRTY; if (pmd_val(pmd) & _PAGE_WRITE) pmd_val(pmd) |= _PAGE_SILENT_WRITE; @@ -608,6 +662,26 @@ static inline pmd_t pmd_mkyoung(pmd_t pmd) return pmd; } +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline int pmd_soft_dirty(pmd_t pmd) +{ + return !!(pmd_val(pmd) & _PAGE_SOFT_DIRTY); +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + pmd_val(pmd) |= _PAGE_SOFT_DIRTY; + return pmd; +} + +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) +{ + pmd_val(pmd) &= ~(_PAGE_SOFT_DIRTY); + return pmd; +} + +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + /* Extern to avoid header file madness */ extern pmd_t mk_pmd(struct page *page, pgprot_t prot); diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index 7990c1c70471..5d9ff61004ca 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -125,7 +125,7 @@ static inline void arch_send_call_function_single_ipi(int cpu) { extern const struct plat_smp_ops *mp_ops; /* private */ - mp_ops->send_ipi_mask(cpumask_of(cpu), SMP_CALL_FUNCTION); + mp_ops->send_ipi_single(cpu, SMP_CALL_FUNCTION); } static inline void arch_send_call_function_ipi_mask(const struct cpumask *mask) diff --git a/arch/mips/include/asm/stackframe.h b/arch/mips/include/asm/stackframe.h index 4d6ad907ae54..3e8d2aaf96af 100644 --- a/arch/mips/include/asm/stackframe.h +++ b/arch/mips/include/asm/stackframe.h @@ -424,7 +424,7 @@ .macro RESTORE_SP_AND_RET docfi=0 RESTORE_SP \docfi -#ifdef CONFIG_CPU_MIPSR6 +#if defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) eretnc #else .set push diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index 09cbe9042828..0b0a93bf83cd 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -67,11 +67,11 @@ do { \ #endif /* - * Clear LLBit during context switches on MIPSr6 such that eretnc can be used + * Clear LLBit during context switches on MIPSr5+ such that eretnc can be used * unconditionally when returning to userland in entry.S. */ -#define __clear_r6_hw_ll_bit() do { \ - if (cpu_has_mips_r6) \ +#define __clear_r5_hw_ll_bit() do { \ + if (cpu_has_mips_r5 || cpu_has_mips_r6) \ write_c0_lladdr(0); \ } while (0) @@ -129,7 +129,7 @@ do { \ } \ clear_c0_status(ST0_CU2); \ } \ - __clear_r6_hw_ll_bit(); \ + __clear_r5_hw_ll_bit(); \ __clear_software_ll_bit(); \ if (cpu_has_userlocal) \ write_c0_userlocal(task_thread_info(next)->tp_value); \ diff --git a/arch/mips/include/asm/unaligned-emul.h b/arch/mips/include/asm/unaligned-emul.h new file mode 100644 index 000000000000..2022b18944b9 --- /dev/null +++ b/arch/mips/include/asm/unaligned-emul.h @@ -0,0 +1,779 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_MIPS_UNALIGNED_EMUL_H +#define _ASM_MIPS_UNALIGNED_EMUL_H + +#include <asm/asm.h> + +#ifdef __BIG_ENDIAN +#define _LoadHW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ (".set\tnoat\n" \ + "1:\t"type##_lb("%0", "0(%2)")"\n" \ + "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + "3:\t.set\tat\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _LoadW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_lwl("%0", "(%2)")"\n" \ + "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without lwl instruction */ +#define _LoadW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n" \ + ".set\tnoat\n\t" \ + "1:"type##_lb("%0", "0(%2)")"\n\t" \ + "2:"type##_lbu("$1", "1(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:"type##_lbu("$1", "2(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:"type##_lbu("$1", "3(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + +#define _LoadHWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tnoat\n" \ + "1:\t"type##_lbu("%0", "0(%2)")"\n" \ + "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".set\tat\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _LoadWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_lwl("%0", "(%2)")"\n" \ + "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\ + "dsll\t%0, %0, 32\n\t" \ + "dsrl\t%0, %0, 32\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tldl\t%0, (%2)\n" \ + "2:\tldr\t%0, 7(%2)\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without lwl and ldl instructions */ +#define _LoadWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:"type##_lbu("%0", "0(%2)")"\n\t" \ + "2:"type##_lbu("$1", "1(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:"type##_lbu("$1", "2(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:"type##_lbu("$1", "3(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:lb\t%0, 0(%2)\n\t" \ + "2:lbu\t $1, 1(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:lbu\t$1, 2(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:lbu\t$1, 3(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "5:lbu\t$1, 4(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "6:lbu\t$1, 5(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "7:lbu\t$1, 6(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "8:lbu\t$1, 7(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n\t" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR)"\t5b, 11b\n\t" \ + STR(PTR)"\t6b, 11b\n\t" \ + STR(PTR)"\t7b, 11b\n\t" \ + STR(PTR)"\t8b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + + +#define _StoreHW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tnoat\n" \ + "1:\t"type##_sb("%1", "1(%2)")"\n" \ + "srl\t$1, %1, 0x8\n" \ + "2:\t"type##_sb("$1", "0(%2)")"\n" \ + ".set\tat\n\t" \ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT));\ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _StoreW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_swl("%1", "(%2)")"\n" \ + "2:\t"type##_swr("%1", "3(%2)")"\n\t"\ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tsdl\t%1,(%2)\n" \ + "2:\tsdr\t%1, 7(%2)\n\t" \ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +#define _StoreW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:"type##_sb("%1", "3(%2)")"\n\t" \ + "srl\t$1, %1, 0x8\n\t" \ + "2:"type##_sb("$1", "2(%2)")"\n\t" \ + "srl\t$1, $1, 0x8\n\t" \ + "3:"type##_sb("$1", "1(%2)")"\n\t" \ + "srl\t$1, $1, 0x8\n\t" \ + "4:"type##_sb("$1", "0(%2)")"\n\t" \ + ".set\tpop\n\t" \ + "li\t%0, 0\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%0, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:sb\t%1, 7(%2)\n\t" \ + "dsrl\t$1, %1, 0x8\n\t" \ + "2:sb\t$1, 6(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "3:sb\t$1, 5(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "4:sb\t$1, 4(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "5:sb\t$1, 3(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "6:sb\t$1, 2(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "7:sb\t$1, 1(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "8:sb\t$1, 0(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + ".set\tpop\n\t" \ + "li\t%0, 0\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%0, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR)"\t5b, 11b\n\t" \ + STR(PTR)"\t6b, 11b\n\t" \ + STR(PTR)"\t7b, 11b\n\t" \ + STR(PTR)"\t8b, 11b\n\t" \ + ".previous" \ + : "=&r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + +#else /* __BIG_ENDIAN */ + +#define _LoadHW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ (".set\tnoat\n" \ + "1:\t"type##_lb("%0", "1(%2)")"\n" \ + "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + "3:\t.set\tat\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _LoadW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_lwl("%0", "3(%2)")"\n" \ + "2:\t"type##_lwr("%0", "(%2)")"\n\t"\ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without lwl instruction */ +#define _LoadW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n" \ + ".set\tnoat\n\t" \ + "1:"type##_lb("%0", "3(%2)")"\n\t" \ + "2:"type##_lbu("$1", "2(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:"type##_lbu("$1", "1(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:"type##_lbu("$1", "0(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + + +#define _LoadHWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tnoat\n" \ + "1:\t"type##_lbu("%0", "1(%2)")"\n" \ + "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".set\tat\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _LoadWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_lwl("%0", "3(%2)")"\n" \ + "2:\t"type##_lwr("%0", "(%2)")"\n\t"\ + "dsll\t%0, %0, 32\n\t" \ + "dsrl\t%0, %0, 32\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tldl\t%0, 7(%2)\n" \ + "2:\tldr\t%0, (%2)\n\t" \ + "li\t%1, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + "\t.section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%1, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without lwl and ldl instructions */ +#define _LoadWU(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:"type##_lbu("%0", "3(%2)")"\n\t" \ + "2:"type##_lbu("$1", "2(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:"type##_lbu("$1", "1(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:"type##_lbu("$1", "0(%2)")"\n\t" \ + "sll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _LoadDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:lb\t%0, 7(%2)\n\t" \ + "2:lbu\t$1, 6(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "3:lbu\t$1, 5(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "4:lbu\t$1, 4(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "5:lbu\t$1, 3(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "6:lbu\t$1, 2(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "7:lbu\t$1, 1(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "8:lbu\t$1, 0(%2)\n\t" \ + "dsll\t%0, 0x8\n\t" \ + "or\t%0, $1\n\t" \ + "li\t%1, 0\n" \ + ".set\tpop\n\t" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%1, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR)"\t5b, 11b\n\t" \ + STR(PTR)"\t6b, 11b\n\t" \ + STR(PTR)"\t7b, 11b\n\t" \ + STR(PTR)"\t8b, 11b\n\t" \ + ".previous" \ + : "=&r" (value), "=r" (res) \ + : "r" (addr), "i" (-EFAULT)); \ +} while (0) +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ + +#define _StoreHW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tnoat\n" \ + "1:\t"type##_sb("%1", "0(%2)")"\n" \ + "srl\t$1,%1, 0x8\n" \ + "2:\t"type##_sb("$1", "1(%2)")"\n" \ + ".set\tat\n\t" \ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT));\ +} while (0) + +#ifndef CONFIG_CPU_NO_LOAD_STORE_LR +#define _StoreW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + "1:\t"type##_swl("%1", "3(%2)")"\n" \ + "2:\t"type##_swr("%1", "(%2)")"\n\t"\ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + "1:\tsdl\t%1, 7(%2)\n" \ + "2:\tsdr\t%1, (%2)\n\t" \ + "li\t%0, 0\n" \ + "3:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "4:\tli\t%0, %3\n\t" \ + "j\t3b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 4b\n\t" \ + STR(PTR)"\t2b, 4b\n\t" \ + ".previous" \ + : "=r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT)); \ +} while (0) + +#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ +/* For CPUs without swl and sdl instructions */ +#define _StoreW(addr, value, res, type) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:"type##_sb("%1", "0(%2)")"\n\t" \ + "srl\t$1, %1, 0x8\n\t" \ + "2:"type##_sb("$1", "1(%2)")"\n\t" \ + "srl\t$1, $1, 0x8\n\t" \ + "3:"type##_sb("$1", "2(%2)")"\n\t" \ + "srl\t$1, $1, 0x8\n\t" \ + "4:"type##_sb("$1", "3(%2)")"\n\t" \ + ".set\tpop\n\t" \ + "li\t%0, 0\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%0, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + ".previous" \ + : "=&r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + +#define _StoreDW(addr, value, res) \ +do { \ + __asm__ __volatile__ ( \ + ".set\tpush\n\t" \ + ".set\tnoat\n\t" \ + "1:sb\t%1, 0(%2)\n\t" \ + "dsrl\t$1, %1, 0x8\n\t" \ + "2:sb\t$1, 1(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "3:sb\t$1, 2(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "4:sb\t$1, 3(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "5:sb\t$1, 4(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "6:sb\t$1, 5(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "7:sb\t$1, 6(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + "8:sb\t$1, 7(%2)\n\t" \ + "dsrl\t$1, $1, 0x8\n\t" \ + ".set\tpop\n\t" \ + "li\t%0, 0\n" \ + "10:\n\t" \ + ".insn\n\t" \ + ".section\t.fixup,\"ax\"\n\t" \ + "11:\tli\t%0, %3\n\t" \ + "j\t10b\n\t" \ + ".previous\n\t" \ + ".section\t__ex_table,\"a\"\n\t" \ + STR(PTR)"\t1b, 11b\n\t" \ + STR(PTR)"\t2b, 11b\n\t" \ + STR(PTR)"\t3b, 11b\n\t" \ + STR(PTR)"\t4b, 11b\n\t" \ + STR(PTR)"\t5b, 11b\n\t" \ + STR(PTR)"\t6b, 11b\n\t" \ + STR(PTR)"\t7b, 11b\n\t" \ + STR(PTR)"\t8b, 11b\n\t" \ + ".previous" \ + : "=&r" (res) \ + : "r" (value), "r" (addr), "i" (-EFAULT) \ + : "memory"); \ +} while (0) + +#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ +#endif + +#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel) +#define LoadHWUE(addr, value, res) _LoadHWU(addr, value, res, user) +#define LoadWU(addr, value, res) _LoadWU(addr, value, res, kernel) +#define LoadWUE(addr, value, res) _LoadWU(addr, value, res, user) +#define LoadHW(addr, value, res) _LoadHW(addr, value, res, kernel) +#define LoadHWE(addr, value, res) _LoadHW(addr, value, res, user) +#define LoadW(addr, value, res) _LoadW(addr, value, res, kernel) +#define LoadWE(addr, value, res) _LoadW(addr, value, res, user) +#define LoadDW(addr, value, res) _LoadDW(addr, value, res) + +#define StoreHW(addr, value, res) _StoreHW(addr, value, res, kernel) +#define StoreHWE(addr, value, res) _StoreHW(addr, value, res, user) +#define StoreW(addr, value, res) _StoreW(addr, value, res, kernel) +#define StoreWE(addr, value, res) _StoreW(addr, value, res, user) +#define StoreDW(addr, value, res) _StoreDW(addr, value, res) + +#endif /* _ASM_MIPS_UNALIGNED_EMUL_H */ diff --git a/arch/mips/include/asm/vermagic.h b/arch/mips/include/asm/vermagic.h index 24dc3d35161c..4d2dae0c7c57 100644 --- a/arch/mips/include/asm/vermagic.h +++ b/arch/mips/include/asm/vermagic.h @@ -8,12 +8,16 @@ #define MODULE_PROC_FAMILY "MIPS32_R1 " #elif defined CONFIG_CPU_MIPS32_R2 #define MODULE_PROC_FAMILY "MIPS32_R2 " +#elif defined CONFIG_CPU_MIPS32_R5 +#define MODULE_PROC_FAMILY "MIPS32_R5 " #elif defined CONFIG_CPU_MIPS32_R6 #define MODULE_PROC_FAMILY "MIPS32_R6 " #elif defined CONFIG_CPU_MIPS64_R1 #define MODULE_PROC_FAMILY "MIPS64_R1 " #elif defined CONFIG_CPU_MIPS64_R2 #define MODULE_PROC_FAMILY "MIPS64_R2 " +#elif defined CONFIG_CPU_MIPS64_R5 +#define MODULE_PROC_FAMILY "MIPS64_R5 " #elif defined CONFIG_CPU_MIPS64_R6 #define MODULE_PROC_FAMILY "MIPS64_R6 " #elif defined CONFIG_CPU_R3000 @@ -46,6 +50,8 @@ #define MODULE_PROC_FAMILY "LOONGSON64 " #elif defined CONFIG_CPU_CAVIUM_OCTEON #define MODULE_PROC_FAMILY "OCTEON " +#elif defined CONFIG_CPU_P5600 +#define MODULE_PROC_FAMILY "P5600 " #elif defined CONFIG_CPU_XLR #define MODULE_PROC_FAMILY "XLR " #elif defined CONFIG_CPU_XLP diff --git a/arch/mips/include/uapi/asm/hwcap.h b/arch/mips/include/uapi/asm/hwcap.h index 1ade1daa4921..b7e02bdc1985 100644 --- a/arch/mips/include/uapi/asm/hwcap.h +++ b/arch/mips/include/uapi/asm/hwcap.h @@ -17,5 +17,6 @@ #define HWCAP_LOONGSON_MMI (1 << 11) #define HWCAP_LOONGSON_EXT (1 << 12) #define HWCAP_LOONGSON_EXT2 (1 << 13) +#define HWCAP_LOONGSON_CPUCFG (1 << 14) #endif /* _UAPI_ASM_HWCAP_H */ diff --git a/arch/mips/include/uapi/asm/inst.h b/arch/mips/include/uapi/asm/inst.h index eaa3a80affdf..98f97c85e059 100644 --- a/arch/mips/include/uapi/asm/inst.h +++ b/arch/mips/include/uapi/asm/inst.h @@ -989,6 +989,30 @@ struct mm16_r5_format { /* Load/store from stack pointer format */ }; /* + * Loongson-3 overridden COP2 instruction formats (32-bit length) + */ +struct loongson3_lswc2_format { /* Loongson-3 overridden lwc2/swc2 Load/Store format */ + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int base : 5, + __BITFIELD_FIELD(unsigned int rt : 5, + __BITFIELD_FIELD(unsigned int fr : 1, + __BITFIELD_FIELD(unsigned int offset : 9, + __BITFIELD_FIELD(unsigned int ls : 1, + __BITFIELD_FIELD(unsigned int rq : 5, + ;))))))) +}; + +struct loongson3_lsdc2_format { /* Loongson-3 overridden ldc2/sdc2 Load/Store format */ + __BITFIELD_FIELD(unsigned int opcode : 6, + __BITFIELD_FIELD(unsigned int base : 5, + __BITFIELD_FIELD(unsigned int rt : 5, + __BITFIELD_FIELD(unsigned int index : 5, + __BITFIELD_FIELD(unsigned int offset : 8, + __BITFIELD_FIELD(unsigned int opcode1 : 3, + ;)))))) +}; + +/* * MIPS16e instruction formats (16-bit length) */ struct m16e_rr { @@ -1088,6 +1112,8 @@ union mips_instruction { struct mm16_rb_format mm16_rb_format; struct mm16_r3_format mm16_r3_format; struct mm16_r5_format mm16_r5_format; + struct loongson3_lswc2_format loongson3_lswc2_format; + struct loongson3_lsdc2_format loongson3_lsdc2_format; }; union mips16e_instruction { diff --git a/arch/mips/jazz/Platform b/arch/mips/jazz/Platform index 3373788acca1..eb0490ae8b09 100644 --- a/arch/mips/jazz/Platform +++ b/arch/mips/jazz/Platform @@ -1,6 +1,5 @@ # # Acer PICA 61, Mips Magnum 4000 and Olivetti M700. # -platform-$(CONFIG_MACH_JAZZ) += jazz/ cflags-$(CONFIG_MACH_JAZZ) += -I$(srctree)/arch/mips/include/asm/mach-jazz load-$(CONFIG_MACH_JAZZ) += 0xffffffff80080000 diff --git a/arch/mips/jz4740/Kconfig b/arch/mips/jz4740/Kconfig index 412d2faa3cdf..9c2e8c15bb97 100644 --- a/arch/mips/jz4740/Kconfig +++ b/arch/mips/jz4740/Kconfig @@ -2,7 +2,14 @@ choice prompt "Machine type" depends on MACH_INGENIC - default JZ4740_QI_LB60 + default INGENIC_GENERIC_BOARD + +config INGENIC_GENERIC_BOARD + bool "Generic board" + select MACH_JZ4740 + select MACH_JZ4770 + select MACH_JZ4780 + select MACH_X1000 config JZ4740_QI_LB60 bool "Qi Hardware Ben NanoNote" diff --git a/arch/mips/jz4740/Makefile b/arch/mips/jz4740/Makefile index 6de14c0deb4e..f96c0f5eca44 100644 --- a/arch/mips/jz4740/Makefile +++ b/arch/mips/jz4740/Makefile @@ -4,11 +4,6 @@ # # Object file lists. - -obj-y += prom.o time.o reset.o setup.o timer.o +obj-y += setup.o CFLAGS_setup.o = -I$(src)/../../../scripts/dtc/libfdt - -# PM support - -obj-$(CONFIG_PM) += pm.o diff --git a/arch/mips/jz4740/Platform b/arch/mips/jz4740/Platform index a2a5a85ea1f9..bd35d0621b13 100644 --- a/arch/mips/jz4740/Platform +++ b/arch/mips/jz4740/Platform @@ -1,4 +1,3 @@ -platform-$(CONFIG_MACH_INGENIC) += jz4740/ cflags-$(CONFIG_MACH_INGENIC) += -I$(srctree)/arch/mips/include/asm/mach-jz4740 load-$(CONFIG_MACH_INGENIC) += 0xffffffff80010000 zload-$(CONFIG_MACH_INGENIC) += 0xffffffff81000000 diff --git a/arch/mips/jz4740/pm.c b/arch/mips/jz4740/pm.c deleted file mode 100644 index f9b551f01f42..000000000000 --- a/arch/mips/jz4740/pm.c +++ /dev/null @@ -1,34 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 SoC power management support - */ - -#include <linux/init.h> -#include <linux/pm.h> -#include <linux/delay.h> -#include <linux/suspend.h> - -static int jz4740_pm_enter(suspend_state_t state) -{ - __asm__(".set\tmips3\n\t" - "wait\n\t" - ".set\tmips0"); - - - - return 0; -} - -static const struct platform_suspend_ops jz4740_pm_ops = { - .valid = suspend_valid_only_mem, - .enter = jz4740_pm_enter, -}; - -static int __init jz4740_pm_init(void) -{ - suspend_set_ops(&jz4740_pm_ops); - return 0; - -} -late_initcall(jz4740_pm_init); diff --git a/arch/mips/jz4740/prom.c b/arch/mips/jz4740/prom.c deleted file mode 100644 index ff4555c3fb15..000000000000 --- a/arch/mips/jz4740/prom.c +++ /dev/null @@ -1,19 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 SoC prom code - */ - -#include <linux/init.h> - -#include <asm/bootinfo.h> -#include <asm/fw/fw.h> - -void __init prom_init(void) -{ - fw_init_cmdline(); -} - -void __init prom_free_prom_memory(void) -{ -} diff --git a/arch/mips/jz4740/reset.c b/arch/mips/jz4740/reset.c deleted file mode 100644 index 1f9f02e54085..000000000000 --- a/arch/mips/jz4740/reset.c +++ /dev/null @@ -1,24 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - */ - -#include <asm/reboot.h> - -#include "reset.h" - -static void jz4740_halt(void) -{ - while (1) { - __asm__(".set push;\n" - ".set mips3;\n" - "wait;\n" - ".set pop;\n" - ); - } -} - -void jz4740_reset_init(void) -{ - _machine_halt = jz4740_halt; -} diff --git a/arch/mips/jz4740/reset.h b/arch/mips/jz4740/reset.h deleted file mode 100644 index 4e8746ee9b61..000000000000 --- a/arch/mips/jz4740/reset.h +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __MIPS_JZ4740_RESET_H__ -#define __MIPS_JZ4740_RESET_H__ - -extern void jz4740_reset_init(void); - -#endif diff --git a/arch/mips/jz4740/setup.c b/arch/mips/jz4740/setup.c index 880c26857aff..61468a87775c 100644 --- a/arch/mips/jz4740/setup.c +++ b/arch/mips/jz4740/setup.c @@ -5,17 +5,22 @@ * JZ4740 setup code */ +#include <linux/clocksource.h> #include <linux/init.h> #include <linux/io.h> #include <linux/irqchip.h> #include <linux/kernel.h> #include <linux/libfdt.h> +#include <linux/of_clk.h> #include <linux/of_fdt.h> +#include <linux/pm.h> +#include <linux/suspend.h> #include <asm/bootinfo.h> +#include <asm/fw/fw.h> #include <asm/prom.h> - -#include "reset.h" +#include <asm/reboot.h> +#include <asm/time.h> #define JZ4740_EMC_BASE_ADDR 0x13010000 @@ -61,8 +66,6 @@ void __init plat_mem_setup(void) int offset; void *dtb; - jz4740_reset_init(); - if (__dtb_start != __dtb_end) dtb = __dtb_start; else @@ -105,3 +108,56 @@ void __init arch_init_irq(void) { irqchip_init(); } + +void __init plat_time_init(void) +{ + of_clk_init(NULL); + timer_probe(); +} + +void __init prom_init(void) +{ + fw_init_cmdline(); +} + +void __init prom_free_prom_memory(void) +{ +} + +static void jz4740_wait_instr(void) +{ + __asm__(".set push;\n" + ".set mips3;\n" + "wait;\n" + ".set pop;\n" + ); +} + +static void jz4740_halt(void) +{ + for (;;) + jz4740_wait_instr(); +} + +static int __maybe_unused jz4740_pm_enter(suspend_state_t state) +{ + jz4740_wait_instr(); + + return 0; +} + +static const struct platform_suspend_ops jz4740_pm_ops __maybe_unused = { + .valid = suspend_valid_only_mem, + .enter = jz4740_pm_enter, +}; + +static int __init jz4740_pm_init(void) +{ + if (IS_ENABLED(CONFIG_PM_SLEEP)) + suspend_set_ops(&jz4740_pm_ops); + _machine_halt = jz4740_halt; + + return 0; + +} +late_initcall(jz4740_pm_init); diff --git a/arch/mips/jz4740/time.c b/arch/mips/jz4740/time.c deleted file mode 100644 index 605a84a250bf..000000000000 --- a/arch/mips/jz4740/time.c +++ /dev/null @@ -1,17 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 platform time support - */ - -#include <linux/clocksource.h> -#include <linux/of_clk.h> - -#include <asm/mach-jz4740/timer.h> - -void __init plat_time_init(void) -{ - of_clk_init(NULL); - jz4740_timer_init(); - timer_probe(); -} diff --git a/arch/mips/jz4740/timer.c b/arch/mips/jz4740/timer.c deleted file mode 100644 index 5c9f82de6a82..000000000000 --- a/arch/mips/jz4740/timer.c +++ /dev/null @@ -1,42 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2010, Lars-Peter Clausen <lars@metafoo.de> - * JZ4740 platform timer support - */ - -#include <linux/export.h> -#include <linux/io.h> -#include <linux/init.h> -#include <linux/kernel.h> - -#include <asm/mach-jz4740/base.h> -#include <asm/mach-jz4740/timer.h> - -void __iomem *jz4740_timer_base; -EXPORT_SYMBOL_GPL(jz4740_timer_base); - -void jz4740_timer_enable_watchdog(void) -{ - writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_CLEAR); -} -EXPORT_SYMBOL_GPL(jz4740_timer_enable_watchdog); - -void jz4740_timer_disable_watchdog(void) -{ - writel(BIT(16), jz4740_timer_base + JZ_REG_TIMER_STOP_SET); -} -EXPORT_SYMBOL_GPL(jz4740_timer_disable_watchdog); - -void __init jz4740_timer_init(void) -{ - jz4740_timer_base = ioremap(JZ4740_TCU_BASE_ADDR, 0x100); - - if (!jz4740_timer_base) - panic("Failed to ioremap timer registers"); - - /* Disable all timer clocks except for those used as system timers */ - writel(0x000100fc, jz4740_timer_base + JZ_REG_TIMER_STOP_SET); - - /* Timer irqs are unmasked by default, mask them */ - writel(0x00ff00ff, jz4740_timer_base + JZ_REG_TIMER_MASK_SET); -} diff --git a/arch/mips/kernel/8250-platform.c b/arch/mips/kernel/8250-platform.c deleted file mode 100644 index 5c6b2ab1f56e..000000000000 --- a/arch/mips/kernel/8250-platform.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2007 Ralf Baechle (ralf@linux-mips.org) - */ -#include <linux/init.h> -#include <linux/serial_8250.h> - -#define PORT(base, int) \ -{ \ - .iobase = base, \ - .irq = int, \ - .uartclk = 1843200, \ - .iotype = UPIO_PORT, \ - .flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST, \ - .regshift = 0, \ -} - -static struct plat_serial8250_port uart8250_data[] = { - PORT(0x3F8, 4), - PORT(0x2F8, 3), - PORT(0x3E8, 4), - PORT(0x2E8, 3), - { }, -}; - -static struct platform_device uart8250_device = { - .name = "serial8250", - .id = PLAT8250_DEV_PLATFORM, - .dev = { - .platform_data = uart8250_data, - }, -}; - -static int __init uart8250_init(void) -{ - return platform_device_register(&uart8250_device); -} - -module_init(uart8250_init); - -MODULE_AUTHOR("Ralf Baechle <ralf@linux-mips.org>"); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Generic 8250 UART probe driver"); diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index d6e97df51cfb..8c7a043295ed 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -98,8 +98,6 @@ obj-$(CONFIG_MIPSR2_TO_R6_EMULATOR) += mips-r2-to-r6-emul.o CFLAGS_cpu-bugs64.o = $(shell if $(CC) $(KBUILD_CFLAGS) -Wa,-mdaddi -c -o /dev/null -x c /dev/null >/dev/null 2>&1; then echo "-DHAVE_AS_SET_DADDI"; fi) -obj-$(CONFIG_HAVE_STD_PC_SERIAL_PORT) += 8250-platform.o - obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_HW_PERF_EVENTS) += perf_event_mipsxx.o diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index 2c38f75d87ff..fb3e203698ea 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -90,7 +90,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + fallthrough; case mm_bltz_op: if ((long)regs->regs[insn.mm_i_format.rs] < 0) *contpc = regs->cp0_epc + @@ -106,7 +106,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + fallthrough; case mm_bgez_op: if ((long)regs->regs[insn.mm_i_format.rs] >= 0) *contpc = regs->cp0_epc + @@ -144,7 +144,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, unsigned int bit; bc_false = 1; - /* Fall through */ + fallthrough; case mm_bc2t_op: case mm_bc1t_op: preempt_disable(); @@ -178,7 +178,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case mm_jalrs16_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + fallthrough; case mm_jr16_op: *contpc = regs->regs[insn.mm_i_format.rs]; return 1; @@ -239,7 +239,7 @@ int __mm_isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case mm_jal32_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* Fall through */ + fallthrough; case mm_j32_op: *contpc = regs->cp0_epc + dec_insn.pc_inc; *contpc >>= 27; @@ -432,7 +432,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, switch (insn.r_format.func) { case jalr_op: regs->regs[insn.r_format.rd] = epc + 8; - /* Fall through */ + fallthrough; case jr_op: if (NO_R6EMU && insn.r_format.func == jr_op) goto sigill_r2r6; @@ -451,7 +451,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bltzl_op: if (NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -465,7 +465,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgezl_op: if (NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) { epc = epc + 4 + (insn.i_format.simmediate << 2); @@ -561,7 +561,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case jalx_op: case jal_op: regs->regs[31] = regs->cp0_epc + 8; - /* fall through */ + fallthrough; case j_op: epc += 4; epc >>= 28; @@ -578,7 +578,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case beql_op: if (NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case beq_op: if (regs->regs[insn.i_format.rs] == regs->regs[insn.i_format.rt]) { @@ -593,7 +593,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bnel_op: if (NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) { @@ -608,7 +608,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case blezl_op: /* not really i_format */ if (!insn.i_format.rt && NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case blez_op: /* * Compact branches for R6 for the @@ -644,7 +644,7 @@ int __compute_return_epc_for_insn(struct pt_regs *regs, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) goto sigill_r2r6; - /* fall through */ + fallthrough; case bgtz_op: /* * Compact branches for R6 for the diff --git a/arch/mips/kernel/cevt-r4k.c b/arch/mips/kernel/cevt-r4k.c index 17a9cbb8b3df..995ad9e69ded 100644 --- a/arch/mips/kernel/cevt-r4k.c +++ b/arch/mips/kernel/cevt-r4k.c @@ -8,6 +8,7 @@ */ #include <linux/clockchips.h> #include <linux/interrupt.h> +#include <linux/cpufreq.h> #include <linux/percpu.h> #include <linux/smp.h> #include <linux/irq.h> @@ -250,6 +251,49 @@ unsigned int __weak get_c0_compare_int(void) return MIPS_CPU_IRQ_BASE + cp0_compare_irq; } +#ifdef CONFIG_CPU_FREQ + +static unsigned long mips_ref_freq; + +static int r4k_cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct clock_event_device *cd; + unsigned long rate; + int cpu; + + if (!mips_ref_freq) + mips_ref_freq = freq->old; + + if (val == CPUFREQ_POSTCHANGE) { + rate = cpufreq_scale(mips_hpt_frequency, mips_ref_freq, + freq->new); + + for_each_cpu(cpu, freq->policy->cpus) { + cd = &per_cpu(mips_clockevent_device, cpu); + + clockevents_update_freq(cd, rate); + } + } + + return 0; +} + +static struct notifier_block r4k_cpufreq_notifier = { + .notifier_call = r4k_cpufreq_callback, +}; + +static int __init r4k_register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&r4k_cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); + +} +core_initcall(r4k_register_cpufreq_notifier); + +#endif /* !CONFIG_CPU_FREQ */ + int r4k_clockevent_init(void) { unsigned long flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED; diff --git a/arch/mips/kernel/cps-vec-ns16550.S b/arch/mips/kernel/cps-vec-ns16550.S index d5a67b4ce9f6..30725e1df987 100644 --- a/arch/mips/kernel/cps-vec-ns16550.S +++ b/arch/mips/kernel/cps-vec-ns16550.S @@ -14,16 +14,30 @@ #define UART_TX_OFS (UART_TX << CONFIG_MIPS_CPS_NS16550_SHIFT) #define UART_LSR_OFS (UART_LSR << CONFIG_MIPS_CPS_NS16550_SHIFT) +#if CONFIG_MIPS_CPS_NS16550_WIDTH == 1 +# define UART_L lb +# define UART_S sb +#elif CONFIG_MIPS_CPS_NS16550_WIDTH == 2 +# define UART_L lh +# define UART_S sh +#elif CONFIG_MIPS_CPS_NS16550_WIDTH == 4 +# define UART_L lw +# define UART_S sw +#else +# define UART_L lb +# define UART_S sb +#endif + /** * _mips_cps_putc() - write a character to the UART * @a0: ASCII character to write * @t9: UART base address */ LEAF(_mips_cps_putc) -1: lw t0, UART_LSR_OFS(t9) +1: UART_L t0, UART_LSR_OFS(t9) andi t0, t0, UART_LSR_TEMT beqz t0, 1b - sb a0, UART_TX_OFS(t9) + UART_S a0, UART_TX_OFS(t9) jr ra END(_mips_cps_putc) diff --git a/arch/mips/kernel/cpu-probe.c b/arch/mips/kernel/cpu-probe.c index f21a2304401f..6b93162d7c5a 100644 --- a/arch/mips/kernel/cpu-probe.c +++ b/arch/mips/kernel/cpu-probe.c @@ -28,6 +28,8 @@ #include <asm/spram.h> #include <linux/uaccess.h> +#include <asm/mach-loongson64/cpucfg-emul.h> + /* Hardware capabilities */ unsigned int elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); @@ -92,6 +94,7 @@ static void cpu_set_fpu_2008(struct cpuinfo_mips *c) { if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { unsigned long sr, fir, fcsr, fcsr0, fcsr1; @@ -172,6 +175,7 @@ static void cpu_set_nofpu_2008(struct cpuinfo_mips *c) case STRICT: if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { c->options |= MIPS_CPU_NAN_2008 | MIPS_CPU_NAN_LEGACY; } else { @@ -263,9 +267,11 @@ static void cpu_set_nofpu_id(struct cpuinfo_mips *c) value = 0; if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) value |= MIPS_FPIR_D | MIPS_FPIR_S; if (c->isa_level & (MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) value |= MIPS_FPIR_F64 | MIPS_FPIR_L | MIPS_FPIR_W; if (c->options & MIPS_CPU_NAN_2008) @@ -286,6 +292,7 @@ static void cpu_set_fpu_opts(struct cpuinfo_mips *c) if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { if (c->fpu_id & MIPS_FPIR_3D) c->ases |= MIPS_ASE_MIPS3D; @@ -532,22 +539,26 @@ static inline void cpu_probe_vmbits(struct cpuinfo_mips *c) static void set_isa(struct cpuinfo_mips *c, unsigned int isa) { switch (isa) { + case MIPS_CPU_ISA_M64R5: + c->isa_level |= MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5; + set_elf_base_platform("mips64r5"); + fallthrough; case MIPS_CPU_ISA_M64R2: c->isa_level |= MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2; set_elf_base_platform("mips64r2"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_M64R1: c->isa_level |= MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1; set_elf_base_platform("mips64"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_V: c->isa_level |= MIPS_CPU_ISA_V; set_elf_base_platform("mips5"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_IV: c->isa_level |= MIPS_CPU_ISA_IV; set_elf_base_platform("mips4"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_III: c->isa_level |= MIPS_CPU_ISA_II | MIPS_CPU_ISA_III; set_elf_base_platform("mips3"); @@ -557,20 +568,24 @@ static void set_isa(struct cpuinfo_mips *c, unsigned int isa) case MIPS_CPU_ISA_M64R6: c->isa_level |= MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6; set_elf_base_platform("mips64r6"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_M32R6: c->isa_level |= MIPS_CPU_ISA_M32R6; set_elf_base_platform("mips32r6"); /* Break here so we don't add incompatible ISAs */ break; + case MIPS_CPU_ISA_M32R5: + c->isa_level |= MIPS_CPU_ISA_M32R5; + set_elf_base_platform("mips32r5"); + fallthrough; case MIPS_CPU_ISA_M32R2: c->isa_level |= MIPS_CPU_ISA_M32R2; set_elf_base_platform("mips32r2"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_M32R1: c->isa_level |= MIPS_CPU_ISA_M32R1; set_elf_base_platform("mips32"); - /* fall through */ + fallthrough; case MIPS_CPU_ISA_II: c->isa_level |= MIPS_CPU_ISA_II; set_elf_base_platform("mips2"); @@ -620,14 +635,14 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) config = read_c0_config6(); if (flags & FTLB_EN) - config |= MIPS_CONF6_FTLBEN; + config |= MIPS_CONF6_MTI_FTLBEN; else - config &= ~MIPS_CONF6_FTLBEN; + config &= ~MIPS_CONF6_MTI_FTLBEN; if (flags & FTLB_SET_PROB) { - config &= ~(3 << MIPS_CONF6_FTLBP_SHIFT); + config &= ~(3 << MIPS_CONF6_MTI_FTLBP_SHIFT); config |= calculate_ftlb_probability(c) - << MIPS_CONF6_FTLBP_SHIFT; + << MIPS_CONF6_MTI_FTLBP_SHIFT; } write_c0_config6(config); @@ -647,10 +662,10 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) config = read_c0_config6(); if (flags & FTLB_EN) /* Enable FTLB */ - write_c0_config6(config & ~MIPS_CONF6_FTLBDIS); + write_c0_config6(config & ~MIPS_CONF6_LOONGSON_FTLBDIS); else /* Disable FTLB */ - write_c0_config6(config | MIPS_CONF6_FTLBDIS); + write_c0_config6(config | MIPS_CONF6_LOONGSON_FTLBDIS); break; default: return 1; @@ -659,6 +674,52 @@ static int set_ftlb_enable(struct cpuinfo_mips *c, enum ftlb_flags flags) return 0; } +static int mm_config(struct cpuinfo_mips *c) +{ + unsigned int config0, update, mm; + + config0 = read_c0_config(); + mm = config0 & MIPS_CONF_MM; + + /* + * It's implementation dependent what type of write-merge is supported + * and whether it can be enabled/disabled. If it is settable lets make + * the merging allowed by default. Some platforms might have + * write-through caching unsupported. In this case just ignore the + * CP0.Config.MM bit field value. + */ + switch (c->cputype) { + case CPU_24K: + case CPU_34K: + case CPU_74K: + case CPU_P5600: + case CPU_P6600: + c->options |= MIPS_CPU_MM_FULL; + update = MIPS_CONF_MM_FULL; + break; + case CPU_1004K: + case CPU_1074K: + case CPU_INTERAPTIV: + case CPU_PROAPTIV: + mm = 0; + fallthrough; + default: + update = 0; + break; + } + + if (update) { + config0 = (config0 & ~MIPS_CONF_MM) | update; + write_c0_config(config0); + } else if (mm == MIPS_CONF_MM_SYSAD) { + c->options |= MIPS_CPU_MM_SYSAD; + } else if (mm == MIPS_CONF_MM_FULL) { + c->options |= MIPS_CPU_MM_FULL; + } + + return 0; +} + static inline unsigned int decode_config0(struct cpuinfo_mips *c) { unsigned int config0; @@ -850,7 +911,7 @@ static inline unsigned int decode_config4(struct cpuinfo_mips *c) MIPS_CONF4_VTLBSIZEEXT_SHIFT) * 0x40; c->tlbsize = c->tlbsizevtlb; ftlb_page = MIPS_CONF4_VFTLBPAGESIZE; - /* fall through */ + fallthrough; case MIPS_CONF4_MMUEXTDEF_FTLBSIZEEXT: if (mips_ftlb_disabled) break; @@ -1750,13 +1811,19 @@ static inline void cpu_probe_mips(struct cpuinfo_mips *c, unsigned int cpu) spram_config(); + mm_config(c); + switch (__get_cpu_type(c->cputype)) { + case CPU_M5150: + case CPU_P5600: + set_isa(c, MIPS_CPU_ISA_M32R5); + break; case CPU_I6500: c->options |= MIPS_CPU_SHARED_FTLB_ENTRIES; - /* fall-through */ + fallthrough; case CPU_I6400: c->options |= MIPS_CPU_SHARED_FTLB_RAM; - /* fall-through */ + fallthrough; default: break; } @@ -1932,10 +1999,53 @@ platform: } } +#ifdef CONFIG_CPU_LOONGSON64 +#include <loongson_regs.h> + +static inline void decode_cpucfg(struct cpuinfo_mips *c) +{ + u32 cfg1 = read_cpucfg(LOONGSON_CFG1); + u32 cfg2 = read_cpucfg(LOONGSON_CFG2); + u32 cfg3 = read_cpucfg(LOONGSON_CFG3); + + if (cfg1 & LOONGSON_CFG1_MMI) + c->ases |= MIPS_ASE_LOONGSON_MMI; + + if (cfg2 & LOONGSON_CFG2_LEXT1) + c->ases |= MIPS_ASE_LOONGSON_EXT; + + if (cfg2 & LOONGSON_CFG2_LEXT2) + c->ases |= MIPS_ASE_LOONGSON_EXT2; + + if (cfg2 & LOONGSON_CFG2_LSPW) + c->options |= MIPS_CPU_LDPTE; + + if (cfg3 & LOONGSON_CFG3_LCAMP) + c->ases |= MIPS_ASE_LOONGSON_CAM; +} + static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { + decode_configs(c); + switch (c->processor_id & PRID_IMP_MASK) { - case PRID_IMP_LOONGSON_64C: /* Loongson-2/3 */ + case PRID_IMP_LOONGSON_64R: /* Loongson-64 Reduced */ + switch (c->processor_id & PRID_REV_MASK) { + case PRID_REV_LOONGSON2K_R1_0: + case PRID_REV_LOONGSON2K_R1_1: + case PRID_REV_LOONGSON2K_R1_2: + case PRID_REV_LOONGSON2K_R1_3: + c->cputype = CPU_LOONGSON64; + __cpu_name[cpu] = "Loongson-2K"; + set_elf_platform(cpu, "gs264e"); + set_isa(c, MIPS_CPU_ISA_M64R2); + break; + } + c->writecombine = _CACHE_UNCACHED_ACCELERATED; + c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_EXT | + MIPS_ASE_LOONGSON_EXT2); + break; + case PRID_IMP_LOONGSON_64C: /* Loongson-3 Classic */ switch (c->processor_id & PRID_REV_MASK) { case PRID_REV_LOONGSON3A_R2_0: case PRID_REV_LOONGSON3A_R2_1: @@ -1952,8 +2062,14 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) set_isa(c, MIPS_CPU_ISA_M64R2); break; } - - decode_configs(c); + /* + * Loongson-3 Classic did not implement MIPS standard TLBINV + * but implemented TLBINVF and EHINV. As currently we're only + * using these two features, enable MIPS_CPU_TLBINV as well. + * + * Also some early Loongson-3A2000 had wrong TLB type in Config + * register, we correct it here. + */ c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; c->writecombine = _CACHE_UNCACHED_ACCELERATED; c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | @@ -1964,17 +2080,17 @@ static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) __cpu_name[cpu] = "ICT Loongson-3"; set_elf_platform(cpu, "loongson3a"); set_isa(c, MIPS_CPU_ISA_M64R2); - decode_configs(c); - c->options |= MIPS_CPU_FTLB | MIPS_CPU_TLBINV | MIPS_CPU_LDPTE; + decode_cpucfg(c); c->writecombine = _CACHE_UNCACHED_ACCELERATED; - c->ases |= (MIPS_ASE_LOONGSON_MMI | MIPS_ASE_LOONGSON_CAM | - MIPS_ASE_LOONGSON_EXT | MIPS_ASE_LOONGSON_EXT2); break; default: panic("Unknown Loongson Processor ID!"); break; } } +#else +static inline void cpu_probe_loongson(struct cpuinfo_mips *c, unsigned int cpu) { } +#endif static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) { @@ -2028,7 +2144,7 @@ static inline void cpu_probe_ingenic(struct cpuinfo_mips *c, unsigned int cpu) default: break; } - /* fall-through */ + fallthrough; case PRID_IMP_XBURST_REV2: c->cputype = CPU_XBURST; c->writecombine = _CACHE_UNCACHED_ACCELERATED; @@ -2286,6 +2402,13 @@ void cpu_probe(void) cpu_probe_vmbits(c); + /* Synthesize CPUCFG data if running on Loongson processors; + * no-op otherwise. + * + * This looks at previously probed features, so keep this at bottom. + */ + loongson3_cpucfg_synthesize_data(c); + #ifdef CONFIG_64BIT if (cpu == 0) __ua_limit = ~((1ull << cpu_vmbits) - 1); diff --git a/arch/mips/kernel/csrc-r4k.c b/arch/mips/kernel/csrc-r4k.c index 437dda64fd7a..edc4afc080fa 100644 --- a/arch/mips/kernel/csrc-r4k.c +++ b/arch/mips/kernel/csrc-r4k.c @@ -6,6 +6,7 @@ * Copyright (C) 2007 by Ralf Baechle */ #include <linux/clocksource.h> +#include <linux/cpufreq.h> #include <linux/init.h> #include <linux/sched_clock.h> @@ -65,6 +66,45 @@ static bool rdhwr_count_usable(void) return false; } +#ifdef CONFIG_CPU_FREQ + +static bool __read_mostly r4k_clock_unstable; + +static void r4k_clocksource_unstable(char *reason) +{ + if (r4k_clock_unstable) + return; + + r4k_clock_unstable = true; + + pr_info("R4K timer is unstable due to %s\n", reason); + + clocksource_mark_unstable(&clocksource_mips); +} + +static int r4k_cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + if (val == CPUFREQ_POSTCHANGE) + r4k_clocksource_unstable("CPU frequency change"); + + return 0; +} + +static struct notifier_block r4k_cpufreq_notifier = { + .notifier_call = r4k_cpufreq_callback, +}; + +static int __init r4k_register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&r4k_cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); + +} +core_initcall(r4k_register_cpufreq_notifier); + +#endif /* !CONFIG_CPU_FREQ */ + int __init init_r4k_clocksource(void) { if (!cpu_has_counter || !mips_hpt_frequency) diff --git a/arch/mips/kernel/entry.S b/arch/mips/kernel/entry.S index 4849a48afc0f..4b896f5023ff 100644 --- a/arch/mips/kernel/entry.S +++ b/arch/mips/kernel/entry.S @@ -169,8 +169,8 @@ syscall_exit_work: jal syscall_trace_leave b resume_userspace -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR6) || \ - defined(CONFIG_MIPS_MT) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_MIPSR6) || defined(CONFIG_MIPS_MT) /* * MIPS32R2 Instruction Hazard Barrier - must be called @@ -183,4 +183,4 @@ LEAF(mips_ihb) nop END(mips_ihb) -#endif /* CONFIG_CPU_MIPSR2 or CONFIG_CPU_MIPSR6 or CONFIG_MIPS_MT */ +#endif /* CONFIG_CPU_MIPSR2 - CONFIG_CPU_MIPSR6 or CONFIG_MIPS_MT */ diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S index 0a43c9125267..a1b966f3578e 100644 --- a/arch/mips/kernel/genex.S +++ b/arch/mips/kernel/genex.S @@ -476,20 +476,20 @@ NESTED(nmi_handler, PT_SIZE, sp) .endm .macro __build_clear_fpe + CLI + TRACE_IRQS_OFF .set push /* gas fails to assemble cfc1 for some archs (octeon).*/ \ .set mips1 SET_HARDFLOAT cfc1 a1, fcr31 .set pop - CLI - TRACE_IRQS_OFF .endm .macro __build_clear_msa_fpe - _cfcmsa a1, MSA_CSR CLI TRACE_IRQS_OFF + _cfcmsa a1, MSA_CSR .endm .macro __build_clear_ade @@ -501,17 +501,17 @@ NESTED(nmi_handler, PT_SIZE, sp) .macro __BUILD_silent exception .endm - /* Gas tries to parse the PRINT argument as a string containing + /* Gas tries to parse the ASM_PRINT argument as a string containing string escapes and emits bogus warnings if it believes to recognize an unknown escape code. So make the arguments start with an n and gas will believe \n is ok ... */ .macro __BUILD_verbose nexception LONG_L a1, PT_EPC(sp) #ifdef CONFIG_32BIT - PRINT("Got \nexception at %08lx\012") + ASM_PRINT("Got \nexception at %08lx\012") #endif #ifdef CONFIG_64BIT - PRINT("Got \nexception at %016lx\012") + ASM_PRINT("Got \nexception at %016lx\012") #endif .endm diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index 351d40fe0859..3b02ffe46304 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -132,6 +132,9 @@ dtb_found: #endif MTC0 zero, CP0_CONTEXT # clear context register +#ifdef CONFIG_64BIT + MTC0 zero, CP0_XCONTEXT +#endif PTR_LA $28, init_thread_union /* Set the SP after an empty pt_regs. */ PTR_LI sp, _THREAD_SIZE - 32 - PT_SIZE diff --git a/arch/mips/kernel/idle.c b/arch/mips/kernel/idle.c index 37f8e78e2869..5bc3b04693c7 100644 --- a/arch/mips/kernel/idle.c +++ b/arch/mips/kernel/idle.c @@ -180,7 +180,8 @@ void __init check_wait(void) break; case CPU_LOONGSON64: if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= - (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) || + (c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) cpu_wait = r4k_wait; break; @@ -201,7 +202,7 @@ void __init check_wait(void) */ if (IS_ENABLED(CONFIG_MIPS_EJTAG_FDC_TTY)) break; - /* fall through */ + fallthrough; case CPU_M14KC: case CPU_M14KEC: case CPU_24K: diff --git a/arch/mips/kernel/mips-cm.c b/arch/mips/kernel/mips-cm.c index cdb93ed91cde..f60af512c877 100644 --- a/arch/mips/kernel/mips-cm.c +++ b/arch/mips/kernel/mips-cm.c @@ -114,14 +114,56 @@ static char *cm2_core[8] = { "Exclusive/OK", "Exclusive/Data" }; +static char *cm2_l2_type[4] = { + [0x0] = "None", + [0x1] = "Tag RAM single/double ECC error", + [0x2] = "Data RAM single/double ECC error", + [0x3] = "WS RAM uncorrectable dirty parity" +}; + +static char *cm2_l2_instr[32] = { + [0x00] = "L2_NOP", + [0x01] = "L2_ERR_CORR", + [0x02] = "L2_TAG_INV", + [0x03] = "L2_WS_CLEAN", + [0x04] = "L2_RD_MDYFY_WR", + [0x05] = "L2_WS_MRU", + [0x06] = "L2_EVICT_LN2", + [0x07] = "0x07", + [0x08] = "L2_EVICT", + [0x09] = "L2_REFL", + [0x0a] = "L2_RD", + [0x0b] = "L2_WR", + [0x0c] = "L2_EVICT_MRU", + [0x0d] = "L2_SYNC", + [0x0e] = "L2_REFL_ERR", + [0x0f] = "0x0f", + [0x10] = "L2_INDX_WB_INV", + [0x11] = "L2_INDX_LD_TAG", + [0x12] = "L2_INDX_ST_TAG", + [0x13] = "L2_INDX_ST_DATA", + [0x14] = "L2_INDX_ST_ECC", + [0x15] = "0x15", + [0x16] = "0x16", + [0x17] = "0x17", + [0x18] = "L2_FTCH_AND_LCK", + [0x19] = "L2_HIT_INV", + [0x1a] = "L2_HIT_WB_INV", + [0x1b] = "L2_HIT_WB", + [0x1c] = "0x1c", + [0x1d] = "0x1d", + [0x1e] = "0x1e", + [0x1f] = "0x1f" +}; + static char *cm2_causes[32] = { "None", "GC_WR_ERR", "GC_RD_ERR", "COH_WR_ERR", "COH_RD_ERR", "MMIO_WR_ERR", "MMIO_RD_ERR", "0x07", "0x08", "0x09", "0x0a", "0x0b", "0x0c", "0x0d", "0x0e", "0x0f", - "0x10", "0x11", "0x12", "0x13", - "0x14", "0x15", "0x16", "INTVN_WR_ERR", - "INTVN_RD_ERR", "0x19", "0x1a", "0x1b", + "0x10", "INTVN_WR_ERR", "INTVN_RD_ERR", "0x13", + "0x14", "0x15", "0x16", "0x17", + "L2_RD_UNCORR", "L2_WR_UNCORR", "L2_CORR", "0x1b", "0x1c", "0x1d", "0x1e", "0x1f" }; @@ -360,7 +402,7 @@ void mips_cm_error_report(void) "CCA=%lu TR=%s MCmd=%s STag=%lu " "SPort=%lu\n", cca_bits, cm2_tr[tr_bits], cm2_cmd[cmd_bits], stag_bits, sport_bits); - } else { + } else if (cause < 24) { /* glob state & sresp together */ unsigned long c3_bits = (cm_error >> 18) & 7; unsigned long c2_bits = (cm_error >> 15) & 7; @@ -377,6 +419,22 @@ void mips_cm_error_report(void) cm2_core[c1_bits], cm2_core[c0_bits], sc_bit ? "True" : "False", cm2_cmd[cmd_bits], sport_bits); + } else { + unsigned long muc_bit = (cm_error >> 23) & 1; + unsigned long ins_bits = (cm_error >> 18) & 0x1f; + unsigned long arr_bits = (cm_error >> 16) & 3; + unsigned long dw_bits = (cm_error >> 12) & 15; + unsigned long way_bits = (cm_error >> 9) & 7; + unsigned long mway_bit = (cm_error >> 8) & 1; + unsigned long syn_bits = (cm_error >> 0) & 0xFF; + + snprintf(buf, sizeof(buf), + "Type=%s%s Instr=%s DW=%lu Way=%lu " + "MWay=%s Syndrome=0x%02lx", + muc_bit ? "Multi-UC " : "", + cm2_l2_type[arr_bits], + cm2_l2_instr[ins_bits], dw_bits, way_bits, + mway_bit ? "True" : "False", syn_bits); } pr_err("CM_ERROR=%08llx %s <%s>\n", cm_error, cm2_causes[cause], buf); diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index b4d210bfcdae..a39ec755e4c2 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -1109,7 +1109,7 @@ repeat: err = SIGILL; break; } - /* fall through */ + fallthrough; case beql_op: case bnel_op: if (delay_slot(regs)) { diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 128fc9999c56..efce5defcc5c 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -90,6 +90,7 @@ struct mips_pmu { unsigned int num_counters; }; +static int counter_bits; static struct mips_pmu mipspmu; #define M_PERFCTL_EVENT(event) (((event) << MIPS_PERFCTRL_EVENT_S) & \ @@ -118,6 +119,7 @@ static struct mips_pmu mipspmu; #define M_PERFCTL_CONFIG_MASK 0x1f #endif +#define CNTR_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) #ifdef CONFIG_MIPS_PERF_SHARED_TC_COUNTERS static DEFINE_RWLOCK(pmuint_rwlock); @@ -154,6 +156,31 @@ static void pause_local_counters(void); static irqreturn_t mipsxx_pmu_handle_irq(int, void *); static int mipsxx_pmu_handle_shared_irq(void); +/* 0: Not Loongson-3 + * 1: Loongson-3A1000/3B1000/3B1500 + * 2: Loongson-3A2000/3A3000 + * 3: Loongson-3A4000+ + */ + +#define LOONGSON_PMU_TYPE0 0 +#define LOONGSON_PMU_TYPE1 1 +#define LOONGSON_PMU_TYPE2 2 +#define LOONGSON_PMU_TYPE3 3 + +static inline int get_loongson3_pmu_type(void) +{ + if (boot_cpu_type() != CPU_LOONGSON64) + return LOONGSON_PMU_TYPE0; + if ((boot_cpu_data.processor_id & PRID_COMP_MASK) == PRID_COMP_LEGACY) + return LOONGSON_PMU_TYPE1; + if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64C) + return LOONGSON_PMU_TYPE2; + if ((boot_cpu_data.processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64G) + return LOONGSON_PMU_TYPE3; + + return LOONGSON_PMU_TYPE0; +} + static unsigned int mipsxx_pmu_swizzle_perf_idx(unsigned int idx) { if (vpe_id() == 1) @@ -186,17 +213,18 @@ static u64 mipsxx_pmu_read_counter(unsigned int idx) static u64 mipsxx_pmu_read_counter_64(unsigned int idx) { + u64 mask = CNTR_BIT_MASK(counter_bits); idx = mipsxx_pmu_swizzle_perf_idx(idx); switch (idx) { case 0: - return read_c0_perfcntr0_64(); + return read_c0_perfcntr0_64() & mask; case 1: - return read_c0_perfcntr1_64(); + return read_c0_perfcntr1_64() & mask; case 2: - return read_c0_perfcntr2_64(); + return read_c0_perfcntr2_64() & mask; case 3: - return read_c0_perfcntr3_64(); + return read_c0_perfcntr3_64() & mask; default: WARN_ONCE(1, "Invalid performance counter number (%d)\n", idx); return 0; @@ -225,6 +253,7 @@ static void mipsxx_pmu_write_counter(unsigned int idx, u64 val) static void mipsxx_pmu_write_counter_64(unsigned int idx, u64 val) { + val &= CNTR_BIT_MASK(counter_bits); idx = mipsxx_pmu_swizzle_perf_idx(idx); switch (idx) { @@ -286,12 +315,16 @@ static int mipsxx_pmu_alloc_counter(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) { int i; + unsigned long cntr_mask; /* * We only need to care the counter mask. The range has been * checked definitely. */ - unsigned long cntr_mask = (hwc->event_base >> 8) & 0xffff; + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + cntr_mask = (hwc->event_base >> 10) & 0xffff; + else + cntr_mask = (hwc->event_base >> 8) & 0xffff; for (i = mipspmu.num_counters - 1; i >= 0; i--) { /* @@ -320,10 +353,16 @@ static void mipsxx_pmu_enable_event(struct hw_perf_event *evt, int idx) WARN_ON(idx < 0 || idx >= mipspmu.num_counters); - cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | - (evt->config_base & M_PERFCTL_CONFIG_MASK) | - /* Make sure interrupt enabled. */ - MIPS_PERFCTRL_IE; + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0x3ff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | + /* Make sure interrupt enabled. */ + MIPS_PERFCTRL_IE; + else + cpuc->saved_ctrl[idx] = M_PERFCTL_EVENT(evt->event_base & 0xff) | + (evt->config_base & M_PERFCTL_CONFIG_MASK) | + /* Make sure interrupt enabled. */ + MIPS_PERFCTRL_IE; if (IS_ENABLED(CONFIG_CPU_BMIPS5000)) { /* enable the counter for the calling thread */ @@ -396,6 +435,10 @@ static int mipspmu_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, mipspmu.overflow - left); + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + mipsxx_pmu_write_control(idx, + M_PERFCTL_EVENT(hwc->event_base & 0x3ff)); + mipspmu.write_counter(idx, mipspmu.overflow - left); perf_event_update_userpage(event); @@ -667,8 +710,14 @@ static unsigned int mipspmu_perf_event_encode(const struct mips_perf_event *pev) (pev->event_id & 0xff); else #endif /* CONFIG_MIPS_MT_SMP */ - return ((pev->cntr_mask & 0xffff00) | - (pev->event_id & 0xff)); + { + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) + return (pev->cntr_mask & 0xfffc00) | + (pev->event_id & 0x3ff); + else + return (pev->cntr_mask & 0xffff00) | + (pev->event_id & 0xff); + } } static const struct mips_perf_event *mipspmu_map_general_event(int idx) @@ -783,26 +832,104 @@ static int n_counters(void) return counters; } +static void loongson3_reset_counters(void *arg) +{ + int counters = (int)(long)arg; + + switch (counters) { + case 4: + mipsxx_pmu_write_control(3, 0); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 127<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 191<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 255<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 319<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 383<<5); + mipspmu.write_counter(3, 0); + mipsxx_pmu_write_control(3, 575<<5); + mipspmu.write_counter(3, 0); + fallthrough; + case 3: + mipsxx_pmu_write_control(2, 0); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 127<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 191<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 255<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 319<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 383<<5); + mipspmu.write_counter(2, 0); + mipsxx_pmu_write_control(2, 575<<5); + mipspmu.write_counter(2, 0); + fallthrough; + case 2: + mipsxx_pmu_write_control(1, 0); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 127<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 191<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 255<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 319<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 383<<5); + mipspmu.write_counter(1, 0); + mipsxx_pmu_write_control(1, 575<<5); + mipspmu.write_counter(1, 0); + fallthrough; + case 1: + mipsxx_pmu_write_control(0, 0); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 127<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 191<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 255<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 319<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 383<<5); + mipspmu.write_counter(0, 0); + mipsxx_pmu_write_control(0, 575<<5); + mipspmu.write_counter(0, 0); + break; + } +} + static void reset_counters(void *arg) { int counters = (int)(long)arg; + + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) { + loongson3_reset_counters(arg); + return; + } + switch (counters) { case 4: mipsxx_pmu_write_control(3, 0); mipspmu.write_counter(3, 0); - /* fall through */ + fallthrough; case 3: mipsxx_pmu_write_control(2, 0); mipspmu.write_counter(2, 0); - /* fall through */ + fallthrough; case 2: mipsxx_pmu_write_control(1, 0); mipspmu.write_counter(1, 0); - /* fall through */ + fallthrough; case 1: mipsxx_pmu_write_control(0, 0); mipspmu.write_counter(0, 0); - /* fall through */ + break; } } @@ -834,13 +961,30 @@ static const struct mips_perf_event i6x00_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_BRANCH_MISSES] = { 0x16, CNTR_EVEN | CNTR_ODD }, }; -static const struct mips_perf_event loongson3_event_map[PERF_COUNT_HW_MAX] = { +static const struct mips_perf_event loongson3_event_map1[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_EVEN }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x00, CNTR_ODD }, [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x01, CNTR_EVEN }, [PERF_COUNT_HW_BRANCH_MISSES] = { 0x01, CNTR_ODD }, }; +static const struct mips_perf_event loongson3_event_map2[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x80, CNTR_ALL }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x81, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x18, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x94, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x9c, CNTR_ALL }, +}; + +static const struct mips_perf_event loongson3_event_map3[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = { 0x00, CNTR_ALL }, + [PERF_COUNT_HW_INSTRUCTIONS] = { 0x01, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_REFERENCES] = { 0x1c, CNTR_ALL }, + [PERF_COUNT_HW_CACHE_MISSES] = { 0x1d, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = { 0x02, CNTR_ALL }, + [PERF_COUNT_HW_BRANCH_MISSES] = { 0x08, CNTR_ALL }, +}; + static const struct mips_perf_event octeon_event_map[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { 0x01, CNTR_ALL }, [PERF_COUNT_HW_INSTRUCTIONS] = { 0x03, CNTR_ALL }, @@ -1064,7 +1208,7 @@ static const struct mips_perf_event i6x00_cache_map }, }; -static const struct mips_perf_event loongson3_cache_map +static const struct mips_perf_event loongson3_cache_map1 [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = { @@ -1109,12 +1253,127 @@ static const struct mips_perf_event loongson3_cache_map [C(BPU)] = { /* Using the same code for *HW_BRANCH* */ [C(OP_READ)] = { - [C(RESULT_ACCESS)] = { 0x02, CNTR_EVEN }, - [C(RESULT_MISS)] = { 0x02, CNTR_ODD }, + [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN }, + [C(RESULT_MISS)] = { 0x01, CNTR_ODD }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x01, CNTR_EVEN }, + [C(RESULT_MISS)] = { 0x01, CNTR_ODD }, + }, +}, +}; + +static const struct mips_perf_event loongson3_cache_map2 + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x156, CNTR_ALL }, }, [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = { 0x02, CNTR_EVEN }, - [C(RESULT_MISS)] = { 0x02, CNTR_ODD }, + [C(RESULT_ACCESS)] = { 0x155, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x153, CNTR_ALL }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x18, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x18, CNTR_ALL }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x1b6, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = { 0x1b7, CNTR_ALL }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0x1bf, CNTR_ALL }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x92, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x92, CNTR_ALL }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, + [C(OP_WRITE)] = { + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x94, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x9c, CNTR_ALL }, + }, +}, +}; + +static const struct mips_perf_event loongson3_cache_map3 + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + /* + * Like some other architectures (e.g. ARM), the performance + * counters don't differentiate between read and write + * accesses/misses, so this isn't strictly correct, but it's the + * best we can do. Writes and reads get combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x1e, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1f, CNTR_ALL }, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = { 0xaa, CNTR_ALL }, + [C(RESULT_MISS)] = { 0xa9, CNTR_ALL }, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x1c, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1d, CNTR_ALL }, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x2e, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x2f, CNTR_ALL }, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x14, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x1b, CNTR_ALL }, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_MISS)] = { 0x1a, CNTR_ALL }, + }, +}, +[C(BPU)] = { + /* Using the same code for *HW_BRANCH* */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = { 0x02, CNTR_ALL }, + [C(RESULT_MISS)] = { 0x08, CNTR_ALL }, }, }, }; @@ -1178,7 +1437,6 @@ static const struct mips_perf_event bmips5000_cache_map }, }; - static const struct mips_perf_event octeon_cache_map [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -1512,6 +1770,7 @@ static irqreturn_t mipsxx_pmu_handle_irq(int irq, void *dev) static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) { /* currently most cores have 7-bit event numbers */ + int pmu_type; unsigned int raw_id = config & 0xff; unsigned int base_id = raw_id & 0x7f; @@ -1624,8 +1883,33 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config) raw_id > 127 ? CNTR_ODD : CNTR_EVEN; break; case CPU_LOONGSON64: - raw_event.cntr_mask = raw_id > 127 ? CNTR_ODD : CNTR_EVEN; - break; + pmu_type = get_loongson3_pmu_type(); + + switch (pmu_type) { + case LOONGSON_PMU_TYPE1: + raw_event.cntr_mask = + raw_id > 127 ? CNTR_ODD : CNTR_EVEN; + break; + case LOONGSON_PMU_TYPE2: + base_id = config & 0x3ff; + raw_event.cntr_mask = CNTR_ALL; + + if ((base_id >= 1 && base_id < 28) || + (base_id >= 64 && base_id < 90) || + (base_id >= 128 && base_id < 164) || + (base_id >= 192 && base_id < 200) || + (base_id >= 256 && base_id < 274) || + (base_id >= 320 && base_id < 358) || + (base_id >= 384 && base_id < 574)) + break; + + return ERR_PTR(-EOPNOTSUPP); + case LOONGSON_PMU_TYPE3: + base_id = raw_id; + raw_event.cntr_mask = CNTR_ALL; + break; + } + break; } raw_event.event_id = base_id; @@ -1683,8 +1967,7 @@ static const struct mips_perf_event *xlp_pmu_map_raw_event(u64 config) static int __init init_hw_perf_events(void) { - int counters, irq; - int counter_bits; + int counters, irq, pmu_type; pr_info("Performance counters: "); @@ -1771,8 +2054,25 @@ init_hw_perf_events(void) break; case CPU_LOONGSON64: mipspmu.name = "mips/loongson3"; - mipspmu.general_event_map = &loongson3_event_map; - mipspmu.cache_event_map = &loongson3_cache_map; + pmu_type = get_loongson3_pmu_type(); + + switch (pmu_type) { + case LOONGSON_PMU_TYPE1: + counters = 2; + mipspmu.general_event_map = &loongson3_event_map1; + mipspmu.cache_event_map = &loongson3_cache_map1; + break; + case LOONGSON_PMU_TYPE2: + counters = 4; + mipspmu.general_event_map = &loongson3_event_map2; + mipspmu.cache_event_map = &loongson3_cache_map2; + break; + case LOONGSON_PMU_TYPE3: + counters = 4; + mipspmu.general_event_map = &loongson3_event_map3; + mipspmu.cache_event_map = &loongson3_cache_map3; + break; + } break; case CPU_CAVIUM_OCTEON: case CPU_CAVIUM_OCTEON_PLUS: @@ -1803,19 +2103,26 @@ init_hw_perf_events(void) mipspmu.irq = irq; if (read_c0_perfctrl0() & MIPS_PERFCTRL_W) { - mipspmu.max_period = (1ULL << 63) - 1; - mipspmu.valid_count = (1ULL << 63) - 1; - mipspmu.overflow = 1ULL << 63; + if (get_loongson3_pmu_type() == LOONGSON_PMU_TYPE2) { + counter_bits = 48; + mipspmu.max_period = (1ULL << 47) - 1; + mipspmu.valid_count = (1ULL << 47) - 1; + mipspmu.overflow = 1ULL << 47; + } else { + counter_bits = 64; + mipspmu.max_period = (1ULL << 63) - 1; + mipspmu.valid_count = (1ULL << 63) - 1; + mipspmu.overflow = 1ULL << 63; + } mipspmu.read_counter = mipsxx_pmu_read_counter_64; mipspmu.write_counter = mipsxx_pmu_write_counter_64; - counter_bits = 64; } else { + counter_bits = 32; mipspmu.max_period = (1ULL << 31) - 1; mipspmu.valid_count = (1ULL << 31) - 1; mipspmu.overflow = 1ULL << 31; mipspmu.read_counter = mipsxx_pmu_read_counter; mipspmu.write_counter = mipsxx_pmu_write_counter; - counter_bits = 32; } on_each_cpu(reset_counters, (void *)(long)counters, 1); diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c index f8d36710cd58..4184d641f05e 100644 --- a/arch/mips/kernel/proc.c +++ b/arch/mips/kernel/proc.c @@ -98,12 +98,16 @@ static int show_cpuinfo(struct seq_file *m, void *v) seq_printf(m, "%s", " mips32r1"); if (cpu_has_mips32r2) seq_printf(m, "%s", " mips32r2"); + if (cpu_has_mips32r5) + seq_printf(m, "%s", " mips32r5"); if (cpu_has_mips32r6) seq_printf(m, "%s", " mips32r6"); if (cpu_has_mips64r1) seq_printf(m, "%s", " mips64r1"); if (cpu_has_mips64r2) seq_printf(m, "%s", " mips64r2"); + if (cpu_has_mips64r5) + seq_printf(m, "%s", " mips64r5"); if (cpu_has_mips64r6) seq_printf(m, "%s", " mips64r6"); seq_printf(m, "\n"); diff --git a/arch/mips/kernel/r4k_fpu.S b/arch/mips/kernel/r4k_fpu.S index 59be5c812aa2..b91e91106475 100644 --- a/arch/mips/kernel/r4k_fpu.S +++ b/arch/mips/kernel/r4k_fpu.S @@ -41,7 +41,7 @@ LEAF(_save_fp) EXPORT_SYMBOL(_save_fp) #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) mfc0 t0, CP0_STATUS #endif fpu_save_double a0 t0 t1 # clobbers t1 @@ -53,7 +53,7 @@ EXPORT_SYMBOL(_save_fp) */ LEAF(_restore_fp) #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) mfc0 t0, CP0_STATUS #endif fpu_restore_double a0 t0 t1 # clobbers t1 @@ -103,10 +103,10 @@ LEAF(_save_fp_context) .set pop #if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) .set push SET_HARDFLOAT -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) .set mips32r2 .set fp=64 mfc0 t0, CP0_STATUS @@ -170,11 +170,11 @@ LEAF(_save_fp_context) LEAF(_restore_fp_context) EX lw t1, 0(a1) -#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ - defined(CONFIG_CPU_MIPSR6) +#if defined(CONFIG_64BIT) || defined(CONFIG_CPU_MIPSR2) || \ + defined(CONFIG_CPU_MIPSR5) || defined(CONFIG_CPU_MIPSR6) .set push SET_HARDFLOAT -#ifdef CONFIG_CPU_MIPSR2 +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) .set mips32r2 .set fp=64 mfc0 t0, CP0_STATUS diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 41df8221bb8f..50c9a57e0d3a 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -41,7 +41,7 @@ NESTED(handle_sys, PT_SIZE, sp) #if 0 SAVE_ALL move a1, v0 - PRINT("Scall %ld\n") + ASM_PRINT("Scall %ld\n") RESTORE_ALL #endif diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 10bef8f78e7c..7b537fa2035d 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -575,7 +575,7 @@ static int __init bootcmdline_scan_chosen(unsigned long node, const char *uname, #endif /* CONFIG_OF_EARLY_FLATTREE */ -static void __init bootcmdline_init(char **cmdline_p) +static void __init bootcmdline_init(void) { bool dt_bootargs = false; @@ -654,13 +654,11 @@ static void __init bootcmdline_init(char **cmdline_p) */ static void __init arch_mem_init(char **cmdline_p) { - extern void plat_mem_setup(void); - /* call board setup routine */ plat_mem_setup(); memblock_set_bottom_up(true); - bootcmdline_init(cmdline_p); + bootcmdline_init(); strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = command_line; @@ -702,7 +700,17 @@ static void __init arch_mem_init(char **cmdline_p) memblock_reserve(crashk_res.start, resource_size(&crashk_res)); #endif device_tree_init(); + + /* + * In order to reduce the possibility of kernel panic when failed to + * get IO TLB memory under CONFIG_SWIOTLB, it is better to allocate + * low memory as small as possible before plat_swiotlb_setup(), so + * make sparse_init() using top-down allocation. + */ + memblock_set_bottom_up(false); sparse_init(); + memblock_set_bottom_up(true); + plat_swiotlb_setup(); dma_contiguous_reserve(PFN_PHYS(max_low_pfn)); @@ -831,7 +839,7 @@ arch_initcall(debugfs_mips); /* User defined DMA coherency from command line. */ enum coherent_io_user_state coherentio = IO_COHERENCE_DEFAULT; EXPORT_SYMBOL_GPL(coherentio); -int hw_coherentio = 0; /* Actual hardware supported DMA coherency setting. */ +int hw_coherentio; /* Actual hardware supported DMA coherency setting. */ static int __init setcoherentio(char *str) { diff --git a/arch/mips/kernel/signal.c b/arch/mips/kernel/signal.c index f6efabcb4e92..a0262729cd4c 100644 --- a/arch/mips/kernel/signal.c +++ b/arch/mips/kernel/signal.c @@ -52,7 +52,7 @@ struct sigframe { /* Matches struct ucontext from its uc_mcontext field onwards */ struct sigcontext sf_sc; sigset_t sf_mask; - unsigned long long sf_extcontext[0]; + unsigned long long sf_extcontext[]; }; struct rt_sigframe { @@ -824,7 +824,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) regs->regs[2] = EINTR; break; } - /* fallthrough */ + fallthrough; case ERESTARTNOINTR: regs->regs[7] = regs->regs[26]; regs->regs[2] = regs->regs[0]; diff --git a/arch/mips/kernel/spram.c b/arch/mips/kernel/spram.c index 26d355462ace..d5d96214cce5 100644 --- a/arch/mips/kernel/spram.c +++ b/arch/mips/kernel/spram.c @@ -209,11 +209,11 @@ void spram_config(void) case CPU_P6600: config0 = read_c0_config(); /* FIXME: addresses are Malta specific */ - if (config0 & (1<<24)) { + if (config0 & MIPS_CONF_ISP) { probe_spram("ISPRAM", 0x1c000000, &ispram_load_tag, &ispram_store_tag); } - if (config0 & (1<<23)) + if (config0 & MIPS_CONF_DSP) probe_spram("DSPRAM", 0x1c100000, &dspram_load_tag, &dspram_store_tag); } diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 1f9e8ad636cc..f777141f5256 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -376,3 +376,4 @@ 435 n32 clone3 __sys_clone3 437 n32 openat2 sys_openat2 438 n32 pidfd_getfd sys_pidfd_getfd +439 n32 faccessat2 sys_faccessat2 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index c0b9d802dbf6..da8c76394e17 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -352,3 +352,4 @@ 435 n64 clone3 __sys_clone3 437 n64 openat2 sys_openat2 438 n64 pidfd_getfd sys_pidfd_getfd +439 n64 faccessat2 sys_faccessat2 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index ac586774c980..13280625d312 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -425,3 +425,4 @@ 435 o32 clone3 __sys_clone3 437 o32 openat2 sys_openat2 438 o32 pidfd_getfd sys_pidfd_getfd +439 o32 faccessat2 sys_faccessat2 diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index 37e9413a393d..caa01457dce6 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -18,12 +18,82 @@ #include <linux/smp.h> #include <linux/spinlock.h> #include <linux/export.h> +#include <linux/cpufreq.h> +#include <linux/delay.h> #include <asm/cpu-features.h> #include <asm/cpu-type.h> #include <asm/div64.h> #include <asm/time.h> +#ifdef CONFIG_CPU_FREQ + +static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref); +static DEFINE_PER_CPU(unsigned long, pcp_lpj_ref_freq); +static unsigned long glb_lpj_ref; +static unsigned long glb_lpj_ref_freq; + +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpumask *cpus = freq->policy->cpus; + unsigned long lpj; + int cpu; + + /* + * Skip lpj numbers adjustment if the CPU-freq transition is safe for + * the loops delay. (Is this possible?) + */ + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; + + /* Save the initial values of the lpjes for future scaling. */ + if (!glb_lpj_ref) { + glb_lpj_ref = boot_cpu_data.udelay_val; + glb_lpj_ref_freq = freq->old; + + for_each_online_cpu(cpu) { + per_cpu(pcp_lpj_ref, cpu) = + cpu_data[cpu].udelay_val; + per_cpu(pcp_lpj_ref_freq, cpu) = freq->old; + } + } + + /* + * Adjust global lpj variable and per-CPU udelay_val number in + * accordance with the new CPU frequency. + */ + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + loops_per_jiffy = cpufreq_scale(glb_lpj_ref, + glb_lpj_ref_freq, + freq->new); + + for_each_cpu(cpu, cpus) { + lpj = cpufreq_scale(per_cpu(pcp_lpj_ref, cpu), + per_cpu(pcp_lpj_ref_freq, cpu), + freq->new); + cpu_data[cpu].udelay_val = (unsigned int)lpj; + } + } + + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; + +static int __init register_cpufreq_notifier(void) +{ + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); +} +core_initcall(register_cpufreq_notifier); + +#endif /* CONFIG_CPU_FREQ */ + /* * forward reference */ diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 31968cbd6464..22f805a73921 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -71,6 +71,8 @@ #include <asm/tlbex.h> #include <asm/uasm.h> +#include <asm/mach-loongson64/cpucfg-emul.h> + extern void check_wait(void); extern asmlinkage void rollback_handle_int(void); extern asmlinkage void handle_int(void); @@ -693,6 +695,48 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) return -1; /* Must be something else ... */ } +/* + * Loongson-3 CSR instructions emulation + */ + +#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION + +#define LWC2 0xc8000000 +#define RS BASE +#define CSR_OPCODE2 0x00000118 +#define CSR_OPCODE2_MASK 0x000007ff +#define CSR_FUNC_MASK RT +#define CSR_FUNC_CPUCFG 0x8 + +static int simulate_loongson3_cpucfg(struct pt_regs *regs, + unsigned int opcode) +{ + int op = opcode & OPCODE; + int op2 = opcode & CSR_OPCODE2_MASK; + int csr_func = (opcode & CSR_FUNC_MASK) >> 16; + + if (op == LWC2 && op2 == CSR_OPCODE2 && csr_func == CSR_FUNC_CPUCFG) { + int rd = (opcode & RD) >> 11; + int rs = (opcode & RS) >> 21; + __u64 sel = regs->regs[rs]; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, 0); + + /* Do not emulate on unsupported core models. */ + if (!loongson3_cpucfg_emulation_enabled(¤t_cpu_data)) + return -1; + + regs->regs[rd] = loongson3_cpucfg_read_synthesized( + ¤t_cpu_data, sel); + + return 0; + } + + /* Not ours. */ + return -1; +} +#endif /* CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION */ + asmlinkage void do_ov(struct pt_regs *regs) { enum ctx_state prev_state; @@ -1166,6 +1210,11 @@ no_r2_instr: if (status < 0) status = simulate_fp(regs, opcode, old_epc, old31); + +#ifdef CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION + if (status < 0) + status = simulate_loongson3_cpucfg(regs, opcode); +#endif } else if (cpu_has_mmips) { unsigned short mmop[2] = { 0 }; @@ -1401,8 +1450,7 @@ asmlinkage void do_cpu(struct pt_regs *regs) force_sig(SIGILL); break; } - /* Fall through. */ - + fallthrough; case 1: { void __user *fault_addr; unsigned long fcr31; diff --git a/arch/mips/kernel/unaligned.c b/arch/mips/kernel/unaligned.c index ca6fc4762d97..0adce604fa44 100644 --- a/arch/mips/kernel/unaligned.c +++ b/arch/mips/kernel/unaligned.c @@ -89,12 +89,10 @@ #include <asm/fpu.h> #include <asm/fpu_emulator.h> #include <asm/inst.h> +#include <asm/unaligned-emul.h> #include <asm/mmu_context.h> #include <linux/uaccess.h> -#define STR(x) __STR(x) -#define __STR(x) #x - enum { UNALIGNED_ACTION_QUIET, UNALIGNED_ACTION_SIGNAL, @@ -108,778 +106,6 @@ static u32 unaligned_action; #endif extern void show_registers(struct pt_regs *regs); -#ifdef __BIG_ENDIAN -#define _LoadHW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ (".set\tnoat\n" \ - "1:\t"type##_lb("%0", "0(%2)")"\n" \ - "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - "3:\t.set\tat\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _LoadW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_lwl("%0", "(%2)")"\n" \ - "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without lwl instruction */ -#define _LoadW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n" \ - ".set\tnoat\n\t" \ - "1:"type##_lb("%0", "0(%2)")"\n\t" \ - "2:"type##_lbu("$1", "1(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:"type##_lbu("$1", "2(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:"type##_lbu("$1", "3(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - -#define _LoadHWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tnoat\n" \ - "1:\t"type##_lbu("%0", "0(%2)")"\n" \ - "2:\t"type##_lbu("$1", "1(%2)")"\n\t"\ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".set\tat\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _LoadWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_lwl("%0", "(%2)")"\n" \ - "2:\t"type##_lwr("%0", "3(%2)")"\n\t"\ - "dsll\t%0, %0, 32\n\t" \ - "dsrl\t%0, %0, 32\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - "\t.section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _LoadDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - "1:\tldl\t%0, (%2)\n" \ - "2:\tldr\t%0, 7(%2)\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - "\t.section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without lwl and ldl instructions */ -#define _LoadWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:"type##_lbu("%0", "0(%2)")"\n\t" \ - "2:"type##_lbu("$1", "1(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:"type##_lbu("$1", "2(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:"type##_lbu("$1", "3(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _LoadDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:lb\t%0, 0(%2)\n\t" \ - "2:lbu\t $1, 1(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:lbu\t$1, 2(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:lbu\t$1, 3(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "5:lbu\t$1, 4(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "6:lbu\t$1, 5(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "7:lbu\t$1, 6(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "8:lbu\t$1, 7(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n\t" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - - -#define _StoreHW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tnoat\n" \ - "1:\t"type##_sb("%1", "1(%2)")"\n" \ - "srl\t$1, %1, 0x8\n" \ - "2:\t"type##_sb("$1", "0(%2)")"\n" \ - ".set\tat\n\t" \ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT));\ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _StoreW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_swl("%1", "(%2)")"\n" \ - "2:\t"type##_swr("%1", "3(%2)")"\n\t"\ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _StoreDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - "1:\tsdl\t%1,(%2)\n" \ - "2:\tsdr\t%1, 7(%2)\n\t" \ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -#define _StoreW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:"type##_sb("%1", "3(%2)")"\n\t" \ - "srl\t$1, %1, 0x8\n\t" \ - "2:"type##_sb("$1", "2(%2)")"\n\t" \ - "srl\t$1, $1, 0x8\n\t" \ - "3:"type##_sb("$1", "1(%2)")"\n\t" \ - "srl\t$1, $1, 0x8\n\t" \ - "4:"type##_sb("$1", "0(%2)")"\n\t" \ - ".set\tpop\n\t" \ - "li\t%0, 0\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%0, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT) \ - : "memory"); \ -} while(0) - -#define _StoreDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:sb\t%1, 7(%2)\n\t" \ - "dsrl\t$1, %1, 0x8\n\t" \ - "2:sb\t$1, 6(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "3:sb\t$1, 5(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "4:sb\t$1, 4(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "5:sb\t$1, 3(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "6:sb\t$1, 2(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "7:sb\t$1, 1(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "8:sb\t$1, 0(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - ".set\tpop\n\t" \ - "li\t%0, 0\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%0, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ - ".previous" \ - : "=&r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT) \ - : "memory"); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - -#else /* __BIG_ENDIAN */ - -#define _LoadHW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ (".set\tnoat\n" \ - "1:\t"type##_lb("%0", "1(%2)")"\n" \ - "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - "3:\t.set\tat\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _LoadW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_lwl("%0", "3(%2)")"\n" \ - "2:\t"type##_lwr("%0", "(%2)")"\n\t"\ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without lwl instruction */ -#define _LoadW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n" \ - ".set\tnoat\n\t" \ - "1:"type##_lb("%0", "3(%2)")"\n\t" \ - "2:"type##_lbu("$1", "2(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:"type##_lbu("$1", "1(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:"type##_lbu("$1", "0(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - - -#define _LoadHWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tnoat\n" \ - "1:\t"type##_lbu("%0", "1(%2)")"\n" \ - "2:\t"type##_lbu("$1", "0(%2)")"\n\t"\ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".set\tat\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _LoadWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_lwl("%0", "3(%2)")"\n" \ - "2:\t"type##_lwr("%0", "(%2)")"\n\t"\ - "dsll\t%0, %0, 32\n\t" \ - "dsrl\t%0, %0, 32\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - "\t.section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _LoadDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - "1:\tldl\t%0, 7(%2)\n" \ - "2:\tldr\t%0, (%2)\n\t" \ - "li\t%1, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - "\t.section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%1, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without lwl and ldl instructions */ -#define _LoadWU(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:"type##_lbu("%0", "3(%2)")"\n\t" \ - "2:"type##_lbu("$1", "2(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:"type##_lbu("$1", "1(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:"type##_lbu("$1", "0(%2)")"\n\t" \ - "sll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _LoadDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:lb\t%0, 7(%2)\n\t" \ - "2:lbu\t$1, 6(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "3:lbu\t$1, 5(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "4:lbu\t$1, 4(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "5:lbu\t$1, 3(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "6:lbu\t$1, 2(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "7:lbu\t$1, 1(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "8:lbu\t$1, 0(%2)\n\t" \ - "dsll\t%0, 0x8\n\t" \ - "or\t%0, $1\n\t" \ - "li\t%1, 0\n" \ - ".set\tpop\n\t" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%1, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ - ".previous" \ - : "=&r" (value), "=r" (res) \ - : "r" (addr), "i" (-EFAULT)); \ -} while(0) -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ - -#define _StoreHW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tnoat\n" \ - "1:\t"type##_sb("%1", "0(%2)")"\n" \ - "srl\t$1,%1, 0x8\n" \ - "2:\t"type##_sb("$1", "1(%2)")"\n" \ - ".set\tat\n\t" \ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT));\ -} while(0) - -#ifndef CONFIG_CPU_NO_LOAD_STORE_LR -#define _StoreW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - "1:\t"type##_swl("%1", "3(%2)")"\n" \ - "2:\t"type##_swr("%1", "(%2)")"\n\t"\ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#define _StoreDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - "1:\tsdl\t%1, 7(%2)\n" \ - "2:\tsdr\t%1, (%2)\n\t" \ - "li\t%0, 0\n" \ - "3:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "4:\tli\t%0, %3\n\t" \ - "j\t3b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 4b\n\t" \ - STR(PTR)"\t2b, 4b\n\t" \ - ".previous" \ - : "=r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT)); \ -} while(0) - -#else /* CONFIG_CPU_NO_LOAD_STORE_LR */ -/* For CPUs without swl and sdl instructions */ -#define _StoreW(addr, value, res, type) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:"type##_sb("%1", "0(%2)")"\n\t" \ - "srl\t$1, %1, 0x8\n\t" \ - "2:"type##_sb("$1", "1(%2)")"\n\t" \ - "srl\t$1, $1, 0x8\n\t" \ - "3:"type##_sb("$1", "2(%2)")"\n\t" \ - "srl\t$1, $1, 0x8\n\t" \ - "4:"type##_sb("$1", "3(%2)")"\n\t" \ - ".set\tpop\n\t" \ - "li\t%0, 0\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%0, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - ".previous" \ - : "=&r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT) \ - : "memory"); \ -} while(0) - -#define _StoreDW(addr, value, res) \ -do { \ - __asm__ __volatile__ ( \ - ".set\tpush\n\t" \ - ".set\tnoat\n\t" \ - "1:sb\t%1, 0(%2)\n\t" \ - "dsrl\t$1, %1, 0x8\n\t" \ - "2:sb\t$1, 1(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "3:sb\t$1, 2(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "4:sb\t$1, 3(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "5:sb\t$1, 4(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "6:sb\t$1, 5(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "7:sb\t$1, 6(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - "8:sb\t$1, 7(%2)\n\t" \ - "dsrl\t$1, $1, 0x8\n\t" \ - ".set\tpop\n\t" \ - "li\t%0, 0\n" \ - "10:\n\t" \ - ".insn\n\t" \ - ".section\t.fixup,\"ax\"\n\t" \ - "11:\tli\t%0, %3\n\t" \ - "j\t10b\n\t" \ - ".previous\n\t" \ - ".section\t__ex_table,\"a\"\n\t" \ - STR(PTR)"\t1b, 11b\n\t" \ - STR(PTR)"\t2b, 11b\n\t" \ - STR(PTR)"\t3b, 11b\n\t" \ - STR(PTR)"\t4b, 11b\n\t" \ - STR(PTR)"\t5b, 11b\n\t" \ - STR(PTR)"\t6b, 11b\n\t" \ - STR(PTR)"\t7b, 11b\n\t" \ - STR(PTR)"\t8b, 11b\n\t" \ - ".previous" \ - : "=&r" (res) \ - : "r" (value), "r" (addr), "i" (-EFAULT) \ - : "memory"); \ -} while(0) - -#endif /* CONFIG_CPU_NO_LOAD_STORE_LR */ -#endif - -#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel) -#define LoadHWUE(addr, value, res) _LoadHWU(addr, value, res, user) -#define LoadWU(addr, value, res) _LoadWU(addr, value, res, kernel) -#define LoadWUE(addr, value, res) _LoadWU(addr, value, res, user) -#define LoadHW(addr, value, res) _LoadHW(addr, value, res, kernel) -#define LoadHWE(addr, value, res) _LoadHW(addr, value, res, user) -#define LoadW(addr, value, res) _LoadW(addr, value, res, kernel) -#define LoadWE(addr, value, res) _LoadW(addr, value, res, user) -#define LoadDW(addr, value, res) _LoadDW(addr, value, res) - -#define StoreHW(addr, value, res) _StoreHW(addr, value, res, kernel) -#define StoreHWE(addr, value, res) _StoreHW(addr, value, res, user) -#define StoreW(addr, value, res) _StoreW(addr, value, res, kernel) -#define StoreWE(addr, value, res) _StoreW(addr, value, res, user) -#define StoreDW(addr, value, res) _StoreDW(addr, value, res) - static void emulate_load_store_insn(struct pt_regs *regs, void __user *addr, unsigned int __user *pc) { diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index a5f00ec73ea6..f185a85a27c1 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -55,7 +55,7 @@ SECTIONS /* . = 0xa800000000300000; */ . = 0xffffffff80300000; #endif - . = VMLINUX_LOAD_ADDRESS; + . = LINKER_LOAD_ADDRESS; /* read-only */ _text = .; /* Text and read-only data */ .text : { diff --git a/arch/mips/kernel/watch.c b/arch/mips/kernel/watch.c index ba73b4077668..c9263b95cb2e 100644 --- a/arch/mips/kernel/watch.c +++ b/arch/mips/kernel/watch.c @@ -27,15 +27,15 @@ void mips_install_watch_registers(struct task_struct *t) case 4: write_c0_watchlo3(watches->watchlo[3]); write_c0_watchhi3(watchhi | watches->watchhi[3]); - /* fall through */ + fallthrough; case 3: write_c0_watchlo2(watches->watchlo[2]); write_c0_watchhi2(watchhi | watches->watchhi[2]); - /* fall through */ + fallthrough; case 2: write_c0_watchlo1(watches->watchlo[1]); write_c0_watchhi1(watchhi | watches->watchhi[1]); - /* fall through */ + fallthrough; case 1: write_c0_watchlo0(watches->watchlo[0]); write_c0_watchhi0(watchhi | watches->watchhi[0]); @@ -58,13 +58,13 @@ void mips_read_watch_registers(void) BUG(); case 4: watches->watchhi[3] = (read_c0_watchhi3() & watchhi_mask); - /* fall through */ + fallthrough; case 3: watches->watchhi[2] = (read_c0_watchhi2() & watchhi_mask); - /* fall through */ + fallthrough; case 2: watches->watchhi[1] = (read_c0_watchhi1() & watchhi_mask); - /* fall through */ + fallthrough; case 1: watches->watchhi[0] = (read_c0_watchhi0() & watchhi_mask); } @@ -91,25 +91,25 @@ void mips_clear_watch_registers(void) BUG(); case 8: write_c0_watchlo7(0); - /* fall through */ + fallthrough; case 7: write_c0_watchlo6(0); - /* fall through */ + fallthrough; case 6: write_c0_watchlo5(0); - /* fall through */ + fallthrough; case 5: write_c0_watchlo4(0); - /* fall through */ + fallthrough; case 4: write_c0_watchlo3(0); - /* fall through */ + fallthrough; case 3: write_c0_watchlo2(0); - /* fall through */ + fallthrough; case 2: write_c0_watchlo1(0); - /* fall through */ + fallthrough; case 1: write_c0_watchlo0(0); } diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 754094b40a75..7ccf9b096783 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -64,7 +64,7 @@ static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc, switch (insn.r_format.func) { case jalr_op: arch->gprs[insn.r_format.rd] = epc + 8; - /* Fall through */ + fallthrough; case jr_op: nextpc = arch->gprs[insn.r_format.rs]; break; @@ -140,7 +140,7 @@ static int kvm_compute_return_epc(struct kvm_vcpu *vcpu, unsigned long instpc, /* These are unconditional and in j_format. */ case jal_op: arch->gprs[31] = instpc + 8; - /* fall through */ + fallthrough; case j_op: epc += 4; epc >>= 28; @@ -1724,14 +1724,14 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, case lhu_op: vcpu->mmio_needed = 1; /* unsigned */ - /* fall through */ + fallthrough; case lh_op: run->mmio.len = 2; break; case lbu_op: vcpu->mmio_needed = 1; /* unsigned */ - /* fall through */ + fallthrough; case lb_op: run->mmio.len = 1; break; @@ -1790,7 +1790,7 @@ static enum emulation_result kvm_mips_guest_cache_op(int (*fn)(unsigned long), return EMULATE_EXCEPT; default: break; - }; + } } } @@ -1965,7 +1965,7 @@ enum emulation_result kvm_mips_emulate_inst(u32 cause, u32 *opc, break; default: goto unknown; - }; + } break; unknown: #endif diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 8f05dd0a0f4e..3b0148c99c0d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -39,40 +39,41 @@ #define VECTORSPACING 0x100 /* for EI/VI mode */ #endif -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x) struct kvm_stats_debugfs_item debugfs_entries[] = { - { "wait", VCPU_STAT(wait_exits), KVM_STAT_VCPU }, - { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, - { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, - { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, - { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, - { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, - { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, - { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, - { "addrerr_st", VCPU_STAT(addrerr_st_exits), KVM_STAT_VCPU }, - { "addrerr_ld", VCPU_STAT(addrerr_ld_exits), KVM_STAT_VCPU }, - { "syscall", VCPU_STAT(syscall_exits), KVM_STAT_VCPU }, - { "resvd_inst", VCPU_STAT(resvd_inst_exits), KVM_STAT_VCPU }, - { "break_inst", VCPU_STAT(break_inst_exits), KVM_STAT_VCPU }, - { "trap_inst", VCPU_STAT(trap_inst_exits), KVM_STAT_VCPU }, - { "msa_fpe", VCPU_STAT(msa_fpe_exits), KVM_STAT_VCPU }, - { "fpe", VCPU_STAT(fpe_exits), KVM_STAT_VCPU }, - { "msa_disabled", VCPU_STAT(msa_disabled_exits), KVM_STAT_VCPU }, - { "flush_dcache", VCPU_STAT(flush_dcache_exits), KVM_STAT_VCPU }, + VCPU_STAT("wait", wait_exits), + VCPU_STAT("cache", cache_exits), + VCPU_STAT("signal", signal_exits), + VCPU_STAT("interrupt", int_exits), + VCPU_STAT("cop_unusable", cop_unusable_exits), + VCPU_STAT("tlbmod", tlbmod_exits), + VCPU_STAT("tlbmiss_ld", tlbmiss_ld_exits), + VCPU_STAT("tlbmiss_st", tlbmiss_st_exits), + VCPU_STAT("addrerr_st", addrerr_st_exits), + VCPU_STAT("addrerr_ld", addrerr_ld_exits), + VCPU_STAT("syscall", syscall_exits), + VCPU_STAT("resvd_inst", resvd_inst_exits), + VCPU_STAT("break_inst", break_inst_exits), + VCPU_STAT("trap_inst", trap_inst_exits), + VCPU_STAT("msa_fpe", msa_fpe_exits), + VCPU_STAT("fpe", fpe_exits), + VCPU_STAT("msa_disabled", msa_disabled_exits), + VCPU_STAT("flush_dcache", flush_dcache_exits), #ifdef CONFIG_KVM_MIPS_VZ - { "vz_gpsi", VCPU_STAT(vz_gpsi_exits), KVM_STAT_VCPU }, - { "vz_gsfc", VCPU_STAT(vz_gsfc_exits), KVM_STAT_VCPU }, - { "vz_hc", VCPU_STAT(vz_hc_exits), KVM_STAT_VCPU }, - { "vz_grr", VCPU_STAT(vz_grr_exits), KVM_STAT_VCPU }, - { "vz_gva", VCPU_STAT(vz_gva_exits), KVM_STAT_VCPU }, - { "vz_ghfc", VCPU_STAT(vz_ghfc_exits), KVM_STAT_VCPU }, - { "vz_gpa", VCPU_STAT(vz_gpa_exits), KVM_STAT_VCPU }, - { "vz_resvd", VCPU_STAT(vz_resvd_exits), KVM_STAT_VCPU }, + VCPU_STAT("vz_gpsi", vz_gpsi_exits), + VCPU_STAT("vz_gsfc", vz_gsfc_exits), + VCPU_STAT("vz_hc", vz_hc_exits), + VCPU_STAT("vz_grr", vz_grr_exits), + VCPU_STAT("vz_gva", vz_gva_exits), + VCPU_STAT("vz_ghfc", vz_ghfc_exits), + VCPU_STAT("vz_gpa", vz_gpa_exits), + VCPU_STAT("vz_resvd", vz_resvd_exits), #endif - { "halt_successful_poll", VCPU_STAT(halt_successful_poll), KVM_STAT_VCPU }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), KVM_STAT_VCPU }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid), KVM_STAT_VCPU }, - { "halt_wakeup", VCPU_STAT(halt_wakeup), KVM_STAT_VCPU }, + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), {NULL} }; @@ -80,13 +81,13 @@ bool kvm_trace_guest_mode_change; int kvm_guest_mode_change_trace_reg(void) { - kvm_trace_guest_mode_change = 1; + kvm_trace_guest_mode_change = true; return 0; } void kvm_guest_mode_change_trace_unreg(void) { - kvm_trace_guest_mode_change = 0; + kvm_trace_guest_mode_change = false; } /* @@ -284,8 +285,7 @@ static enum hrtimer_restart kvm_mips_comparecount_wakeup(struct hrtimer *timer) kvm_mips_callbacks->queue_timer_int(vcpu); vcpu->arch.wait = 0; - if (swq_has_sleeper(&vcpu->wq)) - swake_up_one(&vcpu->wq); + rcuwait_wake_up(&vcpu->wait); return kvm_mips_count_timeout(vcpu); } @@ -439,8 +439,9 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -ENOIOCTLCMD; } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int r = -EINTR; vcpu_load(vcpu); @@ -511,8 +512,7 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, dvcpu->arch.wait = 0; - if (swq_has_sleeper(&dvcpu->wq)) - swake_up_one(&dvcpu->wq); + rcuwait_wake_up(&dvcpu->wait); return 0; } diff --git a/arch/mips/kvm/tlb.c b/arch/mips/kvm/tlb.c index 7cd92166a0b9..5d436c5216cc 100644 --- a/arch/mips/kvm/tlb.c +++ b/arch/mips/kvm/tlb.c @@ -469,7 +469,7 @@ void kvm_vz_local_flush_guesttlb_all(void) cvmmemctl2 |= CVMMEMCTL2_INHIBITTS; write_c0_cvmmemctl2(cvmmemctl2); break; - }; + } /* Invalidate guest entries in guest TLB */ write_gc0_entrylo0(0); @@ -486,7 +486,7 @@ void kvm_vz_local_flush_guesttlb_all(void) if (cvmmemctl2) { cvmmemctl2 &= ~CVMMEMCTL2_INHIBITTS; write_c0_cvmmemctl2(cvmmemctl2); - }; + } write_gc0_index(old_index); write_gc0_entryhi(old_entryhi); diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index dde20887a70d..51f51009a53f 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -1118,7 +1118,7 @@ static enum emulation_result kvm_vz_gpsi_cache(union mips_instruction inst, break; default: break; - }; + } kvm_err("@ %#lx/%#lx CACHE (cache: %#x, op: %#x, base[%d]: %#lx, offset: %#x\n", curr_pc, vcpu->arch.gprs[31], cache, op, base, arch->gprs[base], @@ -1183,7 +1183,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, KVM_TRACE_HWR(rd, sel), 0); goto unknown; - }; + } trace_kvm_hwr(vcpu, KVM_TRACE_RDHWR, KVM_TRACE_HWR(rd, sel), arch->gprs[rt]); @@ -1192,7 +1192,7 @@ static enum emulation_result kvm_trap_vz_handle_gpsi(u32 cause, u32 *opc, break; default: goto unknown; - }; + } break; unknown: @@ -1946,7 +1946,7 @@ static int kvm_vz_get_one_reg(struct kvm_vcpu *vcpu, default: *v = (long)kvm_read_c0_guest_prid(cop0); break; - }; + } break; case KVM_REG_MIPS_CP0_EBASE: *v = kvm_vz_read_gc0_ebase(); @@ -2185,7 +2185,7 @@ static int kvm_vz_set_one_reg(struct kvm_vcpu *vcpu, default: kvm_write_c0_guest_prid(cop0, v); break; - }; + } break; case KVM_REG_MIPS_CP0_EBASE: kvm_vz_write_gc0_ebase(v); @@ -2980,7 +2980,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) */ /* PageGrain */ - if (cpu_has_mips_r6) + if (cpu_has_mips_r5 || cpu_has_mips_r6) kvm_write_sw_gc0_pagegrain(cop0, PG_RIE | PG_XIE | PG_IEC); /* Wired */ if (cpu_has_mips_r6) @@ -2988,7 +2988,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) read_gc0_wired() & MIPSR6_WIRED_LIMIT); /* Status */ kvm_write_sw_gc0_status(cop0, ST0_BEV | ST0_ERL); - if (cpu_has_mips_r6) + if (cpu_has_mips_r5 || cpu_has_mips_r6) kvm_change_sw_gc0_status(cop0, ST0_FR, read_gc0_status()); /* IntCtl */ kvm_write_sw_gc0_intctl(cop0, read_gc0_intctl() & @@ -3086,7 +3086,7 @@ static int kvm_vz_vcpu_setup(struct kvm_vcpu *vcpu) } /* reset HTW registers */ - if (cpu_guest_has_htw && cpu_has_mips_r6) { + if (cpu_guest_has_htw && (cpu_has_mips_r5 || cpu_has_mips_r6)) { /* PWField */ kvm_write_sw_gc0_pwfield(cop0, 0x0c30c302); /* PWSize */ diff --git a/arch/mips/lantiq/Platform b/arch/mips/lantiq/Platform index b3ec49838fd7..0bc9c0fbd431 100644 --- a/arch/mips/lantiq/Platform +++ b/arch/mips/lantiq/Platform @@ -2,7 +2,6 @@ # Lantiq # -platform-$(CONFIG_LANTIQ) += lantiq/ cflags-$(CONFIG_LANTIQ) += -I$(srctree)/arch/mips/include/asm/mach-lantiq load-$(CONFIG_LANTIQ) = 0xffffffff80002000 cflags-$(CONFIG_SOC_TYPE_XWAY) += -I$(srctree)/arch/mips/include/asm/mach-lantiq/xway diff --git a/arch/mips/lasat/Kconfig b/arch/mips/lasat/Kconfig deleted file mode 100644 index 11b89e94b835..000000000000 --- a/arch/mips/lasat/Kconfig +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -config PICVUE - tristate "PICVUE LCD display driver" - depends on LASAT - -config PICVUE_PROC - tristate "PICVUE LCD display driver /proc interface" - depends on PICVUE && PROC_FS - -config DS1603 - bool "DS1603 RTC driver" - depends on LASAT - -config LASAT_SYSCTL - bool "LASAT sysctl interface" - depends on LASAT diff --git a/arch/mips/lasat/Makefile b/arch/mips/lasat/Makefile deleted file mode 100644 index 1789b227ef20..000000000000 --- a/arch/mips/lasat/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the LASAT specific kernel interface routines under Linux. -# - -obj-y += reset.o setup.o prom.o lasat_board.o \ - at93c.o interrupt.o serial.o - -obj-$(CONFIG_LASAT_SYSCTL) += sysctl.o -obj-$(CONFIG_DS1603) += ds1603.o -obj-$(CONFIG_PICVUE) += picvue.o -obj-$(CONFIG_PICVUE_PROC) += picvue_proc.o - -clean: - make -C image clean diff --git a/arch/mips/lasat/Platform b/arch/mips/lasat/Platform deleted file mode 100644 index 760252828bf1..000000000000 --- a/arch/mips/lasat/Platform +++ /dev/null @@ -1,7 +0,0 @@ -# -# LASAT platforms -# -platform-$(CONFIG_LASAT) += lasat/ -cflags-$(CONFIG_LASAT) += \ - -I$(srctree)/arch/mips/include/asm/mach-lasat -load-$(CONFIG_LASAT) += 0xffffffff80000000 diff --git a/arch/mips/lasat/at93c.c b/arch/mips/lasat/at93c.c deleted file mode 100644 index f895fe94b937..000000000000 --- a/arch/mips/lasat/at93c.c +++ /dev/null @@ -1,148 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Atmel AT93C46 serial eeprom driver - * - * Brian Murphy <brian.murphy@eicon.com> - * - */ -#include <linux/kernel.h> -#include <linux/delay.h> -#include <asm/lasat/lasat.h> - -#include "at93c.h" - -#define AT93C_ADDR_SHIFT 7 -#define AT93C_ADDR_MAX ((1 << AT93C_ADDR_SHIFT) - 1) -#define AT93C_RCMD (0x6 << AT93C_ADDR_SHIFT) -#define AT93C_WCMD (0x5 << AT93C_ADDR_SHIFT) -#define AT93C_WENCMD 0x260 -#define AT93C_WDSCMD 0x200 - -struct at93c_defs *at93c; - -static void at93c_reg_write(u32 val) -{ - *at93c->reg = val; -} - -static u32 at93c_reg_read(void) -{ - u32 tmp = *at93c->reg; - return tmp; -} - -static u32 at93c_datareg_read(void) -{ - u32 tmp = *at93c->rdata_reg; - return tmp; -} - -static void at93c_cycle_clk(u32 data) -{ - at93c_reg_write(data | at93c->clk); - lasat_ndelay(250); - at93c_reg_write(data & ~at93c->clk); - lasat_ndelay(250); -} - -static void at93c_write_databit(u8 bit) -{ - u32 data = at93c_reg_read(); - if (bit) - data |= 1 << at93c->wdata_shift; - else - data &= ~(1 << at93c->wdata_shift); - - at93c_reg_write(data); - lasat_ndelay(100); - at93c_cycle_clk(data); -} - -static unsigned int at93c_read_databit(void) -{ - u32 data; - - at93c_cycle_clk(at93c_reg_read()); - data = (at93c_datareg_read() >> at93c->rdata_shift) & 1; - return data; -} - -static u8 at93c_read_byte(void) -{ - int i; - u8 data = 0; - - for (i = 0; i <= 7; i++) { - data <<= 1; - data |= at93c_read_databit(); - } - return data; -} - -static void at93c_write_bits(u32 data, int size) -{ - int i; - int shift = size - 1; - u32 mask = (1 << shift); - - for (i = 0; i < size; i++) { - at93c_write_databit((data & mask) >> shift); - data <<= 1; - } -} - -static void at93c_init_op(void) -{ - at93c_reg_write((at93c_reg_read() | at93c->cs) & - ~at93c->clk & ~(1 << at93c->rdata_shift)); - lasat_ndelay(50); -} - -static void at93c_end_op(void) -{ - at93c_reg_write(at93c_reg_read() & ~at93c->cs); - lasat_ndelay(250); -} - -static void at93c_wait(void) -{ - at93c_init_op(); - while (!at93c_read_databit()) - ; - at93c_end_op(); -}; - -static void at93c_disable_wp(void) -{ - at93c_init_op(); - at93c_write_bits(AT93C_WENCMD, 10); - at93c_end_op(); -} - -static void at93c_enable_wp(void) -{ - at93c_init_op(); - at93c_write_bits(AT93C_WDSCMD, 10); - at93c_end_op(); -} - -u8 at93c_read(u8 addr) -{ - u8 byte; - at93c_init_op(); - at93c_write_bits((addr & AT93C_ADDR_MAX)|AT93C_RCMD, 10); - byte = at93c_read_byte(); - at93c_end_op(); - return byte; -} - -void at93c_write(u8 addr, u8 data) -{ - at93c_disable_wp(); - at93c_init_op(); - at93c_write_bits((addr & AT93C_ADDR_MAX)|AT93C_WCMD, 10); - at93c_write_bits(data, 8); - at93c_end_op(); - at93c_wait(); - at93c_enable_wp(); -} diff --git a/arch/mips/lasat/at93c.h b/arch/mips/lasat/at93c.h deleted file mode 100644 index 7a99a02d81d0..000000000000 --- a/arch/mips/lasat/at93c.h +++ /dev/null @@ -1,19 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Atmel AT93C46 serial eeprom driver - * - * Brian Murphy <brian.murphy@eicon.com> - * - */ - -extern struct at93c_defs { - volatile u32 *reg; - volatile u32 *rdata_reg; - int rdata_shift; - int wdata_shift; - u32 cs; - u32 clk; -} *at93c; - -u8 at93c_read(u8 addr); -void at93c_write(u8 addr, u8 data); diff --git a/arch/mips/lasat/ds1603.c b/arch/mips/lasat/ds1603.c deleted file mode 100644 index e6ce39fefa78..000000000000 --- a/arch/mips/lasat/ds1603.c +++ /dev/null @@ -1,190 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Dallas Semiconductors 1603 RTC driver - * - * Brian Murphy <brian@murphy.dk> - * - */ -#include <linux/kernel.h> -#include <asm/lasat/lasat.h> -#include <linux/delay.h> -#include <asm/lasat/ds1603.h> -#include <asm/time.h> - -#include "ds1603.h" - -#define READ_TIME_CMD 0x81 -#define SET_TIME_CMD 0x80 -#define TRIMMER_SET_CMD 0xC0 -#define TRIMMER_VALUE_MASK 0x38 -#define TRIMMER_SHIFT 3 - -struct ds_defs *ds1603; - -/* HW specific register functions */ -static void rtc_reg_write(unsigned long val) -{ - *ds1603->reg = val; -} - -static unsigned long rtc_reg_read(void) -{ - unsigned long tmp = *ds1603->reg; - return tmp; -} - -static unsigned long rtc_datareg_read(void) -{ - unsigned long tmp = *ds1603->data_reg; - return tmp; -} - -static void rtc_nrst_high(void) -{ - rtc_reg_write(rtc_reg_read() | ds1603->rst); -} - -static void rtc_nrst_low(void) -{ - rtc_reg_write(rtc_reg_read() & ~ds1603->rst); -} - -static void rtc_cycle_clock(unsigned long data) -{ - data |= ds1603->clk; - rtc_reg_write(data); - lasat_ndelay(250); - if (ds1603->data_reversed) - data &= ~ds1603->data; - else - data |= ds1603->data; - data &= ~ds1603->clk; - rtc_reg_write(data); - lasat_ndelay(250 + ds1603->huge_delay); -} - -static void rtc_write_databit(unsigned int bit) -{ - unsigned long data = rtc_reg_read(); - if (ds1603->data_reversed) - bit = !bit; - if (bit) - data |= ds1603->data; - else - data &= ~ds1603->data; - - rtc_reg_write(data); - lasat_ndelay(50 + ds1603->huge_delay); - rtc_cycle_clock(data); -} - -static unsigned int rtc_read_databit(void) -{ - unsigned int data; - - data = (rtc_datareg_read() & (1 << ds1603->data_read_shift)) - >> ds1603->data_read_shift; - rtc_cycle_clock(rtc_reg_read()); - return data; -} - -static void rtc_write_byte(unsigned int byte) -{ - int i; - - for (i = 0; i <= 7; i++) { - rtc_write_databit(byte & 1L); - byte >>= 1; - } -} - -static void rtc_write_word(unsigned long word) -{ - int i; - - for (i = 0; i <= 31; i++) { - rtc_write_databit(word & 1L); - word >>= 1; - } -} - -static unsigned long rtc_read_word(void) -{ - int i; - unsigned long word = 0; - unsigned long shift = 0; - - for (i = 0; i <= 31; i++) { - word |= rtc_read_databit() << shift; - shift++; - } - return word; -} - -static void rtc_init_op(void) -{ - rtc_nrst_high(); - - rtc_reg_write(rtc_reg_read() & ~ds1603->clk); - - lasat_ndelay(50); -} - -static void rtc_end_op(void) -{ - rtc_nrst_low(); - lasat_ndelay(1000); -} - -void read_persistent_clock64(struct timespec64 *ts) -{ - unsigned long word; - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - rtc_init_op(); - rtc_write_byte(READ_TIME_CMD); - word = rtc_read_word(); - rtc_end_op(); - spin_unlock_irqrestore(&rtc_lock, flags); - - ts->tv_sec = word; - ts->tv_nsec = 0; -} - -int update_persistent_clock64(struct timespec64 now) -{ - time64_t time = now.tv_sec; - unsigned long flags; - - spin_lock_irqsave(&rtc_lock, flags); - rtc_init_op(); - rtc_write_byte(SET_TIME_CMD); - /* - * Due to the hardware limitation, we cast to 'unsigned long' type, - * so it will overflow in year 2106 on 32-bit machine. - */ - rtc_write_word((unsigned long)time); - rtc_end_op(); - spin_unlock_irqrestore(&rtc_lock, flags); - - return 0; -} - -void ds1603_set_trimmer(unsigned int trimval) -{ - rtc_init_op(); - rtc_write_byte(((trimval << TRIMMER_SHIFT) & TRIMMER_VALUE_MASK) - | (TRIMMER_SET_CMD)); - rtc_end_op(); -} - -void ds1603_disable(void) -{ - ds1603_set_trimmer(TRIMMER_DISABLE_RTC); -} - -void ds1603_enable(void) -{ - ds1603_set_trimmer(TRIMMER_DEFAULT); -} diff --git a/arch/mips/lasat/ds1603.h b/arch/mips/lasat/ds1603.h deleted file mode 100644 index 00987d3bdc21..000000000000 --- a/arch/mips/lasat/ds1603.h +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Dallas Semiconductors 1603 RTC driver - * - * Brian Murphy <brian@murphy.dk> - * - */ -#ifndef __DS1603_H -#define __DS1603_H - -struct ds_defs { - volatile u32 *reg; - volatile u32 *data_reg; - u32 rst; - u32 clk; - u32 data; - u32 data_read_shift; - char data_reversed; - u32 huge_delay; -}; - -extern struct ds_defs *ds1603; - -void ds1603_set_trimmer(unsigned int); -void ds1603_enable(void); -void ds1603_disable(void); -void ds1603_init(struct ds_defs *); - -#define TRIMMER_DEFAULT 3 -#define TRIMMER_DISABLE_RTC 0 - -#endif diff --git a/arch/mips/lasat/image/Makefile b/arch/mips/lasat/image/Makefile deleted file mode 100644 index 78ce4cff1012..000000000000 --- a/arch/mips/lasat/image/Makefile +++ /dev/null @@ -1,53 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# MAKEFILE FOR THE MIPS LINUX BOOTLOADER AND ROM DEBUGGER -# -# i-data Networks -# -# Author: Thomas Horsten <thh@i-data.com> -# - -ifndef Version - Version = "$(USER)-test" -endif - -MKLASATIMG = mklasatimg -MKLASATIMG_ARCH = mq2,mqpro,sp100,sp200 -KERNEL_IMAGE = vmlinux - -LDSCRIPT= -L$(srctree)/$(src) -Tromscript.normal - -HEAD_DEFINES := -D_kernel_start=$(VMLINUX_LOAD_ADDRESS) \ - -D_kernel_entry=$(VMLINUX_ENTRY_ADDRESS) \ - -D VERSION="\"$(Version)\"" \ - -D TIMESTAMP=$(shell date +%s) - -$(obj)/head.o: $(obj)/head.S $(KERNEL_IMAGE) - $(CC) -fno-pic $(HEAD_DEFINES) $(LINUXINCLUDE) -c -o $@ $< - -OBJECTS = head.o kImage.o - -rom.sw: $(obj)/rom.sw -rom.bin: $(obj)/rom.bin - -$(obj)/rom.sw: $(obj)/rom.bin - $(MKLASATIMG) -o $@ -k $^ -m $(MKLASATIMG_ARCH) - -$(obj)/rom.bin: $(obj)/rom - $(OBJCOPY) -O binary -S $^ $@ - -# Rule to make the bootloader -$(obj)/rom: $(addprefix $(obj)/,$(OBJECTS)) - $(LD) $(KBUILD_LDFLAGS) $(LDSCRIPT) -o $@ $^ - -$(obj)/%.o: $(obj)/%.gz - $(LD) -r -o $@ -b binary $< - -$(obj)/%.gz: $(obj)/%.bin - gzip -cf -9 $< > $@ - -$(obj)/kImage.bin: $(KERNEL_IMAGE) - $(OBJCOPY) -O binary -S $^ $@ - -clean: - rm -f rom rom.bin rom.sw kImage.bin kImage.o diff --git a/arch/mips/lasat/image/head.S b/arch/mips/lasat/image/head.S deleted file mode 100644 index 1a27312d4c2e..000000000000 --- a/arch/mips/lasat/image/head.S +++ /dev/null @@ -1,32 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <asm/lasat/head.h> - - .text - .section .text..start, "ax" - .set noreorder - .set mips3 - - /* Magic words identifying a software image */ - .word LASAT_K_MAGIC0_VAL - .word LASAT_K_MAGIC1_VAL - - /* Image header version */ - .word 0x00000002 - - /* image start and size */ - .word _image_start - .word _image_size - - /* start of kernel and entrypoint in uncompressed image */ - .word _kernel_start - .word _kernel_entry - - /* Here we have room for future flags */ - - .org 0x40 -reldate: - .word TIMESTAMP - - .org 0x50 -release: - .string VERSION diff --git a/arch/mips/lasat/image/romscript.normal b/arch/mips/lasat/image/romscript.normal deleted file mode 100644 index 0864c963e188..000000000000 --- a/arch/mips/lasat/image/romscript.normal +++ /dev/null @@ -1,23 +0,0 @@ -OUTPUT_ARCH(mips) - -SECTIONS -{ - .text : - { - *(.text..start) - } - - /* Data in ROM */ - - .data ALIGN(0x10) : - { - *(.data) - } - _image_start = ADDR(.data); - _image_size = SIZEOF(.data); - - .other : - { - *(.*) - } -} diff --git a/arch/mips/lasat/interrupt.c b/arch/mips/lasat/interrupt.c deleted file mode 100644 index 7965bbd0d319..000000000000 --- a/arch/mips/lasat/interrupt.c +++ /dev/null @@ -1,119 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * Routines for generic manipulation of the interrupts found on the - * Lasat boards. - */ -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/irq.h> - -#include <asm/irq_cpu.h> -#include <asm/lasat/lasat.h> -#include <asm/lasat/lasatint.h> - -#include <irq.h> - -static volatile int *lasat_int_status; -static volatile int *lasat_int_mask; -static volatile int lasat_int_mask_shift; - -void disable_lasat_irq(struct irq_data *d) -{ - unsigned int irq_nr = d->irq - LASAT_IRQ_BASE; - - *lasat_int_mask &= ~(1 << irq_nr) << lasat_int_mask_shift; -} - -void enable_lasat_irq(struct irq_data *d) -{ - unsigned int irq_nr = d->irq - LASAT_IRQ_BASE; - - *lasat_int_mask |= (1 << irq_nr) << lasat_int_mask_shift; -} - -static struct irq_chip lasat_irq_type = { - .name = "Lasat", - .irq_mask = disable_lasat_irq, - .irq_unmask = enable_lasat_irq, -}; - -static inline int ls1bit32(unsigned int x) -{ - int b = 31, s; - - s = 16; if (x << 16 == 0) s = 0; b -= s; x <<= s; - s = 8; if (x << 8 == 0) s = 0; b -= s; x <<= s; - s = 4; if (x << 4 == 0) s = 0; b -= s; x <<= s; - s = 2; if (x << 2 == 0) s = 0; b -= s; x <<= s; - s = 1; if (x << 1 == 0) s = 0; b -= s; - - return b; -} - -static unsigned long (*get_int_status)(void); - -static unsigned long get_int_status_100(void) -{ - return *lasat_int_status & *lasat_int_mask; -} - -static unsigned long get_int_status_200(void) -{ - unsigned long int_status; - - int_status = *lasat_int_status; - int_status &= (int_status >> LASATINT_MASK_SHIFT_200) & 0xffff; - return int_status; -} - -asmlinkage void plat_irq_dispatch(void) -{ - unsigned long int_status; - unsigned int cause = read_c0_cause(); - int irq; - - if (cause & CAUSEF_IP7) { /* R4000 count / compare IRQ */ - do_IRQ(7); - return; - } - - int_status = get_int_status(); - - /* if int_status == 0, then the interrupt has already been cleared */ - if (int_status) { - irq = LASAT_IRQ_BASE + ls1bit32(int_status); - - do_IRQ(irq); - } -} - -void __init arch_init_irq(void) -{ - int irq = LASAT_CASCADE_IRQ; - int i; - - if (IS_LASAT_200()) { - lasat_int_status = (void *)LASAT_INT_STATUS_REG_200; - lasat_int_mask = (void *)LASAT_INT_MASK_REG_200; - lasat_int_mask_shift = LASATINT_MASK_SHIFT_200; - get_int_status = get_int_status_200; - *lasat_int_mask &= 0xffff; - } else { - lasat_int_status = (void *)LASAT_INT_STATUS_REG_100; - lasat_int_mask = (void *)LASAT_INT_MASK_REG_100; - lasat_int_mask_shift = LASATINT_MASK_SHIFT_100; - get_int_status = get_int_status_100; - *lasat_int_mask = 0; - } - - mips_cpu_irq_init(); - - for (i = LASAT_IRQ_BASE; i <= LASAT_IRQ_END; i++) - irq_set_chip_and_handler(i, &lasat_irq_type, handle_level_irq); - - if (request_irq(irq, no_action, IRQF_NO_THREAD, "cascade", NULL)) - pr_err("Failed to request irq %d (cascade)\n", irq); -} diff --git a/arch/mips/lasat/lasat_board.c b/arch/mips/lasat/lasat_board.c deleted file mode 100644 index 80e1ba541148..000000000000 --- a/arch/mips/lasat/lasat_board.c +++ /dev/null @@ -1,268 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. - * - * Routines specific to the LASAT boards - */ -#include <linux/types.h> -#include <linux/crc32.h> -#include <asm/lasat/lasat.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/mutex.h> -#include <asm/addrspace.h> -#include "at93c.h" -/* New model description table */ -#include "lasat_models.h" - -static DEFINE_MUTEX(lasat_eeprom_mutex); - -#define EEPROM_CRC(data, len) (~crc32(~0, data, len)) - -struct lasat_info lasat_board_info; - -int EEPROMRead(unsigned int pos, unsigned char *data, int len) -{ - int i; - - for (i = 0; i < len; i++) - *data++ = at93c_read(pos++); - - return 0; -} - -int EEPROMWrite(unsigned int pos, unsigned char *data, int len) -{ - int i; - - for (i = 0; i < len; i++) - at93c_write(pos++, *data++); - - return 0; -} - -static void init_flash_sizes(void) -{ - unsigned long *lb = lasat_board_info.li_flashpart_base; - unsigned long *ls = lasat_board_info.li_flashpart_size; - int i; - - ls[LASAT_MTD_BOOTLOADER] = 0x40000; - ls[LASAT_MTD_SERVICE] = 0xC0000; - ls[LASAT_MTD_NORMAL] = 0x100000; - - if (!IS_LASAT_200()) { - lasat_board_info.li_flash_base = 0x1e000000; - - lb[LASAT_MTD_BOOTLOADER] = 0x1e400000; - - if (lasat_board_info.li_flash_size > 0x200000) { - ls[LASAT_MTD_CONFIG] = 0x100000; - ls[LASAT_MTD_FS] = 0x500000; - } - } else { - lasat_board_info.li_flash_base = 0x10000000; - - if (lasat_board_info.li_flash_size < 0x1000000) { - lb[LASAT_MTD_BOOTLOADER] = 0x10000000; - ls[LASAT_MTD_CONFIG] = 0x100000; - if (lasat_board_info.li_flash_size >= 0x400000) - ls[LASAT_MTD_FS] = - lasat_board_info.li_flash_size - 0x300000; - } - } - - for (i = 1; i < LASAT_MTD_LAST; i++) - lb[i] = lb[i-1] + ls[i-1]; -} - -int lasat_init_board_info(void) -{ - int c; - unsigned long crc; - unsigned long cfg0, cfg1; - const struct product_info *ppi; - int i_n_base_models = N_BASE_MODELS; - const char * const * i_txt_base_models = txt_base_models; - int i_n_prids = N_PRIDS; - - memset(&lasat_board_info, 0, sizeof(lasat_board_info)); - - /* First read the EEPROM info */ - EEPROMRead(0, (unsigned char *)&lasat_board_info.li_eeprom_info, - sizeof(struct lasat_eeprom_struct)); - - /* Check the CRC */ - crc = EEPROM_CRC((unsigned char *)(&lasat_board_info.li_eeprom_info), - sizeof(struct lasat_eeprom_struct) - 4); - - if (crc != lasat_board_info.li_eeprom_info.crc32) { - printk(KERN_WARNING "WARNING...\nWARNING...\nEEPROM CRC does " - "not match calculated, attempting to soldier on...\n"); - } - - if (lasat_board_info.li_eeprom_info.version != LASAT_EEPROM_VERSION) { - printk(KERN_WARNING "WARNING...\nWARNING...\nEEPROM version " - "%d, wanted version %d, attempting to soldier on...\n", - (unsigned int)lasat_board_info.li_eeprom_info.version, - LASAT_EEPROM_VERSION); - } - - cfg0 = lasat_board_info.li_eeprom_info.cfg[0]; - cfg1 = lasat_board_info.li_eeprom_info.cfg[1]; - - if (LASAT_W0_DSCTYPE(cfg0) != 1) { - printk(KERN_WARNING "WARNING...\nWARNING...\n" - "Invalid configuration read from EEPROM, attempting to " - "soldier on..."); - } - /* We have a valid configuration */ - - switch (LASAT_W0_SDRAMBANKSZ(cfg0)) { - case 0: - lasat_board_info.li_memsize = 0x0800000; - break; - case 1: - lasat_board_info.li_memsize = 0x1000000; - break; - case 2: - lasat_board_info.li_memsize = 0x2000000; - break; - case 3: - lasat_board_info.li_memsize = 0x4000000; - break; - case 4: - lasat_board_info.li_memsize = 0x8000000; - break; - default: - lasat_board_info.li_memsize = 0; - } - - switch (LASAT_W0_SDRAMBANKS(cfg0)) { - case 0: - break; - case 1: - lasat_board_info.li_memsize *= 2; - break; - default: - break; - } - - switch (LASAT_W0_BUSSPEED(cfg0)) { - case 0x0: - lasat_board_info.li_bus_hz = 60000000; - break; - case 0x1: - lasat_board_info.li_bus_hz = 66000000; - break; - case 0x2: - lasat_board_info.li_bus_hz = 66666667; - break; - case 0x3: - lasat_board_info.li_bus_hz = 80000000; - break; - case 0x4: - lasat_board_info.li_bus_hz = 83333333; - break; - case 0x5: - lasat_board_info.li_bus_hz = 100000000; - break; - } - - switch (LASAT_W0_CPUCLK(cfg0)) { - case 0x0: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz; - break; - case 0x1: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz + - (lasat_board_info.li_bus_hz >> 1); - break; - case 0x2: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz + - lasat_board_info.li_bus_hz; - break; - case 0x3: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz + - lasat_board_info.li_bus_hz + - (lasat_board_info.li_bus_hz >> 1); - break; - case 0x4: - lasat_board_info.li_cpu_hz = - lasat_board_info.li_bus_hz + - lasat_board_info.li_bus_hz + - lasat_board_info.li_bus_hz; - break; - } - - /* Flash size */ - switch (LASAT_W1_FLASHSIZE(cfg1)) { - case 0: - lasat_board_info.li_flash_size = 0x200000; - break; - case 1: - lasat_board_info.li_flash_size = 0x400000; - break; - case 2: - lasat_board_info.li_flash_size = 0x800000; - break; - case 3: - lasat_board_info.li_flash_size = 0x1000000; - break; - case 4: - lasat_board_info.li_flash_size = 0x2000000; - break; - } - - init_flash_sizes(); - - lasat_board_info.li_bmid = LASAT_W0_BMID(cfg0); - lasat_board_info.li_prid = lasat_board_info.li_eeprom_info.prid; - if (lasat_board_info.li_prid == 0xffff || lasat_board_info.li_prid == 0) - lasat_board_info.li_prid = lasat_board_info.li_bmid; - - /* Base model stuff */ - if (lasat_board_info.li_bmid > i_n_base_models) - lasat_board_info.li_bmid = i_n_base_models; - strcpy(lasat_board_info.li_bmstr, - i_txt_base_models[lasat_board_info.li_bmid]); - - /* Product ID dependent values */ - c = lasat_board_info.li_prid; - if (c >= i_n_prids) { - strcpy(lasat_board_info.li_namestr, "Unknown Model"); - strcpy(lasat_board_info.li_typestr, "Unknown Type"); - } else { - ppi = &vendor_info_table[0].vi_product_info[c]; - strcpy(lasat_board_info.li_namestr, ppi->pi_name); - if (ppi->pi_type) - strcpy(lasat_board_info.li_typestr, ppi->pi_type); - else - sprintf(lasat_board_info.li_typestr, "%d", 10 * c); - } - - return 0; -} - -void lasat_write_eeprom_info(void) -{ - unsigned long crc; - - mutex_lock(&lasat_eeprom_mutex); - - /* Generate the CRC */ - crc = EEPROM_CRC((unsigned char *)(&lasat_board_info.li_eeprom_info), - sizeof(struct lasat_eeprom_struct) - 4); - lasat_board_info.li_eeprom_info.crc32 = crc; - - /* Write the EEPROM info */ - EEPROMWrite(0, (unsigned char *)&lasat_board_info.li_eeprom_info, - sizeof(struct lasat_eeprom_struct)); - - mutex_unlock(&lasat_eeprom_mutex); -} diff --git a/arch/mips/lasat/lasat_models.h b/arch/mips/lasat/lasat_models.h deleted file mode 100644 index 474e57342484..000000000000 --- a/arch/mips/lasat/lasat_models.h +++ /dev/null @@ -1,68 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Model description tables - */ -#include <linux/kernel.h> - -struct product_info { - const char *pi_name; - const char *pi_type; -}; - -struct vendor_info { - const char *vi_name; - const struct product_info *vi_product_info; -}; - -/* - * Base models - */ -static const char * const txt_base_models[] = { - "MQ 2", "MQ Pro", "SP 25", "SP 50", "SP 100", "SP 5000", "SP 7000", - "SP 1000", "Unknown" -}; -#define N_BASE_MODELS (ARRAY_SIZE(txt_base_models) - 1) - -/* - * Eicon Networks - */ -static const char txt_en_mq[] = "Masquerade"; -static const char txt_en_sp[] = "Safepipe"; - -static const struct product_info product_info_eicon[] = { - { txt_en_mq, "II" }, /* 0 */ - { txt_en_mq, "Pro" }, /* 1 */ - { txt_en_sp, "25" }, /* 2 */ - { txt_en_sp, "50" }, /* 3 */ - { txt_en_sp, "100" }, /* 4 */ - { txt_en_sp, "5000" }, /* 5 */ - { txt_en_sp, "7000" }, /* 6 */ - { txt_en_sp, "30" }, /* 7 */ - { txt_en_sp, "5100" }, /* 8 */ - { txt_en_sp, "7100" }, /* 9 */ - { txt_en_sp, "1110" }, /* 10 */ - { txt_en_sp, "3020" }, /* 11 */ - { txt_en_sp, "3030" }, /* 12 */ - { txt_en_sp, "5020" }, /* 13 */ - { txt_en_sp, "5030" }, /* 14 */ - { txt_en_sp, "1120" }, /* 15 */ - { txt_en_sp, "1130" }, /* 16 */ - { txt_en_sp, "6010" }, /* 17 */ - { txt_en_sp, "6110" }, /* 18 */ - { txt_en_sp, "6210" }, /* 19 */ - { txt_en_sp, "1020" }, /* 20 */ - { txt_en_sp, "1040" }, /* 21 */ - { txt_en_sp, "1050" }, /* 22 */ - { txt_en_sp, "1060" }, /* 23 */ -}; - -#define N_PRIDS ARRAY_SIZE(product_info_eicon) - -/* - * The vendor table - */ -static struct vendor_info const vendor_info_table[] = { - { "Eicon Networks", product_info_eicon }, -}; - -#define N_VENDORS ARRAY_SIZE(vendor_info_table) diff --git a/arch/mips/lasat/picvue.c b/arch/mips/lasat/picvue.c deleted file mode 100644 index 08298ccf5ccf..000000000000 --- a/arch/mips/lasat/picvue.c +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Picvue PVC160206 display driver - * - * Brian Murphy <brian@murphy.dk> - * - */ -#include <linux/kernel.h> -#include <linux/delay.h> -#include <asm/bootinfo.h> -#include <asm/lasat/lasat.h> -#include <linux/module.h> -#include <linux/errno.h> -#include <linux/string.h> - -#include "picvue.h" - -#define PVC_BUSY 0x80 -#define PVC_NLINES 2 -#define PVC_DISPMEM 80 -#define PVC_LINELEN PVC_DISPMEM / PVC_NLINES - -struct pvc_defs *picvue; - -static void pvc_reg_write(u32 val) -{ - *picvue->reg = val; -} - -static u32 pvc_reg_read(void) -{ - u32 tmp = *picvue->reg; - return tmp; -} - -static void pvc_write_byte(u32 data, u8 byte) -{ - data |= picvue->e; - pvc_reg_write(data); - data &= ~picvue->data_mask; - data |= byte << picvue->data_shift; - pvc_reg_write(data); - ndelay(220); - pvc_reg_write(data & ~picvue->e); - ndelay(220); -} - -static u8 pvc_read_byte(u32 data) -{ - u8 byte; - - data |= picvue->e; - pvc_reg_write(data); - ndelay(220); - byte = (pvc_reg_read() & picvue->data_mask) >> picvue->data_shift; - data &= ~picvue->e; - pvc_reg_write(data); - ndelay(220); - return byte; -} - -static u8 pvc_read_data(void) -{ - u32 data = pvc_reg_read(); - u8 byte; - data |= picvue->rw; - data &= ~picvue->rs; - pvc_reg_write(data); - ndelay(40); - byte = pvc_read_byte(data); - data |= picvue->rs; - pvc_reg_write(data); - return byte; -} - -#define TIMEOUT 1000 -static int pvc_wait(void) -{ - int i = TIMEOUT; - int err = 0; - - while ((pvc_read_data() & PVC_BUSY) && i) - i--; - if (i == 0) - err = -ETIME; - - return err; -} - -#define MODE_INST 0 -#define MODE_DATA 1 -static void pvc_write(u8 byte, int mode) -{ - u32 data = pvc_reg_read(); - data &= ~picvue->rw; - if (mode == MODE_DATA) - data |= picvue->rs; - else - data &= ~picvue->rs; - pvc_reg_write(data); - ndelay(40); - pvc_write_byte(data, byte); - if (mode == MODE_DATA) - data &= ~picvue->rs; - else - data |= picvue->rs; - pvc_reg_write(data); - pvc_wait(); -} - -void pvc_write_string(const unsigned char *str, u8 addr, int line) -{ - int i = 0; - - if (line > 0 && (PVC_NLINES > 1)) - addr += 0x40 * line; - pvc_write(0x80 | addr, MODE_INST); - - while (*str != 0 && i < PVC_LINELEN) { - pvc_write(*str++, MODE_DATA); - i++; - } -} - -void pvc_write_string_centered(const unsigned char *str, int line) -{ - int len = strlen(str); - u8 addr; - - if (len > PVC_VISIBLE_CHARS) - addr = 0; - else - addr = (PVC_VISIBLE_CHARS - strlen(str))/2; - - pvc_write_string(str, addr, line); -} - -void pvc_dump_string(const unsigned char *str) -{ - int len = strlen(str); - - pvc_write_string(str, 0, 0); - if (len > PVC_VISIBLE_CHARS) - pvc_write_string(&str[PVC_VISIBLE_CHARS], 0, 1); -} - -#define BM_SIZE 8 -#define MAX_PROGRAMMABLE_CHARS 8 -int pvc_program_cg(int charnum, u8 bitmap[BM_SIZE]) -{ - int i; - int addr; - - if (charnum > MAX_PROGRAMMABLE_CHARS) - return -ENOENT; - - addr = charnum * 8; - pvc_write(0x40 | addr, MODE_INST); - - for (i = 0; i < BM_SIZE; i++) - pvc_write(bitmap[i], MODE_DATA); - return 0; -} - -#define FUNC_SET_CMD 0x20 -#define EIGHT_BYTE (1 << 4) -#define FOUR_BYTE 0 -#define TWO_LINES (1 << 3) -#define ONE_LINE 0 -#define LARGE_FONT (1 << 2) -#define SMALL_FONT 0 - -static void pvc_funcset(u8 cmd) -{ - pvc_write(FUNC_SET_CMD | (cmd & (EIGHT_BYTE|TWO_LINES|LARGE_FONT)), - MODE_INST); -} - -#define ENTRYMODE_CMD 0x4 -#define AUTO_INC (1 << 1) -#define AUTO_DEC 0 -#define CURSOR_FOLLOWS_DISP (1 << 0) - -static void pvc_entrymode(u8 cmd) -{ - pvc_write(ENTRYMODE_CMD | (cmd & (AUTO_INC|CURSOR_FOLLOWS_DISP)), - MODE_INST); -} - -#define DISP_CNT_CMD 0x08 -#define DISP_OFF 0 -#define DISP_ON (1 << 2) -#define CUR_ON (1 << 1) -#define CUR_BLINK (1 << 0) -void pvc_dispcnt(u8 cmd) -{ - pvc_write(DISP_CNT_CMD | (cmd & (DISP_ON|CUR_ON|CUR_BLINK)), MODE_INST); -} - -#define MOVE_CMD 0x10 -#define DISPLAY (1 << 3) -#define CURSOR 0 -#define RIGHT (1 << 2) -#define LEFT 0 -void pvc_move(u8 cmd) -{ - pvc_write(MOVE_CMD | (cmd & (DISPLAY|RIGHT)), MODE_INST); -} - -#define CLEAR_CMD 0x1 -void pvc_clear(void) -{ - pvc_write(CLEAR_CMD, MODE_INST); -} - -#define HOME_CMD 0x2 -void pvc_home(void) -{ - pvc_write(HOME_CMD, MODE_INST); -} - -int pvc_init(void) -{ - u8 cmd = EIGHT_BYTE; - - if (PVC_NLINES == 2) - cmd |= (SMALL_FONT|TWO_LINES); - else - cmd |= (LARGE_FONT|ONE_LINE); - pvc_funcset(cmd); - pvc_dispcnt(DISP_ON); - pvc_entrymode(AUTO_INC); - - pvc_clear(); - pvc_write_string_centered("Display", 0); - pvc_write_string_centered("Initialized", 1); - - return 0; -} - -module_init(pvc_init); -MODULE_LICENSE("GPL"); diff --git a/arch/mips/lasat/picvue.h b/arch/mips/lasat/picvue.h deleted file mode 100644 index 161d3bf50811..000000000000 --- a/arch/mips/lasat/picvue.h +++ /dev/null @@ -1,45 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Picvue PVC160206 display driver - * - * Brian Murphy <brian.murphy@eicon.com> - * - */ -struct pvc_defs { - volatile u32 *reg; - u32 data_shift; - u32 data_mask; - u32 e; - u32 rw; - u32 rs; -}; - -extern struct pvc_defs *picvue; - -#define PVC_NLINES 2 -#define PVC_DISPMEM 80 -#define PVC_LINELEN PVC_DISPMEM / PVC_NLINES -#define PVC_VISIBLE_CHARS 16 - -void pvc_write_string(const unsigned char *str, u8 addr, int line); -void pvc_write_string_centered(const unsigned char *str, int line); -void pvc_dump_string(const unsigned char *str); - -#define BM_SIZE 8 -#define MAX_PROGRAMMABLE_CHARS 8 -int pvc_program_cg(int charnum, u8 bitmap[BM_SIZE]); - -void pvc_dispcnt(u8 cmd); -#define DISP_OFF 0 -#define DISP_ON (1 << 2) -#define CUR_ON (1 << 1) -#define CUR_BLINK (1 << 0) - -void pvc_move(u8 cmd); -#define DISPLAY (1 << 3) -#define CURSOR 0 -#define RIGHT (1 << 2) -#define LEFT 0 - -void pvc_clear(void); -void pvc_home(void); diff --git a/arch/mips/lasat/picvue_proc.c b/arch/mips/lasat/picvue_proc.c deleted file mode 100644 index 61c033494af5..000000000000 --- a/arch/mips/lasat/picvue_proc.c +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Picvue PVC160206 display driver - * - * Brian Murphy <brian.murphy@eicon.com> - * - */ -#include <linux/bug.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/errno.h> - -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/interrupt.h> - -#include <linux/timer.h> -#include <linux/mutex.h> -#include <linux/uaccess.h> - -#include "picvue.h" - -static DEFINE_MUTEX(pvc_mutex); -static char pvc_lines[PVC_NLINES][PVC_LINELEN+1]; -static int pvc_linedata[PVC_NLINES]; -static char *pvc_linename[PVC_NLINES] = {"line1", "line2"}; -#define DISPLAY_DIR_NAME "display" -static int scroll_dir, scroll_interval; - -static struct timer_list timer; - -static void pvc_display(unsigned long data) -{ - int i; - - pvc_clear(); - for (i = 0; i < PVC_NLINES; i++) - pvc_write_string(pvc_lines[i], 0, i); -} - -static DECLARE_TASKLET(pvc_display_tasklet, &pvc_display, 0); - -static int pvc_line_proc_show(struct seq_file *m, void *v) -{ - int lineno = *(int *)m->private; - - if (lineno < 0 || lineno >= PVC_NLINES) { - printk(KERN_WARNING "proc_read_line: invalid lineno %d\n", lineno); - return 0; - } - - mutex_lock(&pvc_mutex); - seq_printf(m, "%s\n", pvc_lines[lineno]); - mutex_unlock(&pvc_mutex); - - return 0; -} - -static int pvc_line_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pvc_line_proc_show, PDE_DATA(inode)); -} - -static ssize_t pvc_line_proc_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) -{ - int lineno = *(int *)PDE_DATA(file_inode(file)); - char kbuf[PVC_LINELEN]; - size_t len; - - BUG_ON(lineno < 0 || lineno >= PVC_NLINES); - - len = min(count, sizeof(kbuf) - 1); - if (copy_from_user(kbuf, buf, len)) - return -EFAULT; - kbuf[len] = '\0'; - - if (len > 0 && kbuf[len - 1] == '\n') - len--; - - mutex_lock(&pvc_mutex); - strncpy(pvc_lines[lineno], kbuf, len); - pvc_lines[lineno][len] = '\0'; - mutex_unlock(&pvc_mutex); - - tasklet_schedule(&pvc_display_tasklet); - - return count; -} - -static const struct proc_ops pvc_line_proc_ops = { - .proc_open = pvc_line_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = pvc_line_proc_write, -}; - -static ssize_t pvc_scroll_proc_write(struct file *file, const char __user *buf, - size_t count, loff_t *pos) -{ - char kbuf[42]; - size_t len; - int cmd; - - len = min(count, sizeof(kbuf) - 1); - if (copy_from_user(kbuf, buf, len)) - return -EFAULT; - kbuf[len] = '\0'; - - cmd = simple_strtol(kbuf, NULL, 10); - - mutex_lock(&pvc_mutex); - if (scroll_interval != 0) - del_timer(&timer); - - if (cmd == 0) { - scroll_dir = 0; - scroll_interval = 0; - } else { - if (cmd < 0) { - scroll_dir = -1; - scroll_interval = -cmd; - } else { - scroll_dir = 1; - scroll_interval = cmd; - } - add_timer(&timer); - } - mutex_unlock(&pvc_mutex); - - return count; -} - -static int pvc_scroll_proc_show(struct seq_file *m, void *v) -{ - mutex_lock(&pvc_mutex); - seq_printf(m, "%d\n", scroll_dir * scroll_interval); - mutex_unlock(&pvc_mutex); - - return 0; -} - -static int pvc_scroll_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, pvc_scroll_proc_show, NULL); -} - -static const struct proc_ops pvc_scroll_proc_ops = { - .proc_open = pvc_scroll_proc_open, - .proc_read = seq_read, - .proc_lseek = seq_lseek, - .proc_release = single_release, - .proc_write = pvc_scroll_proc_write, -}; - -void pvc_proc_timerfunc(struct timer_list *unused) -{ - if (scroll_dir < 0) - pvc_move(DISPLAY|RIGHT); - else if (scroll_dir > 0) - pvc_move(DISPLAY|LEFT); - - timer.expires = jiffies + scroll_interval; - add_timer(&timer); -} - -static void pvc_proc_cleanup(void) -{ - remove_proc_subtree(DISPLAY_DIR_NAME, NULL); - del_timer_sync(&timer); -} - -static int __init pvc_proc_init(void) -{ - struct proc_dir_entry *dir, *proc_entry; - int i; - - dir = proc_mkdir(DISPLAY_DIR_NAME, NULL); - if (dir == NULL) - goto error; - - for (i = 0; i < PVC_NLINES; i++) { - strcpy(pvc_lines[i], ""); - pvc_linedata[i] = i; - } - for (i = 0; i < PVC_NLINES; i++) { - proc_entry = proc_create_data(pvc_linename[i], 0644, dir, - &pvc_line_proc_ops, &pvc_linedata[i]); - if (proc_entry == NULL) - goto error; - } - proc_entry = proc_create("scroll", 0644, dir, &pvc_scroll_proc_ops); - if (proc_entry == NULL) - goto error; - - timer_setup(&timer, pvc_proc_timerfunc, 0); - - return 0; -error: - pvc_proc_cleanup(); - return -ENOMEM; -} - -module_init(pvc_proc_init); -module_exit(pvc_proc_cleanup); -MODULE_LICENSE("GPL"); diff --git a/arch/mips/lasat/prom.c b/arch/mips/lasat/prom.c deleted file mode 100644 index 5ce1407de2d5..000000000000 --- a/arch/mips/lasat/prom.c +++ /dev/null @@ -1,126 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * PROM interface routines. - */ -#include <linux/types.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/ctype.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/memblock.h> -#include <linux/ioport.h> -#include <asm/bootinfo.h> -#include <asm/lasat/lasat.h> -#include <asm/cpu.h> -#include <asm/setup.h> - -#include "at93c.h" -#include <asm/lasat/eeprom.h> -#include "prom.h" - -#define RESET_VECTOR 0xbfc00000 -#define PROM_JUMP_TABLE_ENTRY(n) (*((u32 *)(RESET_VECTOR + 0x20) + n)) -#define PROM_DISPLAY_ADDR PROM_JUMP_TABLE_ENTRY(0) -#define PROM_PUTC_ADDR PROM_JUMP_TABLE_ENTRY(1) -#define PROM_MONITOR_ADDR PROM_JUMP_TABLE_ENTRY(2) - -static void null_prom_display(const char *string, int pos, int clear) -{ -} - -static void null_prom_monitor(void) -{ -} - -static void null_prom_putc(char c) -{ -} - -/* these are functions provided by the bootloader */ -static void (*__prom_putc)(char c) = null_prom_putc; - -void prom_putchar(char c) -{ - __prom_putc(c); -} - -void (*prom_display)(const char *string, int pos, int clear) = - null_prom_display; -void (*prom_monitor)(void) = null_prom_monitor; - -unsigned int lasat_ndelay_divider; - -static void setup_prom_vectors(void) -{ - u32 version = *(u32 *)(RESET_VECTOR + 0x90); - - if (version >= 307) { - prom_display = (void *)PROM_DISPLAY_ADDR; - __prom_putc = (void *)PROM_PUTC_ADDR; - prom_monitor = (void *)PROM_MONITOR_ADDR; - } - printk(KERN_DEBUG "prom vectors set up\n"); -} - -static struct at93c_defs at93c_defs[N_MACHTYPES] = { - { - .reg = (void *)AT93C_REG_100, - .rdata_reg = (void *)AT93C_RDATA_REG_100, - .rdata_shift = AT93C_RDATA_SHIFT_100, - .wdata_shift = AT93C_WDATA_SHIFT_100, - .cs = AT93C_CS_M_100, - .clk = AT93C_CLK_M_100 - }, { - .reg = (void *)AT93C_REG_200, - .rdata_reg = (void *)AT93C_RDATA_REG_200, - .rdata_shift = AT93C_RDATA_SHIFT_200, - .wdata_shift = AT93C_WDATA_SHIFT_200, - .cs = AT93C_CS_M_200, - .clk = AT93C_CLK_M_200 - }, -}; - -void __init prom_init(void) -{ - int argc = fw_arg0; - char **argv = (char **) fw_arg1; - - setup_prom_vectors(); - - if (IS_LASAT_200()) { - printk(KERN_INFO "LASAT 200 board\n"); - lasat_ndelay_divider = LASAT_200_DIVIDER; - at93c = &at93c_defs[1]; - } else { - printk(KERN_INFO "LASAT 100 board\n"); - lasat_ndelay_divider = LASAT_100_DIVIDER; - at93c = &at93c_defs[0]; - } - - lasat_init_board_info(); /* Read info from EEPROM */ - - /* Get the command line */ - if (argc > 0) { - strncpy(arcs_cmdline, argv[0], COMMAND_LINE_SIZE-1); - arcs_cmdline[COMMAND_LINE_SIZE-1] = '\0'; - } - - /* Set the I/O base address */ - set_io_port_base(KSEG1); - - /* Set memory regions */ - ioport_resource.start = 0; - ioport_resource.end = 0xffffffff; /* Wrong, fixme. */ - - add_memory_region(0, lasat_board_info.li_memsize, BOOT_MEM_RAM); -} - -void __init prom_free_prom_memory(void) -{ -} - -const char *get_system_type(void) -{ - return lasat_board_info.li_bmstr; -} diff --git a/arch/mips/lasat/prom.h b/arch/mips/lasat/prom.h deleted file mode 100644 index 3d1df853e9d3..000000000000 --- a/arch/mips/lasat/prom.h +++ /dev/null @@ -1,8 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PROM_H -#define __PROM_H - -extern void (*prom_display)(const char *string, int pos, int clear); -extern void (*prom_monitor)(void); - -#endif /* __PROM_H */ diff --git a/arch/mips/lasat/reset.c b/arch/mips/lasat/reset.c deleted file mode 100644 index 7c516ed9af15..000000000000 --- a/arch/mips/lasat/reset.c +++ /dev/null @@ -1,48 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. - * - * Reset the LASAT board. - */ -#include <linux/kernel.h> -#include <linux/pm.h> - -#include <asm/reboot.h> -#include <asm/lasat/lasat.h> - -#include "picvue.h" -#include "prom.h" - -static void lasat_machine_restart(char *command); -static void lasat_machine_halt(void); - -/* Used to set machine to boot in service mode via /proc interface */ -int lasat_boot_to_service; - -static void lasat_machine_restart(char *command) -{ - local_irq_disable(); - - if (lasat_boot_to_service) { - *(volatile unsigned int *)0xa0000024 = 0xdeadbeef; - *(volatile unsigned int *)0xa00000fc = 0xfedeabba; - } - *lasat_misc->reset_reg = 0xbedead; - for (;;) ; -} - -static void lasat_machine_halt(void) -{ - local_irq_disable(); - - prom_monitor(); - for (;;) ; -} - -void lasat_reboot_setup(void) -{ - _machine_restart = lasat_machine_restart; - _machine_halt = lasat_machine_halt; - pm_power_off = lasat_machine_halt; -} diff --git a/arch/mips/lasat/serial.c b/arch/mips/lasat/serial.c deleted file mode 100644 index 16b242713420..000000000000 --- a/arch/mips/lasat/serial.c +++ /dev/null @@ -1,80 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Registration of Lasat UART platform device. - * - * Copyright (C) 2007 Brian Murphy <brian@murphy.dk> - */ -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/platform_device.h> -#include <linux/serial_8250.h> - -#include <asm/lasat/lasat.h> -#include <asm/lasat/serial.h> - -static struct resource lasat_serial_res[2] __initdata; - -static struct plat_serial8250_port lasat_serial8250_port[] = { - { - .iotype = UPIO_MEM, - .flags = UPF_IOREMAP | UPF_BOOT_AUTOCONF | - UPF_SKIP_TEST, - }, - {}, -}; - -static __init int lasat_uart_add(void) -{ - struct platform_device *pdev; - int retval; - - pdev = platform_device_alloc("serial8250", -1); - if (!pdev) - return -ENOMEM; - - if (!IS_LASAT_200()) { - lasat_serial_res[0].start = KSEG1ADDR(LASAT_UART_REGS_BASE_100); - lasat_serial_res[0].end = lasat_serial_res[0].start + LASAT_UART_REGS_SHIFT_100 * 8 - 1; - lasat_serial_res[0].flags = IORESOURCE_MEM; - lasat_serial_res[1].start = LASATINT_UART_100; - lasat_serial_res[1].end = LASATINT_UART_100; - lasat_serial_res[1].flags = IORESOURCE_IRQ; - - lasat_serial8250_port[0].mapbase = LASAT_UART_REGS_BASE_100; - lasat_serial8250_port[0].uartclk = LASAT_BASE_BAUD_100 * 16; - lasat_serial8250_port[0].regshift = LASAT_UART_REGS_SHIFT_100; - lasat_serial8250_port[0].irq = LASATINT_UART_100; - } else { - lasat_serial_res[0].start = KSEG1ADDR(LASAT_UART_REGS_BASE_200); - lasat_serial_res[0].end = lasat_serial_res[0].start + LASAT_UART_REGS_SHIFT_200 * 8 - 1; - lasat_serial_res[0].flags = IORESOURCE_MEM; - lasat_serial_res[1].start = LASATINT_UART_200; - lasat_serial_res[1].end = LASATINT_UART_200; - lasat_serial_res[1].flags = IORESOURCE_IRQ; - - lasat_serial8250_port[0].mapbase = LASAT_UART_REGS_BASE_200; - lasat_serial8250_port[0].uartclk = LASAT_BASE_BAUD_200 * 16; - lasat_serial8250_port[0].regshift = LASAT_UART_REGS_SHIFT_200; - lasat_serial8250_port[0].irq = LASATINT_UART_200; - } - - pdev->id = PLAT8250_DEV_PLATFORM; - pdev->dev.platform_data = lasat_serial8250_port; - - retval = platform_device_add_resources(pdev, lasat_serial_res, ARRAY_SIZE(lasat_serial_res)); - if (retval) - goto err_free_device; - - retval = platform_device_add(pdev); - if (retval) - goto err_free_device; - - return 0; - -err_free_device: - platform_device_put(pdev); - - return retval; -} -device_initcall(lasat_uart_add); diff --git a/arch/mips/lasat/setup.c b/arch/mips/lasat/setup.c deleted file mode 100644 index 0743243fd86d..000000000000 --- a/arch/mips/lasat/setup.c +++ /dev/null @@ -1,141 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999 MIPS Technologies, Inc. All rights reserved. - * - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. - * - * Brian Murphy <brian@murphy.dk> - * - * Lasat specific setup. - */ -#include <linux/init.h> -#include <linux/sched.h> -#include <linux/pci.h> -#include <linux/interrupt.h> -#include <linux/tty.h> - -#include <asm/time.h> -#include <asm/cpu.h> -#include <asm/bootinfo.h> -#include <asm/irq.h> -#include <asm/lasat/lasat.h> -#include <asm/lasat/serial.h> - -#ifdef CONFIG_PICVUE -#include <linux/notifier.h> -#endif - -#include "ds1603.h" -#include <asm/lasat/ds1603.h> -#include <asm/lasat/picvue.h> -#include <asm/lasat/eeprom.h> - -#include "prom.h" - -int lasat_command_line; -void lasatint_init(void); - -extern void lasat_reboot_setup(void); -extern void pcisetup(void); -extern void edhac_init(void *, void *, void *); -extern void addrflt_init(void); - -struct lasat_misc lasat_misc_info[N_MACHTYPES] = { - { - .reset_reg = (void *)KSEG1ADDR(0x1c840000), - .flash_wp_reg = (void *)KSEG1ADDR(0x1c800000), 2 - }, { - .reset_reg = (void *)KSEG1ADDR(0x11080000), - .flash_wp_reg = (void *)KSEG1ADDR(0x11000000), 6 - } -}; - -struct lasat_misc *lasat_misc; - -#ifdef CONFIG_DS1603 -static struct ds_defs ds_defs[N_MACHTYPES] = { - { (void *)DS1603_REG_100, (void *)DS1603_REG_100, - DS1603_RST_100, DS1603_CLK_100, DS1603_DATA_100, - DS1603_DATA_SHIFT_100, 0, 0 }, - { (void *)DS1603_REG_200, (void *)DS1603_DATA_REG_200, - DS1603_RST_200, DS1603_CLK_200, DS1603_DATA_200, - DS1603_DATA_READ_SHIFT_200, 1, 2000 } -}; -#endif - -#ifdef CONFIG_PICVUE -#include "picvue.h" -static struct pvc_defs pvc_defs[N_MACHTYPES] = { - { (void *)PVC_REG_100, PVC_DATA_SHIFT_100, PVC_DATA_M_100, - PVC_E_100, PVC_RW_100, PVC_RS_100 }, - { (void *)PVC_REG_200, PVC_DATA_SHIFT_200, PVC_DATA_M_200, - PVC_E_200, PVC_RW_200, PVC_RS_200 } -}; -#endif - -static int lasat_panic_display(struct notifier_block *this, - unsigned long event, void *ptr) -{ -#ifdef CONFIG_PICVUE - unsigned char *string = ptr; - if (string == NULL) - string = "Kernel Panic"; - pvc_dump_string(string); -#endif - return NOTIFY_DONE; -} - -static int lasat_panic_prom_monitor(struct notifier_block *this, - unsigned long event, void *ptr) -{ - prom_monitor(); - return NOTIFY_DONE; -} - -static struct notifier_block lasat_panic_block[] = -{ - { - .notifier_call = lasat_panic_display, - .priority = INT_MAX - }, { - .notifier_call = lasat_panic_prom_monitor, - .priority = INT_MIN - } -}; - -void __init plat_time_init(void) -{ - mips_hpt_frequency = lasat_board_info.li_cpu_hz / 2; - - change_c0_status(ST0_IM, IE_IRQ0); -} - -void __init plat_mem_setup(void) -{ - int i; - int lasat_type = IS_LASAT_200() ? 1 : 0; - - lasat_misc = &lasat_misc_info[lasat_type]; -#ifdef CONFIG_PICVUE - picvue = &pvc_defs[lasat_type]; -#endif - - /* Set up panic notifier */ - for (i = 0; i < ARRAY_SIZE(lasat_panic_block); i++) - atomic_notifier_chain_register(&panic_notifier_list, - &lasat_panic_block[i]); - - lasat_reboot_setup(); - -#ifdef CONFIG_DS1603 - ds1603 = &ds_defs[lasat_type]; -#endif - -#ifdef DYNAMIC_SERIAL_INIT - serial_init(); -#endif - - pr_info("Lasat specific initialization complete\n"); -} diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c deleted file mode 100644 index 2119541a5b8b..000000000000 --- a/arch/mips/lasat/sysctl.c +++ /dev/null @@ -1,265 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Thomas Horsten <thh@lasat.com> - * Copyright (C) 2000 LASAT Networks A/S. - * - * Routines specific to the LASAT boards - */ -#include <linux/types.h> -#include <asm/lasat/lasat.h> - -#include <linux/sysctl.h> -#include <linux/stddef.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/ctype.h> -#include <linux/string.h> -#include <linux/net.h> -#include <linux/inet.h> -#include <linux/uaccess.h> - -#include <asm/time.h> - -#ifdef CONFIG_DS1603 -#include "ds1603.h" -#endif - - -/* And the same for proc */ -int proc_dolasatstring(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int r; - - r = proc_dostring(table, write, buffer, lenp, ppos); - if ((!write) || r) - return r; - - lasat_write_eeprom_info(); - - return 0; -} - -#ifdef CONFIG_DS1603 -static int rtctmp; - -/* proc function to read/write RealTime Clock */ -int proc_dolasatrtc(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - struct timespec64 ts; - int r; - - if (!write) { - read_persistent_clock64(&ts); - rtctmp = ts.tv_sec; - /* check for time < 0 and set to 0 */ - if (rtctmp < 0) - rtctmp = 0; - } - r = proc_dointvec(table, write, buffer, lenp, ppos); - if (r) - return r; - - if (write) { - /* - * Due to the RTC hardware limitation, we can not actually - * use the full 64-bit range here. - */ - ts.tv_sec = rtctmp; - ts.tv_nsec = 0; - - update_persistent_clock64(ts); - } - - return 0; -} -#endif - -#ifdef CONFIG_INET -int proc_lasat_ip(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - unsigned int ip; - char *p, c; - int len; - char ipbuf[32]; - - if (!table->data || !table->maxlen || !*lenp || - (*ppos && !write)) { - *lenp = 0; - return 0; - } - - if (write) { - len = 0; - p = buffer; - while (len < *lenp) { - c = *p; - p++; - if (c == 0 || c == '\n') - break; - len++; - } - if (len >= sizeof(ipbuf)-1) - len = sizeof(ipbuf) - 1; - memcpy(ipbuf, buffer, len); - ipbuf[len] = 0; - *ppos += *lenp; - /* Now see if we can convert it to a valid IP */ - ip = in_aton(ipbuf); - *(unsigned int *)(table->data) = ip; - lasat_write_eeprom_info(); - } else { - ip = *(unsigned int *)(table->data); - sprintf(ipbuf, "%d.%d.%d.%d", - (ip) & 0xff, - (ip >> 8) & 0xff, - (ip >> 16) & 0xff, - (ip >> 24) & 0xff); - len = strlen(ipbuf); - if (len > *lenp) - len = *lenp; - if (len) - memcpy(buffer, ipbuf, len); - if (len < *lenp) { - *((char *)buffer + len) = '\n'; - len++; - } - *lenp = len; - *ppos += len; - } - - return 0; -} -#endif - -int proc_lasat_prid(struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos) -{ - int r; - - r = proc_dointvec(table, write, buffer, lenp, ppos); - if (r < 0) - return r; - if (write) { - lasat_board_info.li_eeprom_info.prid = - lasat_board_info.li_prid; - lasat_write_eeprom_info(); - lasat_init_board_info(); - } - return 0; -} - -extern int lasat_boot_to_service; - -static struct ctl_table lasat_table[] = { - { - .procname = "cpu-hz", - .data = &lasat_board_info.li_cpu_hz, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "bus-hz", - .data = &lasat_board_info.li_bus_hz, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "bmid", - .data = &lasat_board_info.li_bmid, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = proc_dointvec, - }, - { - .procname = "prid", - .data = &lasat_board_info.li_prid, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_lasat_prid, - }, -#ifdef CONFIG_INET - { - .procname = "ipaddr", - .data = &lasat_board_info.li_eeprom_info.ipaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_lasat_ip, - }, - { - .procname = "netmask", - .data = &lasat_board_info.li_eeprom_info.netmask, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_lasat_ip, - }, -#endif - { - .procname = "passwd_hash", - .data = &lasat_board_info.li_eeprom_info.passwd_hash, - .maxlen = - sizeof(lasat_board_info.li_eeprom_info.passwd_hash), - .mode = 0600, - .proc_handler = proc_dolasatstring, - }, - { - .procname = "boot-service", - .data = &lasat_boot_to_service, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, -#ifdef CONFIG_DS1603 - { - .procname = "rtc", - .data = &rtctmp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dolasatrtc, - }, -#endif - { - .procname = "namestr", - .data = &lasat_board_info.li_namestr, - .maxlen = sizeof(lasat_board_info.li_namestr), - .mode = 0444, - .proc_handler = proc_dostring, - }, - { - .procname = "typestr", - .data = &lasat_board_info.li_typestr, - .maxlen = sizeof(lasat_board_info.li_typestr), - .mode = 0444, - .proc_handler = proc_dostring, - }, - {} -}; - -static struct ctl_table lasat_root_table[] = { - { - .procname = "lasat", - .mode = 0555, - .child = lasat_table - }, - {} -}; - -static int __init lasat_register_sysctl(void) -{ - struct ctl_table_header *lasat_table_header; - - lasat_table_header = - register_sysctl_table(lasat_root_table); - if (!lasat_table_header) { - printk(KERN_ERR "Unable to register LASAT sysctl\n"); - return -ENOMEM; - } - - return 0; -} - -arch_initcall(lasat_register_sysctl); diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S index fda7b57b826e..87fda0713b84 100644 --- a/arch/mips/lib/csum_partial.S +++ b/arch/mips/lib/csum_partial.S @@ -279,7 +279,8 @@ EXPORT_SYMBOL(csum_partial) #endif /* odd buffer alignment? */ -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_LOONGSON64) .set push .set arch=mips32r2 wsbh v1, sum @@ -732,7 +733,8 @@ EXPORT_SYMBOL(csum_partial) addu sum, v1 #endif -#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_LOONGSON64) +#if defined(CONFIG_CPU_MIPSR2) || defined(CONFIG_CPU_MIPSR5) || \ + defined(CONFIG_CPU_LOONGSON64) .set push .set arch=mips32r2 wsbh v1, sum diff --git a/arch/mips/loongson2ef/Kconfig b/arch/mips/loongson2ef/Kconfig index 595dd48e1e4d..96dc6eba4310 100644 --- a/arch/mips/loongson2ef/Kconfig +++ b/arch/mips/loongson2ef/Kconfig @@ -46,7 +46,6 @@ config LEMOTE_MACH2F select CSRC_R4K if ! MIPS_EXTERNAL_TIMER select DMA_NONCOHERENT select GENERIC_ISA_DMA_SUPPORT_BROKEN - select HAVE_CLK select FORCE_PCI select I8259 select IRQ_MIPS_CPU diff --git a/arch/mips/loongson2ef/Platform b/arch/mips/loongson2ef/Platform index 3aca42963f35..cdad3c1a9a18 100644 --- a/arch/mips/loongson2ef/Platform +++ b/arch/mips/loongson2ef/Platform @@ -26,7 +26,6 @@ endif # Loongson Machines' Support # -platform-$(CONFIG_MACH_LOONGSON2EF) += loongson2ef/ cflags-$(CONFIG_MACH_LOONGSON2EF) += -I$(srctree)/arch/mips/include/asm/mach-loongson2ef -mno-branch-likely load-$(CONFIG_LEMOTE_FULOONG2E) += 0xffffffff80100000 load-$(CONFIG_LEMOTE_MACH2F) += 0xffffffff80200000 diff --git a/arch/mips/loongson2ef/common/init.c b/arch/mips/loongson2ef/common/init.c index 45512178be77..ce3f02f75e2a 100644 --- a/arch/mips/loongson2ef/common/init.c +++ b/arch/mips/loongson2ef/common/init.c @@ -19,10 +19,10 @@ unsigned long __maybe_unused _loongson_addrwincfg_base; static void __init mips_nmi_setup(void) { void *base; - extern char except_vec_nmi; + extern char except_vec_nmi[]; base = (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); + memcpy(base, except_vec_nmi, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } diff --git a/arch/mips/loongson2ef/lemote-2f/clock.c b/arch/mips/loongson2ef/lemote-2f/clock.c index 414f282c8ab5..850b6b9f8f15 100644 --- a/arch/mips/loongson2ef/lemote-2f/clock.c +++ b/arch/mips/loongson2ef/lemote-2f/clock.c @@ -6,22 +6,12 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. */ -#include <linux/clk.h> #include <linux/cpufreq.h> #include <linux/errno.h> #include <linux/export.h> -#include <linux/list.h> -#include <linux/mutex.h> -#include <linux/spinlock.h> -#include <asm/clock.h> #include <asm/mach-loongson2ef/loongson.h> -static LIST_HEAD(clock_list); -static DEFINE_SPINLOCK(clock_lock); -static DEFINE_MUTEX(clock_list_sem); - -/* Minimum CLK support */ enum { DC_ZERO, DC_25PT = 2, DC_37PT, DC_50PT, DC_62PT, DC_75PT, DC_87PT, DC_DISABLE, DC_RESV @@ -41,103 +31,21 @@ struct cpufreq_frequency_table loongson2_clockmod_table[] = { }; EXPORT_SYMBOL_GPL(loongson2_clockmod_table); -static struct clk cpu_clk = { - .name = "cpu_clk", - .flags = CLK_ALWAYS_ENABLED | CLK_RATE_PROPAGATES, - .rate = 800000000, -}; - -struct clk *clk_get(struct device *dev, const char *id) -{ - return &cpu_clk; -} -EXPORT_SYMBOL(clk_get); - -static void propagate_rate(struct clk *clk) -{ - struct clk *clkp; - - list_for_each_entry(clkp, &clock_list, node) { - if (likely(clkp->parent != clk)) - continue; - if (likely(clkp->ops && clkp->ops->recalc)) - clkp->ops->recalc(clkp); - if (unlikely(clkp->flags & CLK_RATE_PROPAGATES)) - propagate_rate(clkp); - } -} - -int clk_enable(struct clk *clk) -{ - return 0; -} -EXPORT_SYMBOL(clk_enable); - -void clk_disable(struct clk *clk) +int loongson2_cpu_set_rate(unsigned long rate_khz) { -} -EXPORT_SYMBOL(clk_disable); - -unsigned long clk_get_rate(struct clk *clk) -{ - if (!clk) - return 0; - - return (unsigned long)clk->rate; -} -EXPORT_SYMBOL(clk_get_rate); - -void clk_put(struct clk *clk) -{ -} -EXPORT_SYMBOL(clk_put); - -int clk_set_rate(struct clk *clk, unsigned long rate) -{ - unsigned int rate_khz = rate / 1000; struct cpufreq_frequency_table *pos; - int ret = 0; int regval; - if (likely(clk->ops && clk->ops->set_rate)) { - unsigned long flags; - - spin_lock_irqsave(&clock_lock, flags); - ret = clk->ops->set_rate(clk, rate, 0); - spin_unlock_irqrestore(&clock_lock, flags); - } - - if (unlikely(clk->flags & CLK_RATE_PROPAGATES)) - propagate_rate(clk); - cpufreq_for_each_valid_entry(pos, loongson2_clockmod_table) if (rate_khz == pos->frequency) break; if (rate_khz != pos->frequency) return -ENOTSUPP; - clk->rate = rate; - regval = readl(LOONGSON_CHIPCFG); regval = (regval & ~0x7) | (pos->driver_data - 1); writel(regval, LOONGSON_CHIPCFG); - return ret; -} -EXPORT_SYMBOL_GPL(clk_set_rate); - -long clk_round_rate(struct clk *clk, unsigned long rate) -{ - if (likely(clk->ops && clk->ops->round_rate)) { - unsigned long flags, rounded; - - spin_lock_irqsave(&clock_lock, flags); - rounded = clk->ops->round_rate(clk, rate); - spin_unlock_irqrestore(&clock_lock, flags); - - return rounded; - } - - return rate; + return 0; } -EXPORT_SYMBOL_GPL(clk_round_rate); +EXPORT_SYMBOL_GPL(loongson2_cpu_set_rate); diff --git a/arch/mips/loongson32/Platform b/arch/mips/loongson32/Platform index 7f8e342f1ef5..3b9673e7a2fa 100644 --- a/arch/mips/loongson32/Platform +++ b/arch/mips/loongson32/Platform @@ -1,4 +1,3 @@ cflags-$(CONFIG_CPU_LOONGSON32) += -march=mips32r2 -Wa,--trap -platform-$(CONFIG_MACH_LOONGSON32) += loongson32/ cflags-$(CONFIG_MACH_LOONGSON32) += -I$(srctree)/arch/mips/include/asm/mach-loongson32 load-$(CONFIG_CPU_LOONGSON32) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 48b29c198acf..517f1f8e81fb 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -4,18 +4,12 @@ if MACH_LOONGSON64 config RS780_HPET bool "RS780/SBX00 HPET Timer" depends on MACH_LOONGSON64 + depends on BROKEN select MIPS_EXTERNAL_TIMER help This option enables the hpet timer of AMD RS780/SBX00. - If you want to enable the Loongson3 CPUFreq Driver, Please enable - this option at first, otherwise, You will get wrong system time. - - If unsure, say Yes. - - -config LOONGSON_MC146818 - bool - default n + Note: This driver is doing some dangerous hack. Please only enable + it on RS780E systems. endif # MACH_LOONGSON64 diff --git a/arch/mips/loongson64/Makefile b/arch/mips/loongson64/Makefile index b7f40b179c71..39c06f52b08f 100644 --- a/arch/mips/loongson64/Makefile +++ b/arch/mips/loongson64/Makefile @@ -2,12 +2,12 @@ # # Makefile for Loongson-3 family machines # -obj-$(CONFIG_MACH_LOONGSON64) += cop2-ex.o platform.o acpi_init.o dma.o \ +obj-$(CONFIG_MACH_LOONGSON64) += cop2-ex.o platform.o dma.o \ setup.o init.o env.o time.o reset.o \ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_NUMA) += numa.o obj-$(CONFIG_RS780_HPET) += hpet.o -obj-$(CONFIG_PCI) += pci.o -obj-$(CONFIG_LOONGSON_MC146818) += rtc.o obj-$(CONFIG_SUSPEND) += pm.o +obj-$(CONFIG_PCI_QUIRKS) += vbios_quirk.o +obj-$(CONFIG_CPU_LOONGSON3_CPUCFG_EMULATION) += cpucfg-emul.o diff --git a/arch/mips/loongson64/Platform b/arch/mips/loongson64/Platform index d5eb94c9edb4..ec42c5085905 100644 --- a/arch/mips/loongson64/Platform +++ b/arch/mips/loongson64/Platform @@ -55,6 +55,5 @@ cflags-y += $(call cc-option,-mno-loongson-mmi) # Loongson Machines' Support # -platform-$(CONFIG_MACH_LOONGSON64) += loongson64/ cflags-$(CONFIG_MACH_LOONGSON64) += -I$(srctree)/arch/mips/include/asm/mach-loongson64 -mno-branch-likely load-$(CONFIG_CPU_LOONGSON64) += 0xffffffff80200000 diff --git a/arch/mips/loongson64/acpi_init.c b/arch/mips/loongson64/acpi_init.c deleted file mode 100644 index 8d7c119ddf91..000000000000 --- a/arch/mips/loongson64/acpi_init.c +++ /dev/null @@ -1,151 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/io.h> -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/export.h> - -#define SBX00_ACPI_IO_BASE 0x800 -#define SBX00_ACPI_IO_SIZE 0x100 - -#define ACPI_PM_EVT_BLK (SBX00_ACPI_IO_BASE + 0x00) /* 4 bytes */ -#define ACPI_PM_CNT_BLK (SBX00_ACPI_IO_BASE + 0x04) /* 2 bytes */ -#define ACPI_PMA_CNT_BLK (SBX00_ACPI_IO_BASE + 0x0F) /* 1 byte */ -#define ACPI_PM_TMR_BLK (SBX00_ACPI_IO_BASE + 0x18) /* 4 bytes */ -#define ACPI_GPE0_BLK (SBX00_ACPI_IO_BASE + 0x10) /* 8 bytes */ -#define ACPI_END (SBX00_ACPI_IO_BASE + 0x80) - -#define PM_INDEX 0xCD6 -#define PM_DATA 0xCD7 -#define PM2_INDEX 0xCD0 -#define PM2_DATA 0xCD1 - -/* - * SCI interrupt need acpi space, allocate here - */ - -static int __init register_acpi_resource(void) -{ - request_region(SBX00_ACPI_IO_BASE, SBX00_ACPI_IO_SIZE, "acpi"); - return 0; -} - -static void pmio_write_index(u16 index, u8 reg, u8 value) -{ - outb(reg, index); - outb(value, index + 1); -} - -static u8 pmio_read_index(u16 index, u8 reg) -{ - outb(reg, index); - return inb(index + 1); -} - -void pm_iowrite(u8 reg, u8 value) -{ - pmio_write_index(PM_INDEX, reg, value); -} -EXPORT_SYMBOL(pm_iowrite); - -u8 pm_ioread(u8 reg) -{ - return pmio_read_index(PM_INDEX, reg); -} -EXPORT_SYMBOL(pm_ioread); - -void pm2_iowrite(u8 reg, u8 value) -{ - pmio_write_index(PM2_INDEX, reg, value); -} -EXPORT_SYMBOL(pm2_iowrite); - -u8 pm2_ioread(u8 reg) -{ - return pmio_read_index(PM2_INDEX, reg); -} -EXPORT_SYMBOL(pm2_ioread); - -static void acpi_hw_clear_status(void) -{ - u16 value; - - /* PMStatus: Clear WakeStatus/PwrBtnStatus */ - value = inw(ACPI_PM_EVT_BLK); - value |= (1 << 8 | 1 << 15); - outw(value, ACPI_PM_EVT_BLK); - - /* GPEStatus: Clear all generated events */ - outl(inl(ACPI_GPE0_BLK), ACPI_GPE0_BLK); -} - -void acpi_registers_setup(void) -{ - u32 value; - - /* PM Status Base */ - pm_iowrite(0x20, ACPI_PM_EVT_BLK & 0xff); - pm_iowrite(0x21, ACPI_PM_EVT_BLK >> 8); - - /* PM Control Base */ - pm_iowrite(0x22, ACPI_PM_CNT_BLK & 0xff); - pm_iowrite(0x23, ACPI_PM_CNT_BLK >> 8); - - /* GPM Base */ - pm_iowrite(0x28, ACPI_GPE0_BLK & 0xff); - pm_iowrite(0x29, ACPI_GPE0_BLK >> 8); - - /* ACPI End */ - pm_iowrite(0x2e, ACPI_END & 0xff); - pm_iowrite(0x2f, ACPI_END >> 8); - - /* IO Decode: When AcpiDecodeEnable set, South-Bridge uses the contents - * of the PM registers at index 0x20~0x2B to decode ACPI I/O address. */ - pm_iowrite(0x0e, 1 << 3); - - /* SCI_EN set */ - outw(1, ACPI_PM_CNT_BLK); - - /* Enable to generate SCI */ - pm_iowrite(0x10, pm_ioread(0x10) | 1); - - /* GPM3/GPM9 enable */ - value = inl(ACPI_GPE0_BLK + 4); - outl(value | (1 << 14) | (1 << 22), ACPI_GPE0_BLK + 4); - - /* Set GPM9 as input */ - pm_iowrite(0x8d, pm_ioread(0x8d) & (~(1 << 1))); - - /* Set GPM9 as non-output */ - pm_iowrite(0x94, pm_ioread(0x94) | (1 << 3)); - - /* GPM3 config ACPI trigger SCIOUT */ - pm_iowrite(0x33, pm_ioread(0x33) & (~(3 << 4))); - - /* GPM9 config ACPI trigger SCIOUT */ - pm_iowrite(0x3d, pm_ioread(0x3d) & (~(3 << 2))); - - /* GPM3 config falling edge trigger */ - pm_iowrite(0x37, pm_ioread(0x37) & (~(1 << 6))); - - /* No wait for STPGNT# in ACPI Sx state */ - pm_iowrite(0x7c, pm_ioread(0x7c) | (1 << 6)); - - /* Set GPM3 pull-down enable */ - value = pm2_ioread(0xf6); - value |= ((1 << 7) | (1 << 3)); - pm2_iowrite(0xf6, value); - - /* Set GPM9 pull-down enable */ - value = pm2_ioread(0xf8); - value |= ((1 << 5) | (1 << 1)); - pm2_iowrite(0xf8, value); -} - -int __init sbx00_acpi_init(void) -{ - register_acpi_resource(); - acpi_registers_setup(); - acpi_hw_clear_status(); - - return 0; -} diff --git a/arch/mips/loongson64/cop2-ex.c b/arch/mips/loongson64/cop2-ex.c index 9efdfe430ff0..f130f62129b8 100644 --- a/arch/mips/loongson64/cop2-ex.c +++ b/arch/mips/loongson64/cop2-ex.c @@ -14,17 +14,30 @@ #include <linux/sched.h> #include <linux/notifier.h> #include <linux/ptrace.h> +#include <linux/uaccess.h> +#include <linux/sched/signal.h> #include <asm/fpu.h> #include <asm/cop2.h> +#include <asm/inst.h> +#include <asm/branch.h> #include <asm/current.h> #include <asm/mipsregs.h> +#include <asm/unaligned-emul.h> static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, void *data) { - int fpu_owned; + unsigned int res, fpu_owned; + unsigned long ra, value, value_next; + union mips_instruction insn; int fr = !test_thread_flag(TIF_32BIT_FPREGS); + struct pt_regs *regs = (struct pt_regs *)data; + void __user *addr = (void __user *)regs->cp0_badvaddr; + unsigned int __user *pc = (unsigned int __user *)exception_epc(regs); + + ra = regs->regs[31]; + __get_user(insn.word, pc); switch (action) { case CU2_EXCEPTION: @@ -49,9 +62,284 @@ static int loongson_cu2_call(struct notifier_block *nfb, unsigned long action, preempt_enable(); return NOTIFY_STOP; /* Don't call default notifier */ + + case CU2_LWC2_OP: + if (insn.loongson3_lswc2_format.ls == 0) + goto sigbus; + + if (insn.loongson3_lswc2_format.fr == 0) { /* gslq */ + if (!access_ok(addr, 16)) + goto sigbus; + + LoadDW(addr, value, res); + if (res) + goto fault; + + LoadDW(addr + 8, value_next, res); + if (res) + goto fault; + + regs->regs[insn.loongson3_lswc2_format.rt] = value; + regs->regs[insn.loongson3_lswc2_format.rq] = value_next; + compute_return_epc(regs); + } else { /* gslqc1 */ + if (!access_ok(addr, 16)) + goto sigbus; + + lose_fpu(1); + LoadDW(addr, value, res); + if (res) + goto fault; + + LoadDW(addr + 8, value_next, res); + if (res) + goto fault; + + set_fpr64(current->thread.fpu.fpr, + insn.loongson3_lswc2_format.rt, value); + set_fpr64(current->thread.fpu.fpr, + insn.loongson3_lswc2_format.rq, value_next); + compute_return_epc(regs); + own_fpu(1); + } + return NOTIFY_STOP; /* Don't call default notifier */ + + case CU2_SWC2_OP: + if (insn.loongson3_lswc2_format.ls == 0) + goto sigbus; + + if (insn.loongson3_lswc2_format.fr == 0) { /* gssq */ + if (!access_ok(addr, 16)) + goto sigbus; + + /* write upper 8 bytes first */ + value_next = regs->regs[insn.loongson3_lswc2_format.rq]; + + StoreDW(addr + 8, value_next, res); + if (res) + goto fault; + value = regs->regs[insn.loongson3_lswc2_format.rt]; + + StoreDW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + } else { /* gssqc1 */ + if (!access_ok(addr, 16)) + goto sigbus; + + lose_fpu(1); + value_next = get_fpr64(current->thread.fpu.fpr, + insn.loongson3_lswc2_format.rq); + + StoreDW(addr + 8, value_next, res); + if (res) + goto fault; + + value = get_fpr64(current->thread.fpu.fpr, + insn.loongson3_lswc2_format.rt); + + StoreDW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + own_fpu(1); + } + return NOTIFY_STOP; /* Don't call default notifier */ + + case CU2_LDC2_OP: + switch (insn.loongson3_lsdc2_format.opcode1) { + /* + * Loongson-3 overridden ldc2 instructions. + * opcode1 instruction + * 0x1 gslhx: load 2 bytes to GPR + * 0x2 gslwx: load 4 bytes to GPR + * 0x3 gsldx: load 8 bytes to GPR + * 0x6 gslwxc1: load 4 bytes to FPR + * 0x7 gsldxc1: load 8 bytes to FPR + */ + case 0x1: + if (!access_ok(addr, 2)) + goto sigbus; + + LoadHW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + regs->regs[insn.loongson3_lsdc2_format.rt] = value; + break; + case 0x2: + if (!access_ok(addr, 4)) + goto sigbus; + + LoadW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + regs->regs[insn.loongson3_lsdc2_format.rt] = value; + break; + case 0x3: + if (!access_ok(addr, 8)) + goto sigbus; + + LoadDW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + regs->regs[insn.loongson3_lsdc2_format.rt] = value; + break; + case 0x6: + die_if_kernel("Unaligned FP access in kernel code", regs); + BUG_ON(!used_math()); + if (!access_ok(addr, 4)) + goto sigbus; + + lose_fpu(1); + LoadW(addr, value, res); + if (res) + goto fault; + + set_fpr64(current->thread.fpu.fpr, + insn.loongson3_lsdc2_format.rt, value); + compute_return_epc(regs); + own_fpu(1); + + break; + case 0x7: + die_if_kernel("Unaligned FP access in kernel code", regs); + BUG_ON(!used_math()); + if (!access_ok(addr, 8)) + goto sigbus; + + lose_fpu(1); + LoadDW(addr, value, res); + if (res) + goto fault; + + set_fpr64(current->thread.fpu.fpr, + insn.loongson3_lsdc2_format.rt, value); + compute_return_epc(regs); + own_fpu(1); + break; + + } + return NOTIFY_STOP; /* Don't call default notifier */ + + case CU2_SDC2_OP: + switch (insn.loongson3_lsdc2_format.opcode1) { + /* + * Loongson-3 overridden sdc2 instructions. + * opcode1 instruction + * 0x1 gsshx: store 2 bytes from GPR + * 0x2 gsswx: store 4 bytes from GPR + * 0x3 gssdx: store 8 bytes from GPR + * 0x6 gsswxc1: store 4 bytes from FPR + * 0x7 gssdxc1: store 8 bytes from FPR + */ + case 0x1: + if (!access_ok(addr, 2)) + goto sigbus; + + compute_return_epc(regs); + value = regs->regs[insn.loongson3_lsdc2_format.rt]; + + StoreHW(addr, value, res); + if (res) + goto fault; + + break; + case 0x2: + if (!access_ok(addr, 4)) + goto sigbus; + + compute_return_epc(regs); + value = regs->regs[insn.loongson3_lsdc2_format.rt]; + + StoreW(addr, value, res); + if (res) + goto fault; + + break; + case 0x3: + if (!access_ok(addr, 8)) + goto sigbus; + + compute_return_epc(regs); + value = regs->regs[insn.loongson3_lsdc2_format.rt]; + + StoreDW(addr, value, res); + if (res) + goto fault; + + break; + + case 0x6: + die_if_kernel("Unaligned FP access in kernel code", regs); + BUG_ON(!used_math()); + + if (!access_ok(addr, 4)) + goto sigbus; + + lose_fpu(1); + value = get_fpr64(current->thread.fpu.fpr, + insn.loongson3_lsdc2_format.rt); + + StoreW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + own_fpu(1); + + break; + case 0x7: + die_if_kernel("Unaligned FP access in kernel code", regs); + BUG_ON(!used_math()); + + if (!access_ok(addr, 8)) + goto sigbus; + + lose_fpu(1); + value = get_fpr64(current->thread.fpu.fpr, + insn.loongson3_lsdc2_format.rt); + + StoreDW(addr, value, res); + if (res) + goto fault; + + compute_return_epc(regs); + own_fpu(1); + + break; + } + return NOTIFY_STOP; /* Don't call default notifier */ } return NOTIFY_OK; /* Let default notifier send signals */ + +fault: + /* roll back jump/branch */ + regs->regs[31] = ra; + regs->cp0_epc = (unsigned long)pc; + /* Did we have an exception handler installed? */ + if (fixup_exception(regs)) + return NOTIFY_STOP; /* Don't call default notifier */ + + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGSEGV); + + return NOTIFY_STOP; /* Don't call default notifier */ + +sigbus: + die_if_kernel("Unhandled kernel unaligned access", regs); + force_sig(SIGBUS); + + return NOTIFY_STOP; /* Don't call default notifier */ } static int __init loongson_cu2_setup(void) diff --git a/arch/mips/loongson64/cpucfg-emul.c b/arch/mips/loongson64/cpucfg-emul.c new file mode 100644 index 000000000000..cd619b47ba1f --- /dev/null +++ b/arch/mips/loongson64/cpucfg-emul.c @@ -0,0 +1,227 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/smp.h> +#include <linux/types.h> +#include <asm/cpu.h> +#include <asm/cpu-info.h> +#include <asm/elf.h> + +#include <loongson_regs.h> +#include <cpucfg-emul.h> + +static bool is_loongson(struct cpuinfo_mips *c) +{ + switch (c->processor_id & PRID_COMP_MASK) { + case PRID_COMP_LEGACY: + return ((c->processor_id & PRID_IMP_MASK) == + PRID_IMP_LOONGSON_64C); + + case PRID_COMP_LOONGSON: + return true; + + default: + return false; + } +} + +static u32 get_loongson_fprev(struct cpuinfo_mips *c) +{ + return c->fpu_id & LOONGSON_FPREV_MASK; +} + +static bool cpu_has_uca(void) +{ + u32 diag = read_c0_diag(); + u32 new_diag; + + if (diag & LOONGSON_DIAG_UCAC) + /* UCA is already enabled. */ + return true; + + /* See if UCAC bit can be flipped on. This should be safe. */ + new_diag = diag | LOONGSON_DIAG_UCAC; + write_c0_diag(new_diag); + new_diag = read_c0_diag(); + write_c0_diag(diag); + + return (new_diag & LOONGSON_DIAG_UCAC) != 0; +} + +static void probe_uca(struct cpuinfo_mips *c) +{ + if (cpu_has_uca()) + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_LSUCA; +} + +static void decode_loongson_config6(struct cpuinfo_mips *c) +{ + u32 config6 = read_c0_config6(); + + if (config6 & MIPS_CONF6_LOONGSON_SFBEN) + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_SFBP; + if (config6 & MIPS_CONF6_LOONGSON_LLEXC) + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_LLEXC; + if (config6 & MIPS_CONF6_LOONGSON_SCRAND) + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_SCRAND; +} + +static void patch_cpucfg_sel1(struct cpuinfo_mips *c) +{ + u64 ases = c->ases; + u64 options = c->options; + u32 data = c->loongson3_cpucfg_data[0]; + + if (options & MIPS_CPU_FPU) { + data |= LOONGSON_CFG1_FP; + data |= get_loongson_fprev(c) << LOONGSON_CFG1_FPREV_OFFSET; + } + if (ases & MIPS_ASE_LOONGSON_MMI) + data |= LOONGSON_CFG1_MMI; + if (ases & MIPS_ASE_MSA) + data |= LOONGSON_CFG1_MSA1; + + c->loongson3_cpucfg_data[0] = data; +} + +static void patch_cpucfg_sel2(struct cpuinfo_mips *c) +{ + u64 ases = c->ases; + u64 options = c->options; + u32 data = c->loongson3_cpucfg_data[1]; + + if (ases & MIPS_ASE_LOONGSON_EXT) + data |= LOONGSON_CFG2_LEXT1; + if (ases & MIPS_ASE_LOONGSON_EXT2) + data |= LOONGSON_CFG2_LEXT2; + if (options & MIPS_CPU_LDPTE) + data |= LOONGSON_CFG2_LSPW; + + if (ases & MIPS_ASE_VZ) + data |= LOONGSON_CFG2_LVZP; + else + data &= ~LOONGSON_CFG2_LVZREV; + + c->loongson3_cpucfg_data[1] = data; +} + +static void patch_cpucfg_sel3(struct cpuinfo_mips *c) +{ + u64 ases = c->ases; + u32 data = c->loongson3_cpucfg_data[2]; + + if (ases & MIPS_ASE_LOONGSON_CAM) { + data |= LOONGSON_CFG3_LCAMP; + } else { + data &= ~LOONGSON_CFG3_LCAMREV; + data &= ~LOONGSON_CFG3_LCAMNUM; + data &= ~LOONGSON_CFG3_LCAMKW; + data &= ~LOONGSON_CFG3_LCAMVW; + } + + c->loongson3_cpucfg_data[2] = data; +} + +void loongson3_cpucfg_synthesize_data(struct cpuinfo_mips *c) +{ + /* Only engage the logic on Loongson processors. */ + if (!is_loongson(c)) + return; + + /* CPUs with CPUCFG support don't need to synthesize anything. */ + if (cpu_has_cfg()) + goto have_cpucfg_now; + + c->loongson3_cpucfg_data[0] = 0; + c->loongson3_cpucfg_data[1] = 0; + c->loongson3_cpucfg_data[2] = 0; + + /* Add CPUCFG features non-discoverable otherwise. */ + switch (c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) { + case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_0: + case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_1: + case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_2: + case PRID_IMP_LOONGSON_64R | PRID_REV_LOONGSON2K_R1_3: + decode_loongson_config6(c); + probe_uca(c); + + c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 | + LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LLSYNC | + LOONGSON_CFG1_TGTSYNC); + c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 | + LOONGSON_CFG2_LBT2 | LOONGSON_CFG2_LPMP | + LOONGSON_CFG2_LPM_REV2); + c->loongson3_cpucfg_data[2] = 0; + break; + + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R1: + c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 | + LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LSUCA | + LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC); + c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 | + LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1); + c->loongson3_cpucfg_data[2] |= ( + LOONGSON_CFG3_LCAM_REV1 | + LOONGSON_CFG3_LCAMNUM_REV1 | + LOONGSON_CFG3_LCAMKW_REV1 | + LOONGSON_CFG3_LCAMVW_REV1); + break; + + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3B_R1: + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3B_R2: + c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_LSLDR0 | + LOONGSON_CFG1_LSSYNCI | LOONGSON_CFG1_LSUCA | + LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC); + c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 | + LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1); + c->loongson3_cpucfg_data[2] |= ( + LOONGSON_CFG3_LCAM_REV1 | + LOONGSON_CFG3_LCAMNUM_REV1 | + LOONGSON_CFG3_LCAMKW_REV1 | + LOONGSON_CFG3_LCAMVW_REV1); + break; + + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0: + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_1: + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R3_0: + case PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R3_1: + decode_loongson_config6(c); + probe_uca(c); + + c->loongson3_cpucfg_data[0] |= (LOONGSON_CFG1_CNT64 | + LOONGSON_CFG1_LSLDR0 | LOONGSON_CFG1_LSPREF | + LOONGSON_CFG1_LSPREFX | LOONGSON_CFG1_LSSYNCI | + LOONGSON_CFG1_LLSYNC | LOONGSON_CFG1_TGTSYNC); + c->loongson3_cpucfg_data[1] |= (LOONGSON_CFG2_LBT1 | + LOONGSON_CFG2_LBT2 | LOONGSON_CFG2_LBTMMU | + LOONGSON_CFG2_LPMP | LOONGSON_CFG2_LPM_REV1 | + LOONGSON_CFG2_LVZ_REV1); + c->loongson3_cpucfg_data[2] |= (LOONGSON_CFG3_LCAM_REV1 | + LOONGSON_CFG3_LCAMNUM_REV1 | + LOONGSON_CFG3_LCAMKW_REV1 | + LOONGSON_CFG3_LCAMVW_REV1); + break; + + default: + /* It is possible that some future Loongson cores still do + * not have CPUCFG, so do not emulate anything for these + * cores. + */ + return; + } + + /* This feature is set by firmware, but all known Loongson-64 systems + * are configured this way. + */ + c->loongson3_cpucfg_data[0] |= LOONGSON_CFG1_CDMAP; + + /* Patch in dynamically probed bits. */ + patch_cpucfg_sel1(c); + patch_cpucfg_sel2(c); + patch_cpucfg_sel3(c); + +have_cpucfg_now: + /* We have usable CPUCFG now, emulated or not. + * Announce CPUCFG availability to userspace via hwcap. + */ + elf_hwcap |= HWCAP_LOONGSON_CPUCFG; +} diff --git a/arch/mips/loongson64/dma.c b/arch/mips/loongson64/dma.c index 5e86635f71db..dbfe6e82fddd 100644 --- a/arch/mips/loongson64/dma.c +++ b/arch/mips/loongson64/dma.c @@ -2,21 +2,24 @@ #include <linux/dma-direct.h> #include <linux/init.h> #include <linux/swiotlb.h> +#include <boot_param.h> dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ long nid = (paddr >> 44) & 0x3; - return ((nid << 44) ^ paddr) | (nid << 37); + + return ((nid << 44) ^ paddr) | (nid << node_id_offset); } phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr) { /* We extract 2bit node id (bit 44~47, only bit 44~45 used now) from * Loongson-3's 48bit address space and embed it into 40bit */ - long nid = (daddr >> 37) & 0x3; - return ((nid << 37) ^ daddr) | (nid << 44); + long nid = (daddr >> node_id_offset) & 0x3; + + return ((nid << node_id_offset) ^ daddr) | (nid << 44); } void __init plat_swiotlb_setup(void) diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index 2554ef11170d..d11bc346bbca 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c @@ -14,12 +14,15 @@ * Author: Wu Zhangjin, wuzhangjin@gmail.com */ #include <linux/export.h> +#include <linux/pci_ids.h> #include <asm/bootinfo.h> #include <loongson.h> #include <boot_param.h> #include <builtin_dtbs.h> #include <workarounds.h> +#define HOST_BRIDGE_CONFIG_ADDR ((void __iomem *)TO_UNCAC(0x1a000000)) + u32 cpu_clock_freq; EXPORT_SYMBOL(cpu_clock_freq); struct efi_memory_map_loongson *loongson_memmap; @@ -43,6 +46,8 @@ void __init prom_init_env(void) struct system_loongson *esys; struct efi_cpuinfo_loongson *ecpu; struct irq_source_routing_table *eirq_source; + u32 id; + u16 vendor, device; /* firmware arguments are initialized in head.S */ boot_p = (struct boot_params *)fw_arg2; @@ -178,4 +183,19 @@ void __init prom_init_env(void) memcpy(loongson_sysconf.sensors, esys->sensors, sizeof(struct sensor_device) * loongson_sysconf.nr_sensors); pr_info("CpuClock = %u\n", cpu_clock_freq); + + /* Read the ID of PCI host bridge to detect bridge type */ + id = readl(HOST_BRIDGE_CONFIG_ADDR); + vendor = id & 0xffff; + device = (id >> 16) & 0xffff; + + if (vendor == PCI_VENDOR_ID_LOONGSON && device == 0x7a00) { + pr_info("The bridge chip is LS7A\n"); + loongson_sysconf.bridgetype = LS7A; + loongson_sysconf.early_config = ls7a_early_config; + } else { + pr_info("The bridge chip is RS780E or SR5690\n"); + loongson_sysconf.bridgetype = RS780E; + loongson_sysconf.early_config = rs780e_early_config; + } } diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index da38944471f4..59ddadace83f 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -5,6 +5,7 @@ */ #include <linux/irqchip.h> +#include <linux/logic_pio.h> #include <linux/memblock.h> #include <asm/bootinfo.h> #include <asm/traps.h> @@ -13,25 +14,41 @@ #include <asm/fw/fw.h> #include <loongson.h> +#include <boot_param.h> + +#define NODE_ID_OFFSET_ADDR ((void __iomem *)TO_UNCAC(0x1001041c)) + +u32 node_id_offset; static void __init mips_nmi_setup(void) { void *base; - extern char except_vec_nmi; + extern char except_vec_nmi[]; base = (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); + memcpy(base, except_vec_nmi, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } +void ls7a_early_config(void) +{ + node_id_offset = ((readl(NODE_ID_OFFSET_ADDR) >> 8) & 0x1f) + 36; +} + +void rs780e_early_config(void) +{ + node_id_offset = 37; +} + void __init prom_init(void) { fw_init_cmdline(); prom_init_env(); /* init base address of io space */ - set_io_port_base((unsigned long) - ioremap(LOONGSON_PCIIO_BASE, LOONGSON_PCIIO_SIZE)); + set_io_port_base(PCI_IOBASE); + + loongson_sysconf.early_config(); prom_init_numa_memory(); @@ -46,7 +63,45 @@ void __init prom_free_prom_memory(void) { } +static __init void reserve_pio_range(void) +{ + struct logic_pio_hwaddr *range; + + range = kzalloc(sizeof(*range), GFP_ATOMIC); + if (!range) + return; + + range->fwnode = &of_root->fwnode; + range->size = MMIO_LOWER_RESERVED; + range->hw_start = LOONGSON_PCIIO_BASE; + range->flags = LOGIC_PIO_CPU_MMIO; + + if (logic_pio_register_range(range)) { + pr_err("Failed to reserve PIO range for legacy ISA\n"); + goto free_range; + } + + if (WARN(range->io_start != 0, + "Reserved PIO range does not start from 0\n")) + goto unregister; + + /* + * i8259 would access I/O space, so mapping must be done here. + * Please remove it when all drivers can be managed by logic_pio. + */ + ioremap_page_range(PCI_IOBASE, PCI_IOBASE + MMIO_LOWER_RESERVED, + LOONGSON_PCIIO_BASE, + pgprot_device(PAGE_KERNEL)); + + return; +unregister: + logic_pio_unregister_range(range); +free_range: + kfree(range); +} + void __init arch_init_irq(void) { + reserve_pio_range(); irqchip_init(); } diff --git a/arch/mips/loongson64/pci.c b/arch/mips/loongson64/pci.c deleted file mode 100644 index e84ae20c3290..000000000000 --- a/arch/mips/loongson64/pci.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2007 Lemote, Inc. & Institute of Computing Technology - * Author: Fuxin Zhang, zhangfx@lemote.com - */ -#include <linux/pci.h> - -#include <pci.h> -#include <loongson.h> -#include <boot_param.h> - -static struct resource loongson_pci_mem_resource = { - .name = "pci memory space", - .start = LOONGSON_PCI_MEM_START, - .end = LOONGSON_PCI_MEM_END, - .flags = IORESOURCE_MEM, -}; - -static struct resource loongson_pci_io_resource = { - .name = "pci io space", - .start = LOONGSON_PCI_IO_START, - .end = IO_SPACE_LIMIT, - .flags = IORESOURCE_IO, -}; - -static struct pci_controller loongson_pci_controller = { - .pci_ops = &loongson_pci_ops, - .io_resource = &loongson_pci_io_resource, - .mem_resource = &loongson_pci_mem_resource, - .mem_offset = 0x00000000UL, - .io_offset = 0x00000000UL, -}; - - -extern int sbx00_acpi_init(void); - -static int __init pcibios_init(void) -{ - - loongson_pci_controller.io_map_base = mips_io_port_base; - loongson_pci_mem_resource.start = loongson_sysconf.pci_mem_start_addr; - loongson_pci_mem_resource.end = loongson_sysconf.pci_mem_end_addr; - - register_pci_controller(&loongson_pci_controller); - - sbx00_acpi_init(); - - return 0; -} - -arch_initcall(pcibios_init); diff --git a/arch/mips/loongson64/rtc.c b/arch/mips/loongson64/rtc.c deleted file mode 100644 index 8d7628c0f513..000000000000 --- a/arch/mips/loongson64/rtc.c +++ /dev/null @@ -1,39 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Lemote Fuloong platform support - * - * Copyright(c) 2010 Arnaud Patard <apatard@mandriva.com> - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/platform_device.h> -#include <linux/mc146818rtc.h> - -static struct resource loongson_rtc_resources[] = { - { - .start = RTC_PORT(0), - .end = RTC_PORT(1), - .flags = IORESOURCE_IO, - }, { - .start = RTC_IRQ, - .end = RTC_IRQ, - .flags = IORESOURCE_IRQ, - } -}; - -static struct platform_device loongson_rtc_device = { - .name = "rtc_cmos", - .id = -1, - .resource = loongson_rtc_resources, - .num_resources = ARRAY_SIZE(loongson_rtc_resources), -}; - - -static int __init loongson_rtc_platform_init(void) -{ - platform_device_register(&loongson_rtc_device); - return 0; -} - -device_initcall(loongson_rtc_platform_init); diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index e1fe8bbb377d..e744e1bee49e 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -15,7 +15,6 @@ #include <linux/kexec.h> #include <asm/processor.h> #include <asm/time.h> -#include <asm/clock.h> #include <asm/tlbflush.h> #include <asm/cacheflush.h> #include <loongson.h> diff --git a/arch/mips/loongson64/time.c b/arch/mips/loongson64/time.c index 1245f22cec84..91e842b58365 100644 --- a/arch/mips/loongson64/time.c +++ b/arch/mips/loongson64/time.c @@ -6,7 +6,7 @@ * Copyright (C) 2009 Lemote Inc. * Author: Wu Zhangjin, wuzhangjin@gmail.com */ -#include <asm/mc146818-time.h> + #include <asm/time.h> #include <asm/hpet.h> @@ -21,9 +21,3 @@ void __init plat_time_init(void) setup_hpet_timer(); #endif } - -void read_persistent_clock64(struct timespec64 *ts) -{ - ts->tv_sec = mc146818_get_cmos_time(); - ts->tv_nsec = 0; -} diff --git a/arch/mips/loongson64/vbios_quirk.c b/arch/mips/loongson64/vbios_quirk.c new file mode 100644 index 000000000000..9a29e94d3db1 --- /dev/null +++ b/arch/mips/loongson64/vbios_quirk.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0+ + +#include <linux/pci.h> +#include <loongson.h> + +static void pci_fixup_radeon(struct pci_dev *pdev) +{ + struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; + + if (res->start) + return; + + if (!loongson_sysconf.vgabios_addr) + return; + + pci_disable_rom(pdev); + if (res->parent) + release_resource(res); + + res->start = virt_to_phys((void *) loongson_sysconf.vgabios_addr); + res->end = res->start + 256*1024 - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + + dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n", + PCI_ROM_RESOURCE, res); +} +DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, 0x9615, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon); diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c index 9701c89e7e14..587cf1d115e8 100644 --- a/arch/mips/math-emu/cp1emu.c +++ b/arch/mips/math-emu/cp1emu.c @@ -439,7 +439,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; } - /* fall through */ + fallthrough; case jr_op: /* For R6, JR already emulated in jalr_op */ if (NO_R6EMU && insn.r_format.func == jr_op) @@ -459,11 +459,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* fall through */ + fallthrough; case bltzl_op: if (NO_R6EMU) break; - /* fall through */ + fallthrough; case bltz_op: if ((long)regs->regs[insn.i_format.rs] < 0) *contpc = regs->cp0_epc + @@ -483,11 +483,11 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* fall through */ + fallthrough; case bgezl_op: if (NO_R6EMU) break; - /* fall through */ + fallthrough; case bgez_op: if ((long)regs->regs[insn.i_format.rs] >= 0) *contpc = regs->cp0_epc + @@ -502,12 +502,12 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, break; case jalx_op: set_isa16_mode(bit); - /* fall through */ + fallthrough; case jal_op: regs->regs[31] = regs->cp0_epc + dec_insn.pc_inc + dec_insn.next_pc_inc; - /* fall through */ + fallthrough; case j_op: *contpc = regs->cp0_epc + dec_insn.pc_inc; *contpc >>= 28; @@ -519,7 +519,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case beql_op: if (NO_R6EMU) break; - /* fall through */ + fallthrough; case beq_op: if (regs->regs[insn.i_format.rs] == regs->regs[insn.i_format.rt]) @@ -534,7 +534,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case bnel_op: if (NO_R6EMU) break; - /* fall through */ + fallthrough; case bne_op: if (regs->regs[insn.i_format.rs] != regs->regs[insn.i_format.rt]) @@ -549,7 +549,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case blezl_op: if (!insn.i_format.rt && NO_R6EMU) break; - /* fall through */ + fallthrough; case blez_op: /* @@ -587,7 +587,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, case bgtzl_op: if (!insn.i_format.rt && NO_R6EMU) break; - /* fall through */ + fallthrough; case bgtz_op: /* * Compact branches for R6 for the @@ -725,7 +725,7 @@ int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, return 1; } /* R2/R6 compatible cop1 instruction */ - /* fall through */ + fallthrough; case cop2_op: case cop1x_op: if (insn.i_format.rs == bc_op) { @@ -1217,14 +1217,14 @@ emul: case bcfl_op: if (cpu_has_mips_2_3_4_5_r) likely = 1; - /* fall through */ + fallthrough; case bcf_op: cond = !cond; break; case bctl_op: if (cpu_has_mips_2_3_4_5_r) likely = 1; - /* fall through */ + fallthrough; case bct_op: break; } diff --git a/arch/mips/math-emu/dp_add.c b/arch/mips/math-emu/dp_add.c index a8f98b8157f5..78504736be9e 100644 --- a/arch/mips/math-emu/dp_add.c +++ b/arch/mips/math-emu/dp_add.c @@ -92,8 +92,7 @@ union ieee754dp ieee754dp_add(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_div.c b/arch/mips/math-emu/dp_div.c index 2b682e930e39..ac1ecc46248d 100644 --- a/arch/mips/math-emu/dp_div.c +++ b/arch/mips/math-emu/dp_div.c @@ -91,8 +91,7 @@ union ieee754dp ieee754dp_div(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c index 3eda9ff7b491..126ec90bb4c7 100644 --- a/arch/mips/math-emu/dp_fmax.c +++ b/arch/mips/math-emu/dp_fmax.c @@ -93,8 +93,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; @@ -222,8 +221,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c index b3594a1704a7..35ded4c45989 100644 --- a/arch/mips/math-emu/dp_fmin.c +++ b/arch/mips/math-emu/dp_fmin.c @@ -93,8 +93,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; @@ -222,8 +221,7 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c index e24ef374d828..931e66f683ca 100644 --- a/arch/mips/math-emu/dp_maddf.c +++ b/arch/mips/math-emu/dp_maddf.c @@ -150,8 +150,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): if (zc == IEEE754_CLASS_INF) return ieee754dp_inf(zs); diff --git a/arch/mips/math-emu/dp_mul.c b/arch/mips/math-emu/dp_mul.c index e8a97d26472a..8a671bb7af12 100644 --- a/arch/mips/math-emu/dp_mul.c +++ b/arch/mips/math-emu/dp_mul.c @@ -89,8 +89,7 @@ union ieee754dp ieee754dp_mul(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): DPDNORMY; break; diff --git a/arch/mips/math-emu/dp_sqrt.c b/arch/mips/math-emu/dp_sqrt.c index 06be390ba79a..1ee38f8242fd 100644 --- a/arch/mips/math-emu/dp_sqrt.c +++ b/arch/mips/math-emu/dp_sqrt.c @@ -52,8 +52,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) case IEEE754_CLASS_DNORM: DPDNORMX; - /* fall through */ - + fallthrough; case IEEE754_CLASS_NORM: if (xs) { /* sqrt(-x) = Nan */ @@ -130,7 +129,7 @@ union ieee754dp ieee754dp_sqrt(union ieee754dp x) switch (oldcsr.rm) { case FPU_CSR_RU: y.bits += 1; - /* fall through */ + fallthrough; case FPU_CSR_RN: t.bits += 1; break; diff --git a/arch/mips/math-emu/dp_sub.c b/arch/mips/math-emu/dp_sub.c index f08aecefceff..08474ad2a64e 100644 --- a/arch/mips/math-emu/dp_sub.c +++ b/arch/mips/math-emu/dp_sub.c @@ -94,8 +94,7 @@ union ieee754dp ieee754dp_sub(union ieee754dp x, union ieee754dp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): DPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): /* normalize ym,ye */ DPDNORMY; diff --git a/arch/mips/math-emu/sp_add.c b/arch/mips/math-emu/sp_add.c index 9af3ec7302fb..715cd0534301 100644 --- a/arch/mips/math-emu/sp_add.c +++ b/arch/mips/math-emu/sp_add.c @@ -92,8 +92,7 @@ union ieee754sp ieee754sp_add(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_div.c b/arch/mips/math-emu/sp_div.c index fcc285f3b48d..2bfa266fdc76 100644 --- a/arch/mips/math-emu/sp_div.c +++ b/arch/mips/math-emu/sp_div.c @@ -91,8 +91,7 @@ union ieee754sp ieee754sp_div(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_fdp.c b/arch/mips/math-emu/sp_fdp.c index 9f1456109aa8..56417497c88e 100644 --- a/arch/mips/math-emu/sp_fdp.c +++ b/arch/mips/math-emu/sp_fdp.c @@ -34,8 +34,7 @@ union ieee754sp ieee754sp_fdp(union ieee754dp x) case IEEE754_CLASS_SNAN: x = ieee754dp_nanxcpt(x); EXPLODEXDP; - /* fall through */ - + fallthrough; case IEEE754_CLASS_QNAN: y = ieee754sp_nan_fdp(xs, xm); if (!ieee754_csr.nan2008) { diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c index 4ce1d1f8b499..3fb16a1df3b8 100644 --- a/arch/mips/math-emu/sp_fmax.c +++ b/arch/mips/math-emu/sp_fmax.c @@ -93,8 +93,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; @@ -222,8 +221,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c index 7ad867fd7de2..ad2599d4a892 100644 --- a/arch/mips/math-emu/sp_fmin.c +++ b/arch/mips/math-emu/sp_fmin.c @@ -93,8 +93,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; @@ -222,8 +221,7 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c index 1b85b1a527ac..473ee222d90c 100644 --- a/arch/mips/math-emu/sp_maddf.c +++ b/arch/mips/math-emu/sp_maddf.c @@ -119,8 +119,7 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x, case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): if (zc == IEEE754_CLASS_INF) return ieee754sp_inf(zs); diff --git a/arch/mips/math-emu/sp_mul.c b/arch/mips/math-emu/sp_mul.c index ded17e28e8bc..26cfd63025e9 100644 --- a/arch/mips/math-emu/sp_mul.c +++ b/arch/mips/math-emu/sp_mul.c @@ -89,8 +89,7 @@ union ieee754sp ieee754sp_mul(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/math-emu/sp_sub.c b/arch/mips/math-emu/sp_sub.c index f3d26a1f162c..16c8e9ae63ed 100644 --- a/arch/mips/math-emu/sp_sub.c +++ b/arch/mips/math-emu/sp_sub.c @@ -94,8 +94,7 @@ union ieee754sp ieee754sp_sub(union ieee754sp x, union ieee754sp y) case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM): SPDNORMX; - /* fall through */ - + fallthrough; case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM): SPDNORMY; break; diff --git a/arch/mips/mm/Makefile b/arch/mips/mm/Makefile index 46f483e952c8..865926a37775 100644 --- a/arch/mips/mm/Makefile +++ b/arch/mips/mm/Makefile @@ -23,7 +23,7 @@ obj-y += uasm-mips.o endif obj-$(CONFIG_32BIT) += ioremap.o pgtable-32.o -obj-$(CONFIG_64BIT) += pgtable-64.o +obj-$(CONFIG_64BIT) += ioremap64.o pgtable-64.o obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_DMA_NONCOHERENT) += dma-noncoherent.o diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 36a311348739..6fb83ac7c475 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -1049,7 +1049,7 @@ static inline void rm7k_erratum31(void) "cache\t%1, 0x3000(%0)\n\t" ".set pop\n" : - : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill)); + : "r" (addr), "i" (Index_Store_Tag_I), "i" (Fill_I)); } } @@ -1073,12 +1073,12 @@ static inline int alias_74k_erratum(struct cpuinfo_mips *c) if (rev <= PRID_REV_ENCODE_332(2, 4, 0)) present = 1; if (rev == PRID_REV_ENCODE_332(2, 4, 0)) - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + write_c0_config6(read_c0_config6() | MIPS_CONF6_MTI_SYND); break; case PRID_IMP_1074K: if (rev <= PRID_REV_ENCODE_332(1, 1, 0)) { present = 1; - write_c0_config6(read_c0_config6() | MIPS_CONF6_SYND); + write_c0_config6(read_c0_config6() | MIPS_CONF6_MTI_SYND); } break; default: @@ -1200,7 +1200,7 @@ static void probe_pcache(void) case CPU_VR4133: write_c0_config(config & ~VR41_CONF_P4K); - /* fall through */ + fallthrough; case CPU_VR4131: /* Workaround for cache instruction bug of VR4131 */ if (c->processor_id == 0x0c80U || c->processor_id == 0x0c81U || @@ -1303,7 +1303,8 @@ static void probe_pcache(void) c->dcache.linesz; c->dcache.waybit = 0; if ((c->processor_id & (PRID_IMP_MASK | PRID_REV_MASK)) >= - (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0)) + (PRID_IMP_LOONGSON_64C | PRID_REV_LOONGSON3A_R2_0) || + (c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) c->options |= MIPS_CPU_PREFETCH; break; @@ -1425,7 +1426,7 @@ static void probe_pcache(void) case CPU_74K: case CPU_1074K: has_74k_erratum = alias_74k_erratum(c); - /* Fall through. */ + fallthrough; case CPU_M14KC: case CPU_M14KEC: case CPU_24K: @@ -1449,7 +1450,7 @@ static void probe_pcache(void) c->dcache.flags |= MIPS_CACHE_PINDEX; break; } - /* fall through */ + fallthrough; default: if (has_74k_erratum || c->dcache.waysize > PAGE_SIZE) c->dcache.flags |= MIPS_CACHE_ALIASES; @@ -1629,8 +1630,13 @@ static void __init loongson3_sc_init(void) scache_size = c->scache.sets * c->scache.ways * c->scache.linesz; - /* Loongson-3 has 4 cores, 1MB scache for each. scaches are shared */ - scache_size *= 4; + + /* Loongson-3 has 4-Scache banks, while Loongson-2K have only 2 banks */ + if ((c->processor_id & PRID_IMP_MASK) == PRID_IMP_LOONGSON_64R) + scache_size *= 2; + else + scache_size *= 4; + c->scache.waybit = 0; c->scache.waysize = scache_size / c->scache.ways; pr_info("Unified secondary cache %ldkB %s, linesize %d bytes.\n", @@ -1703,9 +1709,10 @@ static void setup_scache(void) return; default: - if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | - MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R1 | - MIPS_CPU_ISA_M64R2 | MIPS_CPU_ISA_M64R6)) { + if (c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | + MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6)) { #ifdef CONFIG_MIPS_CPU_SCACHE if (mips_sc_init ()) { scache_size = c->scache.ways * c->scache.sets * c->scache.linesz; diff --git a/arch/mips/mm/cache.c b/arch/mips/mm/cache.c index 33b409391ddb..ad6df1cea866 100644 --- a/arch/mips/mm/cache.c +++ b/arch/mips/mm/cache.c @@ -36,7 +36,6 @@ EXPORT_SYMBOL_GPL(flush_icache_range); void (*local_flush_icache_range)(unsigned long start, unsigned long end); EXPORT_SYMBOL_GPL(local_flush_icache_range); void (*__flush_icache_user_range)(unsigned long start, unsigned long end); -EXPORT_SYMBOL_GPL(__flush_icache_user_range); void (*__local_flush_icache_user_range)(unsigned long start, unsigned long end); EXPORT_SYMBOL_GPL(__local_flush_icache_user_range); diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index fcea92d95d86..563c2c0d0c81 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -33,6 +33,7 @@ static inline bool cpu_needs_post_dma_flush(void) case CPU_R10000: case CPU_R12000: case CPU_BMIPS5000: + case CPU_LOONGSON2EF: return true; default: /* diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 79684000de0e..620ebfa45ec1 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -358,17 +358,23 @@ void maar_init(void) write_c0_maari(i); back_to_back_c0_hazard(); upper = read_c0_maar(); +#ifdef CONFIG_XPA + upper |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT; +#endif write_c0_maari(i + 1); back_to_back_c0_hazard(); lower = read_c0_maar(); +#ifdef CONFIG_XPA + lower |= (phys_addr_t)readx_c0_maar() << MIPS_MAARX_ADDR_SHIFT; +#endif attr = lower & upper; lower = (lower & MIPS_MAAR_ADDR) << 4; upper = ((upper & MIPS_MAAR_ADDR) << 4) | 0xffff; pr_info(" [%d]: ", i / 2); - if (!(attr & MIPS_MAAR_VL)) { + if ((attr & MIPS_MAAR_V) != MIPS_MAAR_V) { pr_cont("disabled\n"); continue; } diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 8317f337a86e..b6dad2fd5575 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -14,94 +14,13 @@ #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm_types.h> +#include <linux/io.h> #include <asm/cacheflush.h> -#include <asm/io.h> #include <asm/tlbflush.h> +#include <ioremap.h> -static inline void remap_area_pte(pte_t * pte, unsigned long address, - phys_addr_t size, phys_addr_t phys_addr, unsigned long flags) -{ - phys_addr_t end; - unsigned long pfn; - pgprot_t pgprot = __pgprot(_PAGE_GLOBAL | _PAGE_PRESENT | __READABLE - | __WRITEABLE | flags); - - address &= ~PMD_MASK; - end = address + size; - if (end > PMD_SIZE) - end = PMD_SIZE; - BUG_ON(address >= end); - pfn = phys_addr >> PAGE_SHIFT; - do { - if (!pte_none(*pte)) { - printk("remap_area_pte: page already exists\n"); - BUG(); - } - set_pte(pte, pfn_pte(pfn, pgprot)); - address += PAGE_SIZE; - pfn++; - pte++; - } while (address && (address < end)); -} - -static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, - phys_addr_t size, phys_addr_t phys_addr, unsigned long flags) -{ - phys_addr_t end; - - address &= ~PGDIR_MASK; - end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - phys_addr -= address; - BUG_ON(address >= end); - do { - pte_t * pte = pte_alloc_kernel(pmd, address); - if (!pte) - return -ENOMEM; - remap_area_pte(pte, address, end - address, address + phys_addr, flags); - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return 0; -} - -static int remap_area_pages(unsigned long address, phys_addr_t phys_addr, - phys_addr_t size, unsigned long flags) -{ - int error; - pgd_t * dir; - unsigned long end = address + size; - - phys_addr -= address; - dir = pgd_offset(&init_mm, address); - flush_cache_all(); - BUG_ON(address >= end); - do { - p4d_t *p4d; - pud_t *pud; - pmd_t *pmd; - - error = -ENOMEM; - p4d = p4d_alloc(&init_mm, dir, address); - if (!p4d) - break; - pud = pud_alloc(&init_mm, p4d, address); - if (!pud) - break; - pmd = pmd_alloc(&init_mm, pud, address); - if (!pmd) - break; - if (remap_area_pmd(pmd, address, end - address, - phys_addr + address, flags)) - break; - error = 0; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); - flush_tlb_all(); - return error; -} +#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL)) +#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, void *arg) @@ -118,27 +37,25 @@ static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, } /* - * Generic mapping function (not visible outside): - */ - -/* - * Remap an arbitrary physical address space into the kernel virtual - * address space. Needed when the kernel wants to access high addresses - * directly. + * ioremap_prot - map bus memory into CPU space + * @phys_addr: bus address of the memory + * @size: size of the resource to map * - * NOTE! We need to allow non-page-aligned mappings too: we will obviously - * have to convert them into an offset in a page-aligned mapping, but the - * caller shouldn't need to know that small detail. + * ioremap_prot gives the caller control over cache coherency attributes (CCA) */ - -#define IS_LOW512(addr) (!((phys_addr_t)(addr) & (phys_addr_t) ~0x1fffffffULL)) - -void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long flags) +void __iomem *ioremap_prot(phys_addr_t phys_addr, unsigned long size, + unsigned long prot_val) { + unsigned long flags = prot_val & _CACHE_MASK; unsigned long offset, pfn, last_pfn; - struct vm_struct * area; + struct vm_struct *area; phys_addr_t last_addr; - void * addr; + unsigned long vaddr; + void __iomem *cpu_addr; + + cpu_addr = plat_ioremap(phys_addr, size, flags); + if (cpu_addr) + return cpu_addr; phys_addr = fixup_bigphys_addr(phys_addr, size); @@ -181,30 +98,22 @@ void __iomem * __ioremap(phys_addr_t phys_addr, phys_addr_t size, unsigned long area = get_vm_area(size, VM_IOREMAP); if (!area) return NULL; - addr = area->addr; - if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) { - vunmap(addr); + vaddr = (unsigned long)area->addr; + + flags |= _PAGE_GLOBAL | _PAGE_PRESENT | __READABLE | __WRITEABLE; + if (ioremap_page_range(vaddr, vaddr + size, phys_addr, + __pgprot(flags))) { + free_vm_area(area); return NULL; } - return (void __iomem *) (offset + (char *)addr); + return (void __iomem *)(vaddr + offset); } +EXPORT_SYMBOL(ioremap_prot); -#define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) - -void __iounmap(const volatile void __iomem *addr) +void iounmap(const volatile void __iomem *addr) { - struct vm_struct *p; - - if (IS_KSEG1(addr)) - return; - - p = remove_vm_area((void *) (PAGE_MASK & (unsigned long __force) addr)); - if (!p) - printk(KERN_ERR "iounmap: bad address %p\n", addr); - - kfree(p); + if (!plat_iounmap(addr) && !IS_KSEG1(addr)) + vunmap((void *)((unsigned long)addr & PAGE_MASK)); } - -EXPORT_SYMBOL(__ioremap); -EXPORT_SYMBOL(__iounmap); +EXPORT_SYMBOL(iounmap); diff --git a/arch/mips/mm/ioremap64.c b/arch/mips/mm/ioremap64.c new file mode 100644 index 000000000000..15e7820d6a5f --- /dev/null +++ b/arch/mips/mm/ioremap64.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/io.h> +#include <ioremap.h> + +void __iomem *ioremap_prot(phys_addr_t offset, unsigned long size, + unsigned long prot_val) +{ + unsigned long flags = prot_val & _CACHE_MASK; + u64 base = (flags == _CACHE_UNCACHED ? IO_BASE : UNCAC_BASE); + void __iomem *addr; + + addr = plat_ioremap(offset, size, flags); + if (!addr) + addr = (void __iomem *)(unsigned long)(base + offset); + return addr; +} +EXPORT_SYMBOL(ioremap_prot); + +void iounmap(const volatile void __iomem *addr) +{ + plat_iounmap(addr); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/mips/mm/sc-mips.c b/arch/mips/mm/sc-mips.c index dbdbfe5d8408..eedad47df24f 100644 --- a/arch/mips/mm/sc-mips.c +++ b/arch/mips/mm/sc-mips.c @@ -194,9 +194,10 @@ static inline int __init mips_sc_probe(void) return mips_sc_probe_cm3(); /* Ignore anything but MIPSxx processors */ - if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M32R2 | - MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R1 | - MIPS_CPU_ISA_M64R2 | MIPS_CPU_ISA_M64R6))) + if (!(c->isa_level & (MIPS_CPU_ISA_M32R1 | MIPS_CPU_ISA_M64R1 | + MIPS_CPU_ISA_M32R2 | MIPS_CPU_ISA_M64R2 | + MIPS_CPU_ISA_M32R5 | MIPS_CPU_ISA_M64R5 | + MIPS_CPU_ISA_M32R6 | MIPS_CPU_ISA_M64R6))) return 0; /* Does this MIPS32/MIPS64 CPU have a config2 register? */ diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index da407cdc2135..38c204204529 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -576,7 +576,7 @@ void build_tlb_write_entry(u32 **p, struct uasm_label **l, case CPU_R5500: if (m4kc_tlbp_war()) uasm_i_nop(p); - /* fall through */ + fallthrough; case CPU_ALCHEMY: tlbw(p); break; diff --git a/arch/mips/mti-malta/Platform b/arch/mips/mti-malta/Platform index 2cc72c9b38e3..41e0d2a2d325 100644 --- a/arch/mips/mti-malta/Platform +++ b/arch/mips/mti-malta/Platform @@ -1,7 +1,6 @@ # # MIPS Malta board # -platform-$(CONFIG_MIPS_MALTA) += mti-malta/ cflags-$(CONFIG_MIPS_MALTA) += -I$(srctree)/arch/mips/include/asm/mach-malta ifdef CONFIG_KVM_GUEST load-$(CONFIG_MIPS_MALTA) += 0x0000000040100000 diff --git a/arch/mips/mti-malta/malta-init.c b/arch/mips/mti-malta/malta-init.c index ff2c1d809538..893af377aacc 100644 --- a/arch/mips/mti-malta/malta-init.c +++ b/arch/mips/mti-malta/malta-init.c @@ -90,24 +90,24 @@ static void __init console_config(void) static void __init mips_nmi_setup(void) { void *base; - extern char except_vec_nmi; + extern char except_vec_nmi[]; base = cpu_has_veic ? (void *)(CAC_BASE + 0xa80) : (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); + memcpy(base, except_vec_nmi, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } static void __init mips_ejtag_setup(void) { void *base; - extern char except_vec_ejtag_debug; + extern char except_vec_ejtag_debug[]; base = cpu_has_veic ? (void *)(CAC_BASE + 0xa00) : (void *)(CAC_BASE + 0x300); - memcpy(base, &except_vec_ejtag_debug, 0x80); + memcpy(base, except_vec_ejtag_debug, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } diff --git a/arch/mips/netlogic/Platform b/arch/mips/netlogic/Platform index fb8eb4c0c6ec..4195a097f5f2 100644 --- a/arch/mips/netlogic/Platform +++ b/arch/mips/netlogic/Platform @@ -13,5 +13,4 @@ cflags-$(CONFIG_CPU_XLP) += $(call cc-option,-march=xlp,-march=mips64r2) # # NETLOGIC processor support # -platform-$(CONFIG_NLM_COMMON) += netlogic/ load-$(CONFIG_NLM_COMMON) += 0xffffffff80100000 diff --git a/arch/mips/netlogic/xlr/fmn.c b/arch/mips/netlogic/xlr/fmn.c index d7db1533889a..f90303f31967 100644 --- a/arch/mips/netlogic/xlr/fmn.c +++ b/arch/mips/netlogic/xlr/fmn.c @@ -103,7 +103,7 @@ static irqreturn_t fmn_message_handler(int irq, void *data) mflags = nlm_cop2_enable_irqsave(); } } - }; + } /* Enable message ring intr, to any thread in core */ nlm_fmn_setup_intr(irq, (1 << nlm_threads_per_core) - 1); nlm_cop2_disable_irqrestore(mflags); diff --git a/arch/mips/oprofile/common.c b/arch/mips/oprofile/common.c index 03db268cba5c..d3996c4c6440 100644 --- a/arch/mips/oprofile/common.c +++ b/arch/mips/oprofile/common.c @@ -110,7 +110,7 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case CPU_LOONGSON64: lmodel = &op_model_loongson3_ops; break; - }; + } /* * Always set the backtrace. This allows unsupported CPU types to still diff --git a/arch/mips/oprofile/op_model_mipsxx.c b/arch/mips/oprofile/op_model_mipsxx.c index a537bf98912c..1493c49ca47a 100644 --- a/arch/mips/oprofile/op_model_mipsxx.c +++ b/arch/mips/oprofile/op_model_mipsxx.c @@ -172,15 +172,15 @@ static void mipsxx_cpu_setup(void *args) case 4: w_c0_perfctrl3(0); w_c0_perfcntr3(reg.counter[3]); - /* fall through */ + fallthrough; case 3: w_c0_perfctrl2(0); w_c0_perfcntr2(reg.counter[2]); - /* fall through */ + fallthrough; case 2: w_c0_perfctrl1(0); w_c0_perfcntr1(reg.counter[1]); - /* fall through */ + fallthrough; case 1: w_c0_perfctrl0(0); w_c0_perfcntr0(reg.counter[0]); @@ -198,13 +198,13 @@ static void mipsxx_cpu_start(void *args) switch (counters) { case 4: w_c0_perfctrl3(WHAT | reg.control[3]); - /* fall through */ + fallthrough; case 3: w_c0_perfctrl2(WHAT | reg.control[2]); - /* fall through */ + fallthrough; case 2: w_c0_perfctrl1(WHAT | reg.control[1]); - /* fall through */ + fallthrough; case 1: w_c0_perfctrl0(WHAT | reg.control[0]); } @@ -221,13 +221,13 @@ static void mipsxx_cpu_stop(void *args) switch (counters) { case 4: w_c0_perfctrl3(0); - /* fall through */ + fallthrough; case 3: w_c0_perfctrl2(0); - /* fall through */ + fallthrough; case 2: w_c0_perfctrl1(0); - /* fall through */ + fallthrough; case 1: w_c0_perfctrl0(0); } @@ -245,7 +245,7 @@ static int mipsxx_perfcount_handler(void) switch (counters) { #define HANDLE_COUNTER(n) \ - /* fall through */ \ + fallthrough; \ case n + 1: \ control = r_c0_perfctrl ## n(); \ counter = r_c0_perfcntr ## n(); \ @@ -307,15 +307,15 @@ static void reset_counters(void *arg) case 4: w_c0_perfctrl3(0); w_c0_perfcntr3(0); - /* fall through */ + fallthrough; case 3: w_c0_perfctrl2(0); w_c0_perfcntr2(0); - /* fall through */ + fallthrough; case 2: w_c0_perfctrl1(0); w_c0_perfcntr1(0); - /* fall through */ + fallthrough; case 1: w_c0_perfctrl0(0); w_c0_perfcntr0(0); diff --git a/arch/mips/paravirt/Platform b/arch/mips/paravirt/Platform index 7e76ef25ea17..0b857580dfdd 100644 --- a/arch/mips/paravirt/Platform +++ b/arch/mips/paravirt/Platform @@ -1,7 +1,6 @@ # # Generic para-virtualized guest. # -platform-$(CONFIG_MIPS_PARAVIRT) += paravirt/ cflags-$(CONFIG_MIPS_PARAVIRT) += \ -I$(srctree)/arch/mips/include/asm/mach-paravirt diff --git a/arch/mips/pci/Makefile b/arch/mips/pci/Makefile index 342ce10ef593..0f68d6849978 100644 --- a/arch/mips/pci/Makefile +++ b/arch/mips/pci/Makefile @@ -13,10 +13,8 @@ obj-$(CONFIG_PCI_DRIVERS_GENERIC)+= pci-generic.o obj-$(CONFIG_MIPS_BONITO64) += ops-bonito64.o obj-$(CONFIG_PCI_GT64XXX_PCI0) += ops-gt64xxx_pci0.o obj-$(CONFIG_MIPS_MSC) += ops-msc.o -obj-$(CONFIG_MIPS_NILE4) += ops-nile4.o obj-$(CONFIG_SOC_TX3927) += ops-tx3927.o obj-$(CONFIG_PCI_VR41XX) += ops-vr41xx.o pci-vr41xx.o -obj-$(CONFIG_NEC_MARKEINS) += ops-emma2rh.o pci-emma2rh.o fixup-emma2rh.o obj-$(CONFIG_PCI_TX4927) += ops-tx4927.o obj-$(CONFIG_BCM47XX) += pci-bcm47xx.o obj-$(CONFIG_BCM63XX) += pci-bcm63xx.o fixup-bcm63xx.o \ @@ -31,15 +29,10 @@ obj-$(CONFIG_PCI_XTALK_BRIDGE) += pci-xtalk-bridge.o # These are still pretty much in the old state, watch, go blind. # obj-$(CONFIG_ATH79) += fixup-ath79.o -obj-$(CONFIG_LASAT) += pci-lasat.o obj-$(CONFIG_MIPS_COBALT) += fixup-cobalt.o obj-$(CONFIG_LEMOTE_FULOONG2E) += fixup-fuloong2e.o ops-loongson2.o obj-$(CONFIG_LEMOTE_MACH2F) += fixup-lemote2f.o ops-loongson2.o -obj-$(CONFIG_MACH_LOONGSON64) += fixup-loongson3.o ops-loongson3.o obj-$(CONFIG_MIPS_MALTA) += fixup-malta.o pci-malta.o -obj-$(CONFIG_PMC_MSP7120_GW) += fixup-pmcmsp.o ops-pmcmsp.o -obj-$(CONFIG_PMC_MSP7120_EVAL) += fixup-pmcmsp.o ops-pmcmsp.o -obj-$(CONFIG_PMC_MSP7120_FPGA) += fixup-pmcmsp.o ops-pmcmsp.o obj-$(CONFIG_SGI_IP27) += pci-ip27.o obj-$(CONFIG_SGI_IP32) += fixup-ip32.o ops-mace.o pci-ip32.o obj-$(CONFIG_SIBYTE_SB1250) += fixup-sb1250.o pci-sb1250.o diff --git a/arch/mips/pci/fixup-emma2rh.c b/arch/mips/pci/fixup-emma2rh.c deleted file mode 100644 index 2541f9bc12de..000000000000 --- a/arch/mips/pci/fixup-emma2rh.c +++ /dev/null @@ -1,84 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/ddb5477/pci.c - * - * Copyright 2001 MontaVista Software Inc. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/pci.h> - -#include <asm/bootinfo.h> - -#include <asm/emma/emma2rh.h> - -#define EMMA2RH_PCI_HOST_SLOT 0x09 -#define EMMA2RH_USB_SLOT 0x03 -#define PCI_DEVICE_ID_NEC_EMMA2RH 0x014b /* EMMA2RH PCI Host */ - -/* - * we fix up irqs based on the slot number. - * The first entry is at AD:11. - * Fortunately this works because, although we have two pci buses, - * they all have different slot numbers (except for rockhopper slot 20 - * which is handled below). - * - */ - -#define MAX_SLOT_NUM 10 -static unsigned char irq_map[][5] = { - [3] = {0, MARKEINS_PCI_IRQ_INTB, MARKEINS_PCI_IRQ_INTC, - MARKEINS_PCI_IRQ_INTD, 0,}, - [4] = {0, MARKEINS_PCI_IRQ_INTA, 0, 0, 0,}, - [5] = {0, 0, 0, 0, 0,}, - [6] = {0, MARKEINS_PCI_IRQ_INTC, MARKEINS_PCI_IRQ_INTD, - MARKEINS_PCI_IRQ_INTA, MARKEINS_PCI_IRQ_INTB,}, -}; - -static void nec_usb_controller_fixup(struct pci_dev *dev) -{ - if (PCI_SLOT(dev->devfn) == EMMA2RH_USB_SLOT) - /* on board USB controller configuration */ - pci_write_config_dword(dev, 0xe4, 1 << 5); -} - -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_USB, - nec_usb_controller_fixup); - -/* - * Prevent the PCI layer from seeing the resources allocated to this device - * if it is the host bridge by marking it as such. These resources are of - * no consequence to the PCI layer (they are handled elsewhere). - */ -static void emma2rh_pci_host_fixup(struct pci_dev *dev) -{ - int i; - - if (PCI_SLOT(dev->devfn) == EMMA2RH_PCI_HOST_SLOT) { - dev->class &= 0xff; - dev->class |= PCI_CLASS_BRIDGE_HOST << 8; - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; - } - } -} - -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NEC, PCI_DEVICE_ID_NEC_EMMA2RH, - emma2rh_pci_host_fixup); - -int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - return irq_map[slot][pin]; -} - -/* Do platform specific device initialization at pci_enable_device() time */ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return 0; -} diff --git a/arch/mips/pci/fixup-loongson3.c b/arch/mips/pci/fixup-loongson3.c deleted file mode 100644 index 8a741c2c6685..000000000000 --- a/arch/mips/pci/fixup-loongson3.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * fixup-loongson3.c - * - * Copyright (C) 2012 Lemote, Inc. - * Author: Xiang Yu, xiangy@lemote.com - * Chen Huacai, chenhc@lemote.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - */ - -#include <linux/pci.h> -#include <boot_param.h> - -static void print_fixup_info(const struct pci_dev *pdev) -{ - dev_info(&pdev->dev, "Device %x:%x, irq %d\n", - pdev->vendor, pdev->device, pdev->irq); -} - -int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - print_fixup_info(dev); - return dev->irq; -} - -static void pci_fixup_radeon(struct pci_dev *pdev) -{ - struct resource *res = &pdev->resource[PCI_ROM_RESOURCE]; - - if (res->start) - return; - - if (!loongson_sysconf.vgabios_addr) - return; - - pci_disable_rom(pdev); - if (res->parent) - release_resource(res); - - res->start = virt_to_phys((void *) loongson_sysconf.vgabios_addr); - res->end = res->start + 256*1024 - 1; - res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | - IORESOURCE_PCI_FIXED; - - dev_info(&pdev->dev, "BAR %d: assigned %pR for Radeon ROM\n", - PCI_ROM_RESOURCE, res); -} - -DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_ATI, PCI_ANY_ID, - PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_radeon); - -/* Do platform specific device initialization at pci_enable_device() time */ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return 0; -} diff --git a/arch/mips/pci/fixup-pmcmsp.c b/arch/mips/pci/fixup-pmcmsp.c deleted file mode 100644 index 4ad2ef02087b..000000000000 --- a/arch/mips/pci/fixup-pmcmsp.c +++ /dev/null @@ -1,216 +0,0 @@ -/* - * PMC-Sierra MSP board specific pci fixups. - * - * Copyright 2001 MontaVista Software Inc. - * Copyright 2005-2007 PMC-Sierra, Inc - * - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#ifdef CONFIG_PCI - -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/init.h> - -#include <asm/byteorder.h> - -#include <msp_pci.h> -#include <msp_cic_int.h> - -/* PCI interrupt pins */ -#define IRQ4 MSP_INT_EXT4 -#define IRQ5 MSP_INT_EXT5 -#define IRQ6 MSP_INT_EXT6 - -#if defined(CONFIG_PMC_MSP7120_GW) -/* Garibaldi Board IRQ wiring to PCI slots */ -static char irq_tab[][5] = { - /* INTA INTB INTC INTD */ - {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */ - {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */ - {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */ - {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */ - {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */ - {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */ - {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */ - {0, 0, 0, 0, 0 }, /* 6 (AD[16]): Unused */ - {0, 0, 0, 0, 0 }, /* 7 (AD[17]): Unused */ - {0, 0, 0, 0, 0 }, /* 8 (AD[18]): Unused */ - {0, 0, 0, 0, 0 }, /* 9 (AD[19]): Unused */ - {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */ - {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */ - {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */ - {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */ - {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */ - {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */ - {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */ - {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */ - {0, IRQ4, IRQ4, 0, 0 }, /* 18 (AD[28]): slot 0 */ - {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */ - {0, IRQ5, IRQ5, 0, 0 }, /* 20 (AD[30]): slot 1 */ - {0, IRQ6, IRQ6, 0, 0 } /* 21 (AD[31]): slot 2 */ -}; - -#elif defined(CONFIG_PMC_MSP7120_EVAL) - -/* MSP7120 Eval Board IRQ wiring to PCI slots */ -static char irq_tab[][5] = { - /* INTA INTB INTC INTD */ - {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */ - {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */ - {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */ - {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */ - {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */ - {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */ - {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */ - {0, IRQ6, IRQ6, 0, 0 }, /* 6 (AD[16]): slot 3 (mini) */ - {0, IRQ5, IRQ5, 0, 0 }, /* 7 (AD[17]): slot 2 (mini) */ - {0, IRQ4, IRQ4, IRQ4, IRQ4}, /* 8 (AD[18]): slot 0 (PCI) */ - {0, IRQ5, IRQ5, IRQ5, IRQ5}, /* 9 (AD[19]): slot 1 (PCI) */ - {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */ - {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */ - {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */ - {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */ - {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */ - {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */ - {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */ - {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */ - {0, 0, 0, 0, 0 }, /* 18 (AD[28]): Unused */ - {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */ - {0, 0, 0, 0, 0 }, /* 20 (AD[30]): Unused */ - {0, 0, 0, 0, 0 } /* 21 (AD[31]): Unused */ -}; - -#else - -/* Unknown board -- don't assign any IRQs */ -static char irq_tab[][5] = { - /* INTA INTB INTC INTD */ - {0, 0, 0, 0, 0 }, /* (AD[0]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[1]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[2]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[3]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[4]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[5]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[6]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[7]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[8]): Unused */ - {0, 0, 0, 0, 0 }, /* (AD[9]): Unused */ - {0, 0, 0, 0, 0 }, /* 0 (AD[10]): Unused */ - {0, 0, 0, 0, 0 }, /* 1 (AD[11]): Unused */ - {0, 0, 0, 0, 0 }, /* 2 (AD[12]): Unused */ - {0, 0, 0, 0, 0 }, /* 3 (AD[13]): Unused */ - {0, 0, 0, 0, 0 }, /* 4 (AD[14]): Unused */ - {0, 0, 0, 0, 0 }, /* 5 (AD[15]): Unused */ - {0, 0, 0, 0, 0 }, /* 6 (AD[16]): Unused */ - {0, 0, 0, 0, 0 }, /* 7 (AD[17]): Unused */ - {0, 0, 0, 0, 0 }, /* 8 (AD[18]): Unused */ - {0, 0, 0, 0, 0 }, /* 9 (AD[19]): Unused */ - {0, 0, 0, 0, 0 }, /* 10 (AD[20]): Unused */ - {0, 0, 0, 0, 0 }, /* 11 (AD[21]): Unused */ - {0, 0, 0, 0, 0 }, /* 12 (AD[22]): Unused */ - {0, 0, 0, 0, 0 }, /* 13 (AD[23]): Unused */ - {0, 0, 0, 0, 0 }, /* 14 (AD[24]): Unused */ - {0, 0, 0, 0, 0 }, /* 15 (AD[25]): Unused */ - {0, 0, 0, 0, 0 }, /* 16 (AD[26]): Unused */ - {0, 0, 0, 0, 0 }, /* 17 (AD[27]): Unused */ - {0, 0, 0, 0, 0 }, /* 18 (AD[28]): Unused */ - {0, 0, 0, 0, 0 }, /* 19 (AD[29]): Unused */ - {0, 0, 0, 0, 0 }, /* 20 (AD[30]): Unused */ - {0, 0, 0, 0, 0 } /* 21 (AD[31]): Unused */ -}; -#endif - -/***************************************************************************** - * - * FUNCTION: pcibios_plat_dev_init - * _________________________________________________________________________ - * - * DESCRIPTION: Perform platform specific device initialization at - * pci_enable_device() time. - * None are needed for the MSP7120 PCI Controller. - * - * INPUTS: dev - structure describing the PCI device - * - * OUTPUTS: none - * - * RETURNS: PCIBIOS_SUCCESSFUL - * - ****************************************************************************/ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: pcibios_map_irq - * _________________________________________________________________________ - * - * DESCRIPTION: Perform board supplied PCI IRQ mapping routine. - * - * INPUTS: dev - unused - * slot - PCI slot. Identified by which bit of the AD[] bus - * drives the IDSEL line. AD[10] is 0, AD[31] is - * slot 21. - * pin - numbered using the scheme of the PCI_INTERRUPT_PIN - * field of the config header. - * - * OUTPUTS: none - * - * RETURNS: IRQ number - * - ****************************************************************************/ -int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ -#if !defined(CONFIG_PMC_MSP7120_GW) && !defined(CONFIG_PMC_MSP7120_EVAL) - printk(KERN_WARNING "PCI: unknown board, no PCI IRQs assigned.\n"); -#endif - printk(KERN_WARNING "PCI: irq_tab returned %d for slot=%d pin=%d\n", - irq_tab[slot][pin], slot, pin); - - return irq_tab[slot][pin]; -} - -#endif /* CONFIG_PCI */ diff --git a/arch/mips/pci/fixup-sni.c b/arch/mips/pci/fixup-sni.c index adb9a58641e8..de012f8bd8c3 100644 --- a/arch/mips/pci/fixup-sni.c +++ b/arch/mips/pci/fixup-sni.c @@ -151,8 +151,7 @@ int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) case SNI_BRD_PCI_MTOWER: if (is_rm300_revd()) return irq_tab_rm300d[slot][pin]; - /* fall through */ - + fallthrough; case SNI_BRD_PCI_DESKTOP: return irq_tab_rm200[slot][pin]; diff --git a/arch/mips/pci/ops-bcm63xx.c b/arch/mips/pci/ops-bcm63xx.c index 925c72348fb6..dc6dc2741272 100644 --- a/arch/mips/pci/ops-bcm63xx.c +++ b/arch/mips/pci/ops-bcm63xx.c @@ -474,7 +474,7 @@ static int bcm63xx_pcie_can_access(struct pci_bus *bus, int devfn) if (PCI_SLOT(devfn) == 0) return bcm_pcie_readl(PCIE_DLSTATUS_REG) & DLSTATUS_PHYLINKUP; - /* else, fall through */ + fallthrough; default: return false; } diff --git a/arch/mips/pci/ops-emma2rh.c b/arch/mips/pci/ops-emma2rh.c deleted file mode 100644 index 65f47344536c..000000000000 --- a/arch/mips/pci/ops-emma2rh.c +++ /dev/null @@ -1,167 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/pci/ops-vr41xx.c - * - * Copyright 2001 MontaVista Software Inc. - */ - -#include <linux/pci.h> -#include <linux/kernel.h> -#include <linux/types.h> - -#include <asm/addrspace.h> - -#include <asm/emma/emma2rh.h> - -#define RTABORT (0x1<<9) -#define RMABORT (0x1<<10) -#define EMMA2RH_PCI_SLOT_NUM 9 /* 0000:09.0 is final PCI device */ - -/* - * access config space - */ - -static int check_args(struct pci_bus *bus, u32 devfn, u32 * bus_num) -{ - /* check if the bus is top-level */ - if (bus->parent != NULL) - *bus_num = bus->number; - else - *bus_num = 0; - - if (*bus_num == 0) { - /* Type 0 */ - if (PCI_SLOT(devfn) >= 10) - return PCIBIOS_DEVICE_NOT_FOUND; - } else { - /* Type 1 */ - if ((*bus_num >= 64) || (PCI_SLOT(devfn) >= 16)) - return PCIBIOS_DEVICE_NOT_FOUND; - } - return 0; -} - -static inline int set_pci_configuration_address(unsigned char bus_num, - unsigned int devfn, int where) -{ - u32 config_win0; - - emma2rh_out32(EMMA2RH_PCI_INT, ~RMABORT); - if (bus_num == 0) - /* - * Type 0 configuration - */ - config_win0 = (1 << (22 + PCI_SLOT(devfn))) | (5 << 9); - else - /* - * Type 1 configuration - */ - config_win0 = (bus_num << 26) | (PCI_SLOT(devfn) << 22) | - (1 << 15) | (5 << 9); - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, config_win0); - - return 0; -} - -static int pci_config_read(struct pci_bus *bus, unsigned int devfn, int where, - int size, uint32_t * val) -{ - u32 bus_num; - u32 base = KSEG1ADDR(EMMA2RH_PCI_CONFIG_BASE); - u32 backup_win0; - u32 data; - - *val = 0xffffffffU; - - if (check_args(bus, devfn, &bus_num) == PCIBIOS_DEVICE_NOT_FOUND) - return PCIBIOS_DEVICE_NOT_FOUND; - - backup_win0 = emma2rh_in32(EMMA2RH_PCI_IWIN0_CTR); - - if (set_pci_configuration_address(bus_num, devfn, where) < 0) - return PCIBIOS_DEVICE_NOT_FOUND; - - data = - *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) + - (where & 0xfffffffc)); - - switch (size) { - case 1: - *val = (data >> ((where & 3) << 3)) & 0xffU; - break; - case 2: - *val = (data >> ((where & 2) << 3)) & 0xffffU; - break; - case 4: - *val = data; - break; - default: - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0); - return PCIBIOS_FUNC_NOT_SUPPORTED; - } - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0); - - if (emma2rh_in32(EMMA2RH_PCI_INT) & RMABORT) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int pci_config_write(struct pci_bus *bus, unsigned int devfn, int where, - int size, u32 val) -{ - u32 bus_num; - u32 base = KSEG1ADDR(EMMA2RH_PCI_CONFIG_BASE); - u32 backup_win0; - u32 data; - int shift; - - if (check_args(bus, devfn, &bus_num) == PCIBIOS_DEVICE_NOT_FOUND) - return PCIBIOS_DEVICE_NOT_FOUND; - - backup_win0 = emma2rh_in32(EMMA2RH_PCI_IWIN0_CTR); - - if (set_pci_configuration_address(bus_num, devfn, where) < 0) - return PCIBIOS_DEVICE_NOT_FOUND; - - /* read modify write */ - data = - *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) + - (where & 0xfffffffc)); - - switch (size) { - case 1: - shift = (where & 3) << 3; - data &= ~(0xffU << shift); - data |= ((val & 0xffU) << shift); - break; - case 2: - shift = (where & 2) << 3; - data &= ~(0xffffU << shift); - data |= ((val & 0xffffU) << shift); - break; - case 4: - data = val; - break; - default: - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0); - return PCIBIOS_FUNC_NOT_SUPPORTED; - } - *(volatile u32 *)(base + (PCI_FUNC(devfn) << 8) + - (where & 0xfffffffc)) = data; - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, backup_win0); - if (emma2rh_in32(EMMA2RH_PCI_INT) & RMABORT) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -struct pci_ops emma2rh_pci_ops = { - .read = pci_config_read, - .write = pci_config_write, -}; diff --git a/arch/mips/pci/ops-loongson3.c b/arch/mips/pci/ops-loongson3.c deleted file mode 100644 index 2f6ad36bdea6..000000000000 --- a/arch/mips/pci/ops-loongson3.c +++ /dev/null @@ -1,116 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/kernel.h> - -#include <asm/mips-boards/bonito64.h> - -#include <loongson.h> - -#define PCI_ACCESS_READ 0 -#define PCI_ACCESS_WRITE 1 - -#define HT1LO_PCICFG_BASE 0x1a000000 -#define HT1LO_PCICFG_BASE_TP1 0x1b000000 - -static int loongson3_pci_config_access(unsigned char access_type, - struct pci_bus *bus, unsigned int devfn, - int where, u32 *data) -{ - unsigned char busnum = bus->number; - int function = PCI_FUNC(devfn); - int device = PCI_SLOT(devfn); - int reg = where & ~3; - void *addrp; - u64 addr; - - if (where < PCI_CFG_SPACE_SIZE) { /* standard config */ - addr = (busnum << 16) | (device << 11) | (function << 8) | reg; - if (busnum == 0) { - if (device > 31) - return PCIBIOS_DEVICE_NOT_FOUND; - addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE | addr); - } else { - addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE_TP1 | addr); - } - } else if (where < PCI_CFG_SPACE_EXP_SIZE) { /* extended config */ - struct pci_dev *rootdev; - - rootdev = pci_get_domain_bus_and_slot(0, 0, 0); - if (!rootdev) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = pci_resource_start(rootdev, 3); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr |= busnum << 20 | device << 15 | function << 12 | reg; - addrp = (void *)TO_UNCAC(addr); - } else { - return PCIBIOS_DEVICE_NOT_FOUND; - } - - if (access_type == PCI_ACCESS_WRITE) - writel(*data, addrp); - else { - *data = readl(addrp); - if (*data == 0xffffffff) { - *data = -1; - return PCIBIOS_DEVICE_NOT_FOUND; - } - } - return PCIBIOS_SUCCESSFUL; -} - -static int loongson3_pci_pcibios_read(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 *val) -{ - u32 data = 0; - int ret = loongson3_pci_config_access(PCI_ACCESS_READ, - bus, devfn, where, &data); - - if (ret != PCIBIOS_SUCCESSFUL) - return ret; - - if (size == 1) - *val = (data >> ((where & 3) << 3)) & 0xff; - else if (size == 2) - *val = (data >> ((where & 3) << 3)) & 0xffff; - else - *val = data; - - return PCIBIOS_SUCCESSFUL; -} - -static int loongson3_pci_pcibios_write(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 val) -{ - u32 data = 0; - int ret; - - if (size == 4) - data = val; - else { - ret = loongson3_pci_config_access(PCI_ACCESS_READ, - bus, devfn, where, &data); - if (ret != PCIBIOS_SUCCESSFUL) - return ret; - - if (size == 1) - data = (data & ~(0xff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - else if (size == 2) - data = (data & ~(0xffff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - } - - ret = loongson3_pci_config_access(PCI_ACCESS_WRITE, - bus, devfn, where, &data); - - return ret; -} - -struct pci_ops loongson_pci_ops = { - .read = loongson3_pci_pcibios_read, - .write = loongson3_pci_pcibios_write -}; diff --git a/arch/mips/pci/ops-nile4.c b/arch/mips/pci/ops-nile4.c deleted file mode 100644 index b00658d19116..000000000000 --- a/arch/mips/pci/ops-nile4.c +++ /dev/null @@ -1,136 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/pci.h> -#include <asm/bootinfo.h> - -#include <asm/lasat/lasat.h> -#include <asm/nile4.h> - -#define PCI_ACCESS_READ 0 -#define PCI_ACCESS_WRITE 1 - -#define LO(reg) (reg / 4) -#define HI(reg) (reg / 4 + 1) - -volatile unsigned long *const vrc_pciregs = (void *) Vrc5074_BASE; - -static int nile4_pcibios_config_access(unsigned char access_type, - struct pci_bus *bus, unsigned int devfn, int where, u32 *val) -{ - unsigned char busnum = bus->number; - u32 adr, mask, err; - - if ((busnum == 0) && (PCI_SLOT(devfn) > 8)) - /* The addressing scheme chosen leaves room for just - * 8 devices on the first busnum (besides the PCI - * controller itself) */ - return PCIBIOS_DEVICE_NOT_FOUND; - - if ((busnum == 0) && (devfn == PCI_DEVFN(0, 0))) { - /* Access controller registers directly */ - if (access_type == PCI_ACCESS_WRITE) { - vrc_pciregs[(0x200 + where) >> 2] = *val; - } else { - *val = vrc_pciregs[(0x200 + where) >> 2]; - } - return PCIBIOS_SUCCESSFUL; - } - - /* Temporarily map PCI Window 1 to config space */ - mask = vrc_pciregs[LO(NILE4_PCIINIT1)]; - vrc_pciregs[LO(NILE4_PCIINIT1)] = 0x0000001a | (busnum ? 0x200 : 0); - - /* Clear PCI Error register. This also clears the Error Type - * bits in the Control register */ - vrc_pciregs[LO(NILE4_PCIERR)] = 0; - vrc_pciregs[HI(NILE4_PCIERR)] = 0; - - /* Setup address */ - if (busnum == 0) - adr = - KSEG1ADDR(PCI_WINDOW1) + - ((1 << (PCI_SLOT(devfn) + 15)) | (PCI_FUNC(devfn) << 8) - | (where & ~3)); - else - adr = KSEG1ADDR(PCI_WINDOW1) | (busnum << 16) | (devfn << 8) | - (where & ~3); - - if (access_type == PCI_ACCESS_WRITE) - *(u32 *) adr = *val; - else - *val = *(u32 *) adr; - - /* Check for master or target abort */ - err = (vrc_pciregs[HI(NILE4_PCICTRL)] >> 5) & 0x7; - - /* Restore PCI Window 1 */ - vrc_pciregs[LO(NILE4_PCIINIT1)] = mask; - - if (err) - return PCIBIOS_DEVICE_NOT_FOUND; - - return PCIBIOS_SUCCESSFUL; -} - -static int nile4_pcibios_read(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 *val) -{ - u32 data = 0; - int err; - - if ((size == 2) && (where & 1)) - return PCIBIOS_BAD_REGISTER_NUMBER; - else if ((size == 4) && (where & 3)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - err = nile4_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, where, - &data); - if (err) - return err; - - if (size == 1) - *val = (data >> ((where & 3) << 3)) & 0xff; - else if (size == 2) - *val = (data >> ((where & 3) << 3)) & 0xffff; - else - *val = data; - - return PCIBIOS_SUCCESSFUL; -} - -static int nile4_pcibios_write(struct pci_bus *bus, unsigned int devfn, - int where, int size, u32 val) -{ - u32 data = 0; - int err; - - if ((size == 2) && (where & 1)) - return PCIBIOS_BAD_REGISTER_NUMBER; - else if ((size == 4) && (where & 3)) - return PCIBIOS_BAD_REGISTER_NUMBER; - - err = nile4_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, where, - &data); - if (err) - return err; - - if (size == 1) - data = (data & ~(0xff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - else if (size == 2) - data = (data & ~(0xffff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - else - data = val; - - if (nile4_pcibios_config_access - (PCI_ACCESS_WRITE, bus, devfn, where, &data)) - return -1; - - return PCIBIOS_SUCCESSFUL; -} - -struct pci_ops nile4_pci_ops = { - .read = nile4_pcibios_read, - .write = nile4_pcibios_write, -}; diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c deleted file mode 100644 index ad5dd711c575..000000000000 --- a/arch/mips/pci/ops-pmcmsp.c +++ /dev/null @@ -1,944 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * PMC-Sierra MSP board specific pci_ops - * - * Copyright 2001 MontaVista Software Inc. - * Copyright 2005-2007 PMC-Sierra, Inc - * - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - * - * Much of the code is derived from the original DDB5074 port by - * Geert Uytterhoeven <geert@linux-m68k.org> - */ - -#define PCI_COUNTERS 1 - -#include <linux/types.h> -#include <linux/pci.h> -#include <linux/interrupt.h> - -#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS) -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#endif /* CONFIG_PROC_FS && PCI_COUNTERS */ - -#include <linux/kernel.h> -#include <linux/init.h> - -#include <asm/byteorder.h> -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) -#include <asm/mipsmtregs.h> -#endif - -#include <msp_prom.h> -#include <msp_cic_int.h> -#include <msp_pci.h> -#include <msp_regs.h> -#include <msp_regops.h> - -#define PCI_ACCESS_READ 0 -#define PCI_ACCESS_WRITE 1 - -#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS) -static char proc_init; -extern struct proc_dir_entry *proc_bus_pci_dir; -unsigned int pci_int_count[32]; - -static void pci_proc_init(void); - -/***************************************************************************** - * - * FUNCTION: show_msp_pci_counts - * _________________________________________________________________________ - * - * DESCRIPTION: Prints the count of how many times each PCI - * interrupt has asserted. Can be invoked by the - * /proc filesystem. - * - * INPUTS: m - synthetic file construction data - * v - iterator - * - * RETURNS: 0 or error - * - ****************************************************************************/ -static int show_msp_pci_counts(struct seq_file *m, void *v) -{ - int i; - unsigned int intcount, total = 0; - - for (i = 0; i < 32; ++i) { - intcount = pci_int_count[i]; - if (intcount != 0) { - seq_printf(m, "[%d] = %u\n", i, intcount); - total += intcount; - } - } - - seq_printf(m, "total = %u\n", total); - return 0; -} - -/***************************************************************************** - * - * FUNCTION: gen_pci_cfg_wr_show - * _________________________________________________________________________ - * - * DESCRIPTION: Generates a configuration write cycle for debug purposes. - * The IDSEL line asserted and location and data written are - * immaterial. Just want to be able to prove that a - * configuration write can be correctly generated on the - * PCI bus. Intent is that this function by invocable from - * the /proc filesystem. - * - * INPUTS: m - synthetic file construction data - * v - iterator - * - * RETURNS: 0 or error - * - ****************************************************************************/ -static int gen_pci_cfg_wr_show(struct seq_file *m, void *v) -{ - unsigned char where = 0; /* Write to static Device/Vendor ID */ - unsigned char bus_num = 0; /* Bus 0 */ - unsigned char dev_fn = 0xF; /* Arbitrary device number */ - u32 wr_data = 0xFF00AA00; /* Arbitrary data */ - struct msp_pci_regs *preg = (void *)PCI_BASE_REG; - unsigned long value; - int intr; - - seq_puts(m, "PMC MSP PCI: Beginning\n"); - - if (proc_init == 0) { - pci_proc_init(); - proc_init = ~0; - } - - seq_puts(m, "PMC MSP PCI: Before Cfg Wr\n"); - - /* - * Generate PCI Configuration Write Cycle - */ - - /* Clear cause register bits */ - preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F); - - /* Setup address that is to appear on PCI bus */ - preg->config_addr = BPCI_CFGADDR_ENABLE | - (bus_num << BPCI_CFGADDR_BUSNUM_SHF) | - (dev_fn << BPCI_CFGADDR_FUNCTNUM_SHF) | - (where & 0xFC); - - value = cpu_to_le32(wr_data); - - /* Launch the PCI configuration write cycle */ - *PCI_CONFIG_SPACE_REG = value; - - /* - * Check if the PCI configuration cycle (rd or wr) succeeded, by - * checking the status bits for errors like master or target abort. - */ - intr = preg->if_status; - - seq_puts(m, "PMC MSP PCI: After Cfg Wr\n"); - return 0; -} - -/***************************************************************************** - * - * FUNCTION: pci_proc_init - * _________________________________________________________________________ - * - * DESCRIPTION: Create entries in the /proc filesystem for debug access. - * - * INPUTS: none - * - * OUTPUTS: none - * - * RETURNS: none - * - ****************************************************************************/ -static void pci_proc_init(void) -{ - proc_create_single("pmc_msp_pci_rd_cnt", 0, NULL, show_msp_pci_counts); - proc_create_single("pmc_msp_pci_cfg_wr", 0, NULL, gen_pci_cfg_wr_show); -} -#endif /* CONFIG_PROC_FS && PCI_COUNTERS */ - -/***************************************************************************** - * - * STRUCT: pci_io_resource - * _________________________________________________________________________ - * - * DESCRIPTION: Defines the address range that pciauto() will use to - * assign to the I/O BARs of PCI devices. - * - * Use the start and end addresses of the MSP7120 PCI Host - * Controller I/O space, in the form that they appear on the - * PCI bus AFTER MSP7120 has performed address translation. - * - * For I/O accesses, MSP7120 ignores OATRAN and maps I/O - * accesses into the bottom 0xFFF region of address space, - * so that is the range to put into the pci_io_resource - * struct. - * - * In MSP4200, the start address was 0x04 instead of the - * expected 0x00. Will just assume there was a good reason - * for this! - * - * NOTES: Linux, by default, will assign I/O space to the lowest - * region of address space. Since MSP7120 and Linux, - * by default, have no offset in between how they map, the - * io_offset element of pci_controller struct should be set - * to zero. - * ELEMENTS: - * name - String used for a meaningful name. - * - * start - Start address of MSP7120's I/O space, as MSP7120 presents - * the address on the PCI bus. - * - * end - End address of MSP7120's I/O space, as MSP7120 presents - * the address on the PCI bus. - * - * flags - Attributes indicating the type of resource. In this case, - * indicate I/O space. - * - ****************************************************************************/ -static struct resource pci_io_resource = { - .name = "pci IO space", - .start = 0x04, - .end = 0x0FFF, - .flags = IORESOURCE_IO /* I/O space */ -}; - -/***************************************************************************** - * - * STRUCT: pci_mem_resource - * _________________________________________________________________________ - * - * DESCRIPTION: Defines the address range that pciauto() will use to - * assign to the memory BARs of PCI devices. - * - * The .start and .end values are dependent upon how address - * translation is performed by the OATRAN regiser. - * - * The values to use for .start and .end are the values - * in the form they appear on the PCI bus AFTER MSP7120 has - * performed OATRAN address translation. - * - * ELEMENTS: - * name - String used for a meaningful name. - * - * start - Start address of MSP7120's memory space, as MSP7120 presents - * the address on the PCI bus. - * - * end - End address of MSP7120's memory space, as MSP7120 presents - * the address on the PCI bus. - * - * flags - Attributes indicating the type of resource. In this case, - * indicate memory space. - * - ****************************************************************************/ -static struct resource pci_mem_resource = { - .name = "pci memory space", - .start = MSP_PCI_SPACE_BASE, - .end = MSP_PCI_SPACE_END, - .flags = IORESOURCE_MEM /* memory space */ -}; - -/***************************************************************************** - * - * FUNCTION: bpci_interrupt - * _________________________________________________________________________ - * - * DESCRIPTION: PCI status interrupt handler. Updates the count of how - * many times each status bit has been set, then clears - * the status bits. If the appropriate macros are defined, - * these counts can be viewed via the /proc filesystem. - * - * INPUTS: irq - unused - * dev_id - unused - * pt_regs - unused - * - * OUTPUTS: none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * - ****************************************************************************/ -static irqreturn_t bpci_interrupt(int irq, void *dev_id) -{ - struct msp_pci_regs *preg = (void *)PCI_BASE_REG; - unsigned int stat = preg->if_status; - -#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS) - int i; - for (i = 0; i < 32; ++i) { - if ((1 << i) & stat) - ++pci_int_count[i]; - } -#endif /* PROC_FS && PCI_COUNTERS */ - - /* printk("PCI ISR: Status=%08X\n", stat); */ - - /* write to clear all asserted interrupts */ - preg->if_status = stat; - - return IRQ_HANDLED; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_config_access - * _________________________________________________________________________ - * - * DESCRIPTION: Performs a PCI configuration access (rd or wr), then - * checks that the access succeeded by querying MSP7120's - * PCI status bits. - * - * INPUTS: - * access_type - kind of PCI configuration cycle to perform - * (read or write). Legal values are - * PCI_ACCESS_WRITE and PCI_ACCESS_READ. - * - * bus - pointer to the bus number of the device to - * be targeted for the configuration cycle. - * The only element of the pci_bus structure - * used is bus->number. This argument determines - * if the configuration access will be Type 0 or - * Type 1. Since MSP7120 assumes itself to be the - * PCI Host, any non-zero bus->number generates - * a Type 1 access. - * - * devfn - this is an 8-bit field. The lower three bits - * specify the function number of the device to - * be targeted for the configuration cycle, with - * all three-bit combinations being legal. The - * upper five bits specify the device number, - * with legal values being 10 to 31. - * - * where - address within the Configuration Header - * space to access. - * - * data - for write accesses, contains the data to - * write. - * - * OUTPUTS: - * data - for read accesses, contains the value read. - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - access failure - * - ****************************************************************************/ -int msp_pcibios_config_access(unsigned char access_type, - struct pci_bus *bus, - unsigned int devfn, - unsigned char where, - u32 *data) -{ - struct msp_pci_regs *preg = (void *)PCI_BASE_REG; - unsigned char bus_num = bus->number; - unsigned char dev_fn = (unsigned char)devfn; - unsigned long intr; - unsigned long value; - static char pciirqflag; - int ret; -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) - unsigned int vpe_status; -#endif - -#if defined(CONFIG_PROC_FS) && defined(PCI_COUNTERS) - if (proc_init == 0) { - pci_proc_init(); - proc_init = ~0; - } -#endif /* CONFIG_PROC_FS && PCI_COUNTERS */ - - /* - * Just the first time this function invokes, allocate - * an interrupt line for PCI host status interrupts. The - * allocation assigns an interrupt handler to the interrupt. - */ - if (pciirqflag == 0) { - ret = request_irq(MSP_INT_PCI,/* Hardcoded internal MSP7120 wiring */ - bpci_interrupt, - IRQF_SHARED, - "PMC MSP PCI Host", - preg); - if (ret != 0) - return ret; - pciirqflag = ~0; - } - -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) - vpe_status = dvpe(); -#endif - - /* - * Clear PCI cause register bits. - * - * In Polo, the PCI Host had a dedicated DMA called the - * Block Copy (not to be confused with the general purpose Block - * Copy Engine block). There appear to have been special interrupts - * for this Block Copy, called Block Copy 0 Fault (BC0F) and - * Block Copy 1 Fault (BC1F). MSP4200 and MSP7120 don't have this - * dedicated Block Copy block, so these two interrupts are now - * marked reserved. In case the Block Copy is resurrected in a - * future design, maintain the code that treats these two interrupts - * specially. - * - * Write to clear all interrupts in the PCI status register, aside - * from BC0F and BC1F. - */ - preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F); - - /* Setup address that is to appear on PCI bus */ - preg->config_addr = BPCI_CFGADDR_ENABLE | - (bus_num << BPCI_CFGADDR_BUSNUM_SHF) | - (dev_fn << BPCI_CFGADDR_FUNCTNUM_SHF) | - (where & 0xFC); - - /* IF access is a PCI configuration write */ - if (access_type == PCI_ACCESS_WRITE) { - value = cpu_to_le32(*data); - *PCI_CONFIG_SPACE_REG = value; - } else { - /* ELSE access is a PCI configuration read */ - value = le32_to_cpu(*PCI_CONFIG_SPACE_REG); - *data = value; - } - - /* - * Check if the PCI configuration cycle (rd or wr) succeeded, by - * checking the status bits for errors like master or target abort. - */ - intr = preg->if_status; - - /* Clear config access */ - preg->config_addr = 0; - - /* IF error occurred */ - if (intr & ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F)) { - /* Clear status bits */ - preg->if_status = ~(BPCI_IFSTATUS_BC0F | BPCI_IFSTATUS_BC1F); - -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) - evpe(vpe_status); -#endif - - return -1; - } - -#if defined(CONFIG_PMC_MSP7120_GW) || defined(CONFIG_PMC_MSP7120_EVAL) - evpe(vpe_status); -#endif - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_read_config_byte - * _________________________________________________________________________ - * - * DESCRIPTION: Read a byte from PCI configuration address spac - * Since the hardware can't address 8 bit chunks - * directly, read a 32-bit chunk, then mask off extraneous - * bits. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the read is destined for. - * devfn - device/function combination that the read is - * destined for. - * where - register within the Configuration Header space - * to access. - * - * OUTPUTS val - read data - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - read access failure - * - ****************************************************************************/ -static int -msp_pcibios_read_config_byte(struct pci_bus *bus, - unsigned int devfn, - int where, - u32 *val) -{ - u32 data = 0; - - /* - * If the config access did not complete normally (e.g., underwent - * master abort) do the PCI compliant thing, which is to supply an - * all ones value. - */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) { - *val = 0xFFFFFFFF; - return -1; - } - - *val = (data >> ((where & 3) << 3)) & 0x0ff; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_read_config_word - * _________________________________________________________________________ - * - * DESCRIPTION: Read a word (16 bits) from PCI configuration address space. - * Since the hardware can't address 16 bit chunks - * directly, read a 32-bit chunk, then mask off extraneous - * bits. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the read is destined for. - * devfn - device/function combination that the read is - * destined for. - * where - register within the Configuration Header space - * to access. - * - * OUTPUTS val - read data - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * PCIBIOS_BAD_REGISTER_NUMBER - bad register address - * -1 - read access failure - * - ****************************************************************************/ -static int -msp_pcibios_read_config_word(struct pci_bus *bus, - unsigned int devfn, - int where, - u32 *val) -{ - u32 data = 0; - - /* if (where & 1) */ /* Commented out non-compliant code. - * Should allow word access to configuration - * registers, with only exception being when - * the word access would wrap around into - * the next dword. - */ - if ((where & 3) == 3) { - *val = 0xFFFFFFFF; - return PCIBIOS_BAD_REGISTER_NUMBER; - } - - /* - * If the config access did not complete normally (e.g., underwent - * master abort) do the PCI compliant thing, which is to supply an - * all ones value. - */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) { - *val = 0xFFFFFFFF; - return -1; - } - - *val = (data >> ((where & 3) << 3)) & 0x0ffff; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_read_config_dword - * _________________________________________________________________________ - * - * DESCRIPTION: Read a double word (32 bits) from PCI configuration - * address space. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the read is destined for. - * devfn - device/function combination that the read is - * destined for. - * where - register within the Configuration Header space - * to access. - * - * OUTPUTS val - read data - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * PCIBIOS_BAD_REGISTER_NUMBER - bad register address - * -1 - read access failure - * - ****************************************************************************/ -static int -msp_pcibios_read_config_dword(struct pci_bus *bus, - unsigned int devfn, - int where, - u32 *val) -{ - u32 data = 0; - - /* Address must be dword aligned. */ - if (where & 3) { - *val = 0xFFFFFFFF; - return PCIBIOS_BAD_REGISTER_NUMBER; - } - - /* - * If the config access did not complete normally (e.g., underwent - * master abort) do the PCI compliant thing, which is to supply an - * all ones value. - */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) { - *val = 0xFFFFFFFF; - return -1; - } - - *val = data; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_write_config_byte - * _________________________________________________________________________ - * - * DESCRIPTION: Write a byte to PCI configuration address space. - * Since the hardware can't address 8 bit chunks - * directly, a read-modify-write is performed. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * val - value to write - * - * OUTPUTS none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - write access failure - * - ****************************************************************************/ -static int -msp_pcibios_write_config_byte(struct pci_bus *bus, - unsigned int devfn, - int where, - u8 val) -{ - u32 data = 0; - - /* read config space */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) - return -1; - - /* modify the byte within the dword */ - data = (data & ~(0xff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - - /* write back the full dword */ - if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn, - where, &data)) - return -1; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_write_config_word - * _________________________________________________________________________ - * - * DESCRIPTION: Write a word (16-bits) to PCI configuration address space. - * Since the hardware can't address 16 bit chunks - * directly, a read-modify-write is performed. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * val - value to write - * - * OUTPUTS none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * PCIBIOS_BAD_REGISTER_NUMBER - bad register address - * -1 - write access failure - * - ****************************************************************************/ -static int -msp_pcibios_write_config_word(struct pci_bus *bus, - unsigned int devfn, - int where, - u16 val) -{ - u32 data = 0; - - /* Fixed non-compliance: if (where & 1) */ - if ((where & 3) == 3) - return PCIBIOS_BAD_REGISTER_NUMBER; - - /* read config space */ - if (msp_pcibios_config_access(PCI_ACCESS_READ, bus, devfn, - where, &data)) - return -1; - - /* modify the word within the dword */ - data = (data & ~(0xffff << ((where & 3) << 3))) | - (val << ((where & 3) << 3)); - - /* write back the full dword */ - if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn, - where, &data)) - return -1; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_write_config_dword - * _________________________________________________________________________ - * - * DESCRIPTION: Write a double word (32-bits) to PCI configuration address - * space. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * val - value to write - * - * OUTPUTS none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * PCIBIOS_BAD_REGISTER_NUMBER - bad register address - * -1 - write access failure - * - ****************************************************************************/ -static int -msp_pcibios_write_config_dword(struct pci_bus *bus, - unsigned int devfn, - int where, - u32 val) -{ - /* check that address is dword aligned */ - if (where & 3) - return PCIBIOS_BAD_REGISTER_NUMBER; - - /* perform write */ - if (msp_pcibios_config_access(PCI_ACCESS_WRITE, bus, devfn, - where, &val)) - return -1; - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_read_config - * _________________________________________________________________________ - * - * DESCRIPTION: Interface the PCI configuration read request with - * the appropriate function, based on how many bytes - * the read request is. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * size - in units of bytes, should be 1, 2, or 4. - * - * OUTPUTS val - value read, with any extraneous bytes masked - * to zero. - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - failure - * - ****************************************************************************/ -int -msp_pcibios_read_config(struct pci_bus *bus, - unsigned int devfn, - int where, - int size, - u32 *val) -{ - if (size == 1) { - if (msp_pcibios_read_config_byte(bus, devfn, where, val)) { - return -1; - } - } else if (size == 2) { - if (msp_pcibios_read_config_word(bus, devfn, where, val)) { - return -1; - } - } else if (size == 4) { - if (msp_pcibios_read_config_dword(bus, devfn, where, val)) { - return -1; - } - } else { - *val = 0xFFFFFFFF; - return -1; - } - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * FUNCTION: msp_pcibios_write_config - * _________________________________________________________________________ - * - * DESCRIPTION: Interface the PCI configuration write request with - * the appropriate function, based on how many bytes - * the read request is. - * - * INPUTS bus - structure containing attributes for the PCI bus - * that the write is destined for. - * devfn - device/function combination that the write is - * destined for. - * where - register within the Configuration Header space - * to access. - * size - in units of bytes, should be 1, 2, or 4. - * val - value to write - * - * OUTPUTS: none - * - * RETURNS: PCIBIOS_SUCCESSFUL - success - * -1 - failure - * - ****************************************************************************/ -int -msp_pcibios_write_config(struct pci_bus *bus, - unsigned int devfn, - int where, - int size, - u32 val) -{ - if (size == 1) { - if (msp_pcibios_write_config_byte(bus, devfn, - where, (u8)(0xFF & val))) { - return -1; - } - } else if (size == 2) { - if (msp_pcibios_write_config_word(bus, devfn, - where, (u16)(0xFFFF & val))) { - return -1; - } - } else if (size == 4) { - if (msp_pcibios_write_config_dword(bus, devfn, where, val)) { - return -1; - } - } else { - return -1; - } - - return PCIBIOS_SUCCESSFUL; -} - -/***************************************************************************** - * - * STRUCTURE: msp_pci_ops - * _________________________________________________________________________ - * - * DESCRIPTION: structure to abstract the hardware specific PCI - * configuration accesses. - * - * ELEMENTS: - * read - function for Linux to generate PCI Configuration reads. - * write - function for Linux to generate PCI Configuration writes. - * - ****************************************************************************/ -struct pci_ops msp_pci_ops = { - .read = msp_pcibios_read_config, - .write = msp_pcibios_write_config -}; - -/***************************************************************************** - * - * STRUCTURE: msp_pci_controller - * _________________________________________________________________________ - * - * Describes the attributes of the MSP7120 PCI Host Controller - * - * ELEMENTS: - * pci_ops - abstracts the hardware specific PCI configuration - * accesses. - * - * mem_resource - address range pciauto() uses to assign to PCI device - * memory BARs. - * - * mem_offset - offset between how MSP7120 outbound PCI memory - * transaction addresses appear on the PCI bus and how Linux - * wants to configure memory BARs of the PCI devices. - * MSP7120 does nothing funky, so just set to zero. - * - * io_resource - address range pciauto() uses to assign to PCI device - * I/O BARs. - * - * io_offset - offset between how MSP7120 outbound PCI I/O - * transaction addresses appear on the PCI bus and how - * Linux defaults to configure I/O BARs of the PCI devices. - * MSP7120 maps outbound I/O accesses into the bottom - * bottom 4K of PCI address space (and ignores OATRAN). - * Since the Linux default is to configure I/O BARs to the - * bottom 4K, no special offset is needed. Just set to zero. - * - ****************************************************************************/ -static struct pci_controller msp_pci_controller = { - .pci_ops = &msp_pci_ops, - .mem_resource = &pci_mem_resource, - .mem_offset = 0, - .io_map_base = MSP_PCI_IOSPACE_BASE, - .io_resource = &pci_io_resource, - .io_offset = 0 -}; - -/***************************************************************************** - * - * FUNCTION: msp_pci_init - * _________________________________________________________________________ - * - * DESCRIPTION: Initialize the PCI Host Controller and register it with - * Linux so Linux can seize control of the PCI bus. - * - ****************************************************************************/ -void __init msp_pci_init(void) -{ - struct msp_pci_regs *preg = (void *)PCI_BASE_REG; - u32 id; - - /* Extract Device ID */ - id = read_reg32(PCI_JTAG_DEVID_REG, 0xFFFF) >> 12; - - /* Check if JTAG ID identifies MSP7120 */ - if (!MSP_HAS_PCI(id)) { - printk(KERN_WARNING "PCI: No PCI; id reads as %x\n", id); - goto no_pci; - } - - /* - * Enable flushing of the PCI-SDRAM queue upon a read - * of the SDRAM's Memory Configuration Register. - */ - *(unsigned long *)QFLUSH_REG_1 = 3; - - /* Configure PCI Host Controller. */ - preg->if_status = ~0; /* Clear cause register bits */ - preg->config_addr = 0; /* Clear config access */ - preg->oatran = MSP_PCI_OATRAN; /* PCI outbound addr translation */ - preg->if_mask = 0xF8BF87C0; /* Enable all PCI status interrupts */ - - /* configure so inb(), outb(), and family are functional */ - set_io_port_base(MSP_PCI_IOSPACE_BASE); - - /* Tell Linux the details of the MSP7120 PCI Host Controller */ - register_pci_controller(&msp_pci_controller); - - return; - -no_pci: - /* Disable PCI channel */ - printk(KERN_WARNING "PCI: no host PCI bus detected\n"); -} diff --git a/arch/mips/pci/pci-alchemy.c b/arch/mips/pci/pci-alchemy.c index 01a2af8215c8..7285b5667568 100644 --- a/arch/mips/pci/pci-alchemy.c +++ b/arch/mips/pci/pci-alchemy.c @@ -52,7 +52,7 @@ struct alchemy_pci_context { static struct alchemy_pci_context *__alchemy_pci_ctx; -/* IO/MEM resources for PCI. Keep the memres in sync with __fixup_bigphys_addr +/* IO/MEM resources for PCI. Keep the memres in sync with fixup_bigphys_addr * in arch/mips/alchemy/common/setup.c */ static struct resource alchemy_pci_def_memres = { diff --git a/arch/mips/pci/pci-emma2rh.c b/arch/mips/pci/pci-emma2rh.c deleted file mode 100644 index 156091a3e341..000000000000 --- a/arch/mips/pci/pci-emma2rh.c +++ /dev/null @@ -1,72 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) NEC Electronics Corporation 2004-2006 - * - * This file is based on the arch/mips/ddb5xxx/ddb5477/pci.c - * - * Copyright 2001 MontaVista Software Inc. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/types.h> -#include <linux/pci.h> - -#include <asm/bootinfo.h> - -#include <asm/emma/emma2rh.h> - -static struct resource pci_io_resource = { - .name = "pci IO space", - .start = EMMA2RH_PCI_IO_BASE, - .end = EMMA2RH_PCI_IO_BASE + EMMA2RH_PCI_IO_SIZE - 1, - .flags = IORESOURCE_IO, -}; - -static struct resource pci_mem_resource = { - .name = "pci memory space", - .start = EMMA2RH_PCI_MEM_BASE, - .end = EMMA2RH_PCI_MEM_BASE + EMMA2RH_PCI_MEM_SIZE - 1, - .flags = IORESOURCE_MEM, -}; - -extern struct pci_ops emma2rh_pci_ops; - -static struct pci_controller emma2rh_pci_controller = { - .pci_ops = &emma2rh_pci_ops, - .mem_resource = &pci_mem_resource, - .io_resource = &pci_io_resource, - .mem_offset = -0x04000000, - .io_offset = 0, -}; - -static void __init emma2rh_pci_init(void) -{ - /* setup PCI interface */ - emma2rh_out32(EMMA2RH_PCI_ARBIT_CTR, 0x70f); - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, 0x80000a18); - emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_COMMAND, - PCI_STATUS_DEVSEL_MEDIUM | PCI_STATUS_CAP_LIST | - PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY); - emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_BASE_ADDRESS_0, 0x10000000); - emma2rh_out32(EMMA2RH_PCI_CONFIG_BASE + PCI_BASE_ADDRESS_1, 0x00000000); - - emma2rh_out32(EMMA2RH_PCI_IWIN0_CTR, 0x12000000 | 0x218); - emma2rh_out32(EMMA2RH_PCI_IWIN1_CTR, 0x18000000 | 0x600); - emma2rh_out32(EMMA2RH_PCI_INIT_ESWP, 0x00000200); - - emma2rh_out32(EMMA2RH_PCI_TWIN_CTR, 0x00009200); - emma2rh_out32(EMMA2RH_PCI_TWIN_BADR, 0x00000000); - emma2rh_out32(EMMA2RH_PCI_TWIN0_DADR, 0x00000000); - emma2rh_out32(EMMA2RH_PCI_TWIN1_DADR, 0x00000000); -} - -static int __init emma2rh_pci_setup(void) -{ - emma2rh_pci_init(); - register_pci_controller(&emma2rh_pci_controller); - return 0; -} - -arch_initcall(emma2rh_pci_setup); diff --git a/arch/mips/pci/pci-lasat.c b/arch/mips/pci/pci-lasat.c deleted file mode 100644 index 47f4ee6bbb3b..000000000000 --- a/arch/mips/pci/pci-lasat.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 2000, 2001, 04 Keith M Wesolowski - */ -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/types.h> - -#include <asm/lasat/lasat.h> - -#include <irq.h> - -extern struct pci_ops nile4_pci_ops; -extern struct pci_ops gt64xxx_pci0_ops; -static struct resource lasat_pci_mem_resource = { - .name = "LASAT PCI MEM", - .start = 0x18000000, - .end = 0x19ffffff, - .flags = IORESOURCE_MEM, -}; - -static struct resource lasat_pci_io_resource = { - .name = "LASAT PCI IO", - .start = 0x1a000000, - .end = 0x1bffffff, - .flags = IORESOURCE_IO, -}; - -static struct pci_controller lasat_pci_controller = { - .mem_resource = &lasat_pci_mem_resource, - .io_resource = &lasat_pci_io_resource, -}; - -static int __init lasat_pci_setup(void) -{ - printk(KERN_DEBUG "PCI: starting\n"); - - if (IS_LASAT_200()) - lasat_pci_controller.pci_ops = &nile4_pci_ops; - else - lasat_pci_controller.pci_ops = >64xxx_pci0_ops; - - register_pci_controller(&lasat_pci_controller); - - return 0; -} - -arch_initcall(lasat_pci_setup); - -#define LASAT_IRQ_ETH1 (LASAT_IRQ_BASE + 0) -#define LASAT_IRQ_ETH0 (LASAT_IRQ_BASE + 1) -#define LASAT_IRQ_HDC (LASAT_IRQ_BASE + 2) -#define LASAT_IRQ_COMP (LASAT_IRQ_BASE + 3) -#define LASAT_IRQ_HDLC (LASAT_IRQ_BASE + 4) -#define LASAT_IRQ_PCIA (LASAT_IRQ_BASE + 5) -#define LASAT_IRQ_PCIB (LASAT_IRQ_BASE + 6) -#define LASAT_IRQ_PCIC (LASAT_IRQ_BASE + 7) -#define LASAT_IRQ_PCID (LASAT_IRQ_BASE + 8) - -int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - switch (slot) { - case 1: - case 2: - case 3: - return LASAT_IRQ_PCIA + (((slot-1) + (pin-1)) % 4); - case 4: - return LASAT_IRQ_ETH1; /* Ethernet 1 (LAN 2) */ - case 5: - return LASAT_IRQ_ETH0; /* Ethernet 0 (LAN 1) */ - case 6: - return LASAT_IRQ_HDC; /* IDE controller */ - default: - return 0xff; /* Illegal */ - } - - return -1; -} - -/* Do platform specific device initialization at pci_enable_device() time */ -int pcibios_plat_dev_init(struct pci_dev *dev) -{ - return 0; -} diff --git a/arch/mips/pic32/Platform b/arch/mips/pic32/Platform index cd2084f44507..1e92e52a137b 100644 --- a/arch/mips/pic32/Platform +++ b/arch/mips/pic32/Platform @@ -1,7 +1,6 @@ # # PIC32MZDA # -platform-$(CONFIG_PIC32MZDA) += pic32/ cflags-$(CONFIG_PIC32MZDA) += -I$(srctree)/arch/mips/include/asm/mach-pic32 load-$(CONFIG_PIC32MZDA) += 0xffffffff88000000 all-$(CONFIG_PIC32MZDA) := $(COMPRESSION_FNAME).bin diff --git a/arch/mips/pistachio/Platform b/arch/mips/pistachio/Platform index c3592b374ad2..f73a1a929965 100644 --- a/arch/mips/pistachio/Platform +++ b/arch/mips/pistachio/Platform @@ -1,7 +1,6 @@ # # IMG Pistachio SoC # -platform-$(CONFIG_MACH_PISTACHIO) += pistachio/ cflags-$(CONFIG_MACH_PISTACHIO) += \ -I$(srctree)/arch/mips/include/asm/mach-pistachio load-$(CONFIG_MACH_PISTACHIO) += 0xffffffff80400000 diff --git a/arch/mips/pistachio/init.c b/arch/mips/pistachio/init.c index a09a5da38e6b..558995ed6fe8 100644 --- a/arch/mips/pistachio/init.c +++ b/arch/mips/pistachio/init.c @@ -83,12 +83,12 @@ phys_addr_t mips_cdmm_phys_base(void) static void __init mips_nmi_setup(void) { void *base; - extern char except_vec_nmi; + extern char except_vec_nmi[]; base = cpu_has_veic ? (void *)(CAC_BASE + 0xa80) : (void *)(CAC_BASE + 0x380); - memcpy(base, &except_vec_nmi, 0x80); + memcpy(base, except_vec_nmi, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } @@ -96,12 +96,12 @@ static void __init mips_nmi_setup(void) static void __init mips_ejtag_setup(void) { void *base; - extern char except_vec_ejtag_debug; + extern char except_vec_ejtag_debug[]; base = cpu_has_veic ? (void *)(CAC_BASE + 0xa00) : (void *)(CAC_BASE + 0x300); - memcpy(base, &except_vec_ejtag_debug, 0x80); + memcpy(base, except_vec_ejtag_debug, 0x80); flush_icache_range((unsigned long)base, (unsigned long)base + 0x80); } diff --git a/arch/mips/pmcs-msp71xx/Kconfig b/arch/mips/pmcs-msp71xx/Kconfig deleted file mode 100644 index b185b7620c97..000000000000 --- a/arch/mips/pmcs-msp71xx/Kconfig +++ /dev/null @@ -1,50 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -choice - prompt "PMC-Sierra MSP SOC type" - depends on PMC_MSP - -config PMC_MSP4200_EVAL - bool "PMC-Sierra MSP4200 Eval Board" - select IRQ_MSP_SLP - select HAVE_PCI - select MIPS_L1_CACHE_SHIFT_4 - -config PMC_MSP4200_GW - bool "PMC-Sierra MSP4200 VoIP Gateway" - select IRQ_MSP_SLP - select HAVE_PCI - -config PMC_MSP7120_EVAL - bool "PMC-Sierra MSP7120 Eval Board" - select SYS_SUPPORTS_MULTITHREADING - select IRQ_MSP_CIC - select HAVE_PCI - -config PMC_MSP7120_GW - bool "PMC-Sierra MSP7120 Residential Gateway" - select SYS_SUPPORTS_MULTITHREADING - select IRQ_MSP_CIC - select HAVE_PCI - select MSP_HAS_USB - select MSP_ETH - -config PMC_MSP7120_FPGA - bool "PMC-Sierra MSP7120 FPGA" - select SYS_SUPPORTS_MULTITHREADING - select IRQ_MSP_CIC - select HAVE_PCI - -endchoice - -config MSP_HAS_USB - bool - depends on PMC_MSP - -config MSP_ETH - bool - select MSP_HAS_MAC - depends on PMC_MSP - -config MSP_HAS_MAC - bool - depends on PMC_MSP diff --git a/arch/mips/pmcs-msp71xx/Makefile b/arch/mips/pmcs-msp71xx/Makefile deleted file mode 100644 index c040bd6ed62d..000000000000 --- a/arch/mips/pmcs-msp71xx/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Makefile for the PMC-Sierra MSP SOCs -# -obj-y += msp_prom.o msp_setup.o msp_irq.o \ - msp_time.o msp_serial.o msp_elb.o -obj-$(CONFIG_PMC_MSP7120_GW) += msp_hwbutton.o -obj-$(CONFIG_IRQ_MSP_SLP) += msp_irq_slp.o -obj-$(CONFIG_IRQ_MSP_CIC) += msp_irq_cic.o msp_irq_per.o -obj-$(CONFIG_PCI) += msp_pci.o -obj-$(CONFIG_MSP_HAS_MAC) += msp_eth.o -obj-$(CONFIG_MSP_HAS_USB) += msp_usb.o -obj-$(CONFIG_MIPS_MT_SMP) += msp_smp.o diff --git a/arch/mips/pmcs-msp71xx/Platform b/arch/mips/pmcs-msp71xx/Platform deleted file mode 100644 index 7af0734a5007..000000000000 --- a/arch/mips/pmcs-msp71xx/Platform +++ /dev/null @@ -1,7 +0,0 @@ -# -# PMC-Sierra MSP SOCs -# -platform-$(CONFIG_PMC_MSP) += pmcs-msp71xx/ -cflags-$(CONFIG_PMC_MSP) += -I$(srctree)/arch/mips/include/asm/mach-pmcs-msp71xx \ - -mno-branch-likely -load-$(CONFIG_PMC_MSP) += 0xffffffff80100000 diff --git a/arch/mips/pmcs-msp71xx/msp_elb.c b/arch/mips/pmcs-msp71xx/msp_elb.c deleted file mode 100644 index 3e9641007216..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_elb.c +++ /dev/null @@ -1,46 +0,0 @@ -/* - * Sets up the proper Chip Select configuration registers. It is assumed that - * PMON sets up the ADDR and MASK registers properly. - * - * Copyright 2005-2006 PMC-Sierra, Inc. - * Author: Marc St-Jean, Marc_St-Jean@pmc-sierra.com - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <msp_regs.h> - -static int __init msp_elb_setup(void) -{ -#if defined(CONFIG_PMC_MSP7120_GW) \ - || defined(CONFIG_PMC_MSP7120_EVAL) - /* - * Force all CNFG to be identical and equal to CS0, - * according to OPS doc - */ - *CS1_CNFG_REG = *CS2_CNFG_REG = *CS3_CNFG_REG = *CS0_CNFG_REG; -#endif - return 0; -} - -subsys_initcall(msp_elb_setup); diff --git a/arch/mips/pmcs-msp71xx/msp_eth.c b/arch/mips/pmcs-msp71xx/msp_eth.c deleted file mode 100644 index 15679b427f44..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_eth.c +++ /dev/null @@ -1,111 +0,0 @@ -/* - * The setup file for ethernet related hardware on PMC-Sierra MSP processors. - * - * Copyright 2010 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/ioport.h> -#include <linux/platform_device.h> -#include <linux/delay.h> -#include <msp_regs.h> -#include <msp_int.h> -#include <msp_gpio_macros.h> - - -#define MSP_ETHERNET_GPIO0 14 -#define MSP_ETHERNET_GPIO1 15 -#define MSP_ETHERNET_GPIO2 16 - -#define MSP_ETH_ID "pmc_mspeth" -#define MSP_ETH_SIZE 0xE0 -static struct resource msp_eth0_resources[] = { - [0] = { - .start = MSP_MAC0_BASE, - .end = MSP_MAC0_BASE + MSP_ETH_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = MSP_INT_MAC0, - .end = MSP_INT_MAC0, - .flags = IORESOURCE_IRQ, - }, -}; - -static struct resource msp_eth1_resources[] = { - [0] = { - .start = MSP_MAC1_BASE, - .end = MSP_MAC1_BASE + MSP_ETH_SIZE - 1, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = MSP_INT_MAC1, - .end = MSP_INT_MAC1, - .flags = IORESOURCE_IRQ, - }, -}; - - - -static struct platform_device mspeth_device[] = { - [0] = { - .name = MSP_ETH_ID, - .id = 0, - .num_resources = ARRAY_SIZE(msp_eth0_resources), - .resource = msp_eth0_resources, - }, - [1] = { - .name = MSP_ETH_ID, - .id = 1, - .num_resources = ARRAY_SIZE(msp_eth1_resources), - .resource = msp_eth1_resources, - }, - -}; -#define msp_eth_devs mspeth_device - -int __init msp_eth_setup(void) -{ - int i, ret = 0; - - /* Configure the GPIO and take the ethernet PHY out of reset */ - msp_gpio_pin_mode(MSP_GPIO_OUTPUT, MSP_ETHERNET_GPIO0); - msp_gpio_pin_hi(MSP_ETHERNET_GPIO0); - - for (i = 0; i < ARRAY_SIZE(msp_eth_devs); i++) { - ret = platform_device_register(&msp_eth_devs[i]); - printk(KERN_INFO "device: %d, return value = %d\n", i, ret); - if (ret) { - platform_device_unregister(&msp_eth_devs[i]); - break; - } - } - - if (ret) - printk(KERN_WARNING "Could not initialize " - "MSPETH device structures.\n"); - - return ret; -} -subsys_initcall(msp_eth_setup); diff --git a/arch/mips/pmcs-msp71xx/msp_hwbutton.c b/arch/mips/pmcs-msp71xx/msp_hwbutton.c deleted file mode 100644 index bb57ed9ea2bd..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_hwbutton.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - * Sets up interrupt handlers for various hardware switches which are - * connected to interrupt lines. - * - * Copyright 2005-2207 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/interrupt.h> - -#include <msp_int.h> -#include <msp_regs.h> -#include <msp_regops.h> - -/* For hwbutton_interrupt->initial_state */ -#define HWBUTTON_HI 0x1 -#define HWBUTTON_LO 0x2 - -/* - * This struct describes a hardware button - */ -struct hwbutton_interrupt { - char *name; /* Name of button */ - int irq; /* Actual LINUX IRQ */ - int eirq; /* Extended IRQ number (0-7) */ - int initial_state; /* The "normal" state of the switch */ - void (*handle_hi)(void *); /* Handler: switch input has gone HI */ - void (*handle_lo)(void *); /* Handler: switch input has gone LO */ - void *data; /* Optional data to pass to handler */ -}; - -#ifdef CONFIG_PMC_MSP7120_GW -extern void msp_restart(char *); - -static void softreset_push(void *data) -{ - printk(KERN_WARNING "SOFTRESET switch was pushed\n"); - - /* - * In the future you could move this to the release handler, - * timing the difference between the 'push' and 'release', and only - * doing this ungraceful restart if the button has been down for - * a certain amount of time; otherwise doing a graceful restart. - */ - - msp_restart(NULL); -} - -static void softreset_release(void *data) -{ - printk(KERN_WARNING "SOFTRESET switch was released\n"); - - /* Do nothing */ -} - -static void standby_on(void *data) -{ - printk(KERN_WARNING "STANDBY switch was set to ON (not implemented)\n"); - - /* TODO: Put board in standby mode */ -} - -static void standby_off(void *data) -{ - printk(KERN_WARNING - "STANDBY switch was set to OFF (not implemented)\n"); - - /* TODO: Take out of standby mode */ -} - -static struct hwbutton_interrupt softreset_sw = { - .name = "Softreset button", - .irq = MSP_INT_EXT0, - .eirq = 0, - .initial_state = HWBUTTON_HI, - .handle_hi = softreset_release, - .handle_lo = softreset_push, - .data = NULL, -}; - -static struct hwbutton_interrupt standby_sw = { - .name = "Standby switch", - .irq = MSP_INT_EXT1, - .eirq = 1, - .initial_state = HWBUTTON_HI, - .handle_hi = standby_off, - .handle_lo = standby_on, - .data = NULL, -}; -#endif /* CONFIG_PMC_MSP7120_GW */ - -static irqreturn_t hwbutton_handler(int irq, void *data) -{ - struct hwbutton_interrupt *hirq = data; - unsigned long cic_ext = *CIC_EXT_CFG_REG; - - if (CIC_EXT_IS_ACTIVE_HI(cic_ext, hirq->eirq)) { - /* Interrupt: pin is now HI */ - CIC_EXT_SET_ACTIVE_LO(cic_ext, hirq->eirq); - hirq->handle_hi(hirq->data); - } else { - /* Interrupt: pin is now LO */ - CIC_EXT_SET_ACTIVE_HI(cic_ext, hirq->eirq); - hirq->handle_lo(hirq->data); - } - - /* - * Invert the POLARITY of this level interrupt to ack the interrupt - * Thus next state change will invoke the opposite message - */ - *CIC_EXT_CFG_REG = cic_ext; - - return IRQ_HANDLED; -} - -static int msp_hwbutton_register(struct hwbutton_interrupt *hirq) -{ - unsigned long cic_ext; - - if (hirq->handle_hi == NULL || hirq->handle_lo == NULL) - return -EINVAL; - - cic_ext = *CIC_EXT_CFG_REG; - CIC_EXT_SET_TRIGGER_LEVEL(cic_ext, hirq->eirq); - if (hirq->initial_state == HWBUTTON_HI) - CIC_EXT_SET_ACTIVE_LO(cic_ext, hirq->eirq); - else - CIC_EXT_SET_ACTIVE_HI(cic_ext, hirq->eirq); - *CIC_EXT_CFG_REG = cic_ext; - - return request_irq(hirq->irq, hwbutton_handler, 0, - hirq->name, hirq); -} - -static int __init msp_hwbutton_setup(void) -{ -#ifdef CONFIG_PMC_MSP7120_GW - msp_hwbutton_register(&softreset_sw); - msp_hwbutton_register(&standby_sw); -#endif - return 0; -} - -subsys_initcall(msp_hwbutton_setup); diff --git a/arch/mips/pmcs-msp71xx/msp_irq.c b/arch/mips/pmcs-msp71xx/msp_irq.c deleted file mode 100644 index d525cc931d89..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_irq.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - * IRQ vector handles - * - * Copyright (C) 1995, 1996, 1997, 2003 by Ralf Baechle - * - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - */ - -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/irq.h> -#include <linux/interrupt.h> -#include <linux/ptrace.h> -#include <linux/time.h> - -#include <asm/irq_cpu.h> -#include <asm/setup.h> - -#include <msp_int.h> - -/* SLP bases systems */ -extern void msp_slp_irq_init(void); -extern void msp_slp_irq_dispatch(void); - -/* CIC based systems */ -extern void msp_cic_irq_init(void); -extern void msp_cic_irq_dispatch(void); - -/* VSMP support init */ -extern void msp_vsmp_int_init(void); - -/* vectored interrupt implementation */ - -/* SW0/1 interrupts are used for SMP */ -static inline void mac0_int_dispatch(void) { do_IRQ(MSP_INT_MAC0); } -static inline void mac1_int_dispatch(void) { do_IRQ(MSP_INT_MAC1); } -static inline void mac2_int_dispatch(void) { do_IRQ(MSP_INT_SAR); } -static inline void usb_int_dispatch(void) { do_IRQ(MSP_INT_USB); } -static inline void sec_int_dispatch(void) { do_IRQ(MSP_INT_SEC); } - -/* - * The PMC-Sierra MSP interrupts are arranged in a 3 level cascaded - * hierarchical system. The first level are the direct MIPS interrupts - * and are assigned the interrupt range 0-7. The second level is the SLM - * interrupt controller and is assigned the range 8-39. The third level - * comprises the Peripherial block, the PCI block, the PCI MSI block and - * the SLP. The PCI interrupts and the SLP errors are handled by the - * relevant subsystems so the core interrupt code needs only concern - * itself with the Peripheral block. These are assigned interrupts in - * the range 40-71. - */ - -asmlinkage void plat_irq_dispatch(void) -{ - u32 pending; - - pending = read_c0_status() & read_c0_cause(); - - /* - * jump to the correct interrupt routine - * These are arranged in priority order and the timer - * comes first! - */ - -#ifdef CONFIG_IRQ_MSP_CIC /* break out the CIC stuff for now */ - if (pending & C_IRQ4) /* do the peripherals first, that's the timer */ - msp_cic_irq_dispatch(); - - else if (pending & C_IRQ0) - do_IRQ(MSP_INT_MAC0); - - else if (pending & C_IRQ1) - do_IRQ(MSP_INT_MAC1); - - else if (pending & C_IRQ2) - do_IRQ(MSP_INT_USB); - - else if (pending & C_IRQ3) - do_IRQ(MSP_INT_SAR); - - else if (pending & C_IRQ5) - do_IRQ(MSP_INT_SEC); - -#else - if (pending & C_IRQ5) - do_IRQ(MSP_INT_TIMER); - - else if (pending & C_IRQ0) - do_IRQ(MSP_INT_MAC0); - - else if (pending & C_IRQ1) - do_IRQ(MSP_INT_MAC1); - - else if (pending & C_IRQ3) - do_IRQ(MSP_INT_VE); - - else if (pending & C_IRQ4) - msp_slp_irq_dispatch(); -#endif - - else if (pending & C_SW0) /* do software after hardware */ - do_IRQ(MSP_INT_SW0); - - else if (pending & C_SW1) - do_IRQ(MSP_INT_SW1); -} - -void __init arch_init_irq(void) -{ - /* assume we'll be using vectored interrupt mode except in UP mode*/ -#ifdef CONFIG_MIPS_MT - BUG_ON(!cpu_has_vint); -#endif - /* initialize the 1st-level CPU based interrupt controller */ - mips_cpu_irq_init(); - -#ifdef CONFIG_IRQ_MSP_CIC - msp_cic_irq_init(); -#ifdef CONFIG_MIPS_MT - set_vi_handler(MSP_INT_CIC, msp_cic_irq_dispatch); - set_vi_handler(MSP_INT_MAC0, mac0_int_dispatch); - set_vi_handler(MSP_INT_MAC1, mac1_int_dispatch); - set_vi_handler(MSP_INT_SAR, mac2_int_dispatch); - set_vi_handler(MSP_INT_USB, usb_int_dispatch); - set_vi_handler(MSP_INT_SEC, sec_int_dispatch); -#ifdef CONFIG_MIPS_MT_SMP - msp_vsmp_int_init(); -#endif /* CONFIG_MIPS_MT_SMP */ -#endif /* CONFIG_MIPS_MT */ - /* setup the cascaded interrupts */ - if (request_irq(MSP_INT_CIC, no_action, IRQF_NO_THREAD, - "MSP CIC cascade", NULL)) - pr_err("Failed to register MSP CIC cascade interrupt\n"); - if (request_irq(MSP_INT_PER, no_action, IRQF_NO_THREAD, - "MSP PER cascade", NULL)) - pr_err("Failed to register MSP PER cascade interrupt\n"); - -#else - /* - * Setup the 2nd-level SLP register based interrupt controller. - * VSMP support support is not enabled for SLP. - */ - msp_slp_irq_init(); - - /* setup the cascaded SLP/PER interrupts */ - if (request_irq(MSP_INT_SLP, no_action, IRQF_NO_THREAD, - "MSP CIC cascade", NULL)) - pr_err("Failed to register MSP CIC cascade interrupt\n"); - if (request_irq(MSP_INT_PER, no_action, IRQF_NO_THREAD, - "MSP PER cascade", NULL)) - pr_err("Failed to register MSP PER cascade interrupt\n"); -#endif -} diff --git a/arch/mips/pmcs-msp71xx/msp_irq_cic.c b/arch/mips/pmcs-msp71xx/msp_irq_cic.c deleted file mode 100644 index 0706010cc99f..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_irq_cic.c +++ /dev/null @@ -1,208 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2010 PMC-Sierra, Inc, derived from irq_cpu.c - * - * This file define the irq handler for MSP CIC subsystem interrupts. - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/bitops.h> -#include <linux/irq.h> - -#include <asm/mipsregs.h> - -#include <msp_cic_int.h> -#include <msp_regs.h> - -/* - * External API - */ -extern void msp_per_irq_init(void); -extern void msp_per_irq_dispatch(void); - - -/* - * Convenience Macro. Should be somewhere generic. - */ -#define get_current_vpe() \ - ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE) - -#ifdef CONFIG_SMP - -#define LOCK_VPE(flags, mtflags) \ -do { \ - local_irq_save(flags); \ - mtflags = dmt(); \ -} while (0) - -#define UNLOCK_VPE(flags, mtflags) \ -do { \ - emt(mtflags); \ - local_irq_restore(flags);\ -} while (0) - -#define LOCK_CORE(flags, mtflags) \ -do { \ - local_irq_save(flags); \ - mtflags = dvpe(); \ -} while (0) - -#define UNLOCK_CORE(flags, mtflags) \ -do { \ - evpe(mtflags); \ - local_irq_restore(flags);\ -} while (0) - -#else - -#define LOCK_VPE(flags, mtflags) -#define UNLOCK_VPE(flags, mtflags) -#endif - -/* ensure writes to cic are completed */ -static inline void cic_wmb(void) -{ - const volatile void __iomem *cic_mem = CIC_VPE0_MSK_REG; - volatile u32 dummy_read; - - wmb(); - dummy_read = __raw_readl(cic_mem); - dummy_read++; -} - -static void unmask_cic_irq(struct irq_data *d) -{ - volatile u32 *cic_msk_reg = CIC_VPE0_MSK_REG; - int vpe; -#ifdef CONFIG_SMP - unsigned int mtflags; - unsigned long flags; - - /* - * Make sure we have IRQ affinity. It may have changed while - * we were processing the IRQ. - */ - if (!cpumask_test_cpu(smp_processor_id(), - irq_data_get_affinity_mask(d))) - return; -#endif - - vpe = get_current_vpe(); - LOCK_VPE(flags, mtflags); - cic_msk_reg[vpe] |= (1 << (d->irq - MSP_CIC_INTBASE)); - UNLOCK_VPE(flags, mtflags); - cic_wmb(); -} - -static void mask_cic_irq(struct irq_data *d) -{ - volatile u32 *cic_msk_reg = CIC_VPE0_MSK_REG; - int vpe = get_current_vpe(); -#ifdef CONFIG_SMP - unsigned long flags, mtflags; -#endif - LOCK_VPE(flags, mtflags); - cic_msk_reg[vpe] &= ~(1 << (d->irq - MSP_CIC_INTBASE)); - UNLOCK_VPE(flags, mtflags); - cic_wmb(); -} -static void msp_cic_irq_ack(struct irq_data *d) -{ - mask_cic_irq(d); - /* - * Only really necessary for 18, 16-14 and sometimes 3:0 - * (since these can be edge sensitive) but it doesn't - * hurt for the others - */ - *CIC_STS_REG = (1 << (d->irq - MSP_CIC_INTBASE)); -} - -/* Note: Limiting to VSMP. */ - -#ifdef CONFIG_MIPS_MT_SMP -static int msp_cic_irq_set_affinity(struct irq_data *d, - const struct cpumask *cpumask, bool force) -{ - int cpu; - unsigned long flags; - unsigned int mtflags; - unsigned long imask = (1 << (d->irq - MSP_CIC_INTBASE)); - volatile u32 *cic_mask = (volatile u32 *)CIC_VPE0_MSK_REG; - - /* timer balancing should be disabled in kernel code */ - BUG_ON(d->irq == MSP_INT_VPE0_TIMER || d->irq == MSP_INT_VPE1_TIMER); - - LOCK_CORE(flags, mtflags); - /* enable if any of each VPE's TCs require this IRQ */ - for_each_online_cpu(cpu) { - if (cpumask_test_cpu(cpu, cpumask)) - cic_mask[cpu] |= imask; - else - cic_mask[cpu] &= ~imask; - - } - - UNLOCK_CORE(flags, mtflags); - return 0; - -} -#endif - -static struct irq_chip msp_cic_irq_controller = { - .name = "MSP_CIC", - .irq_mask = mask_cic_irq, - .irq_mask_ack = msp_cic_irq_ack, - .irq_unmask = unmask_cic_irq, - .irq_ack = msp_cic_irq_ack, -#ifdef CONFIG_MIPS_MT_SMP - .irq_set_affinity = msp_cic_irq_set_affinity, -#endif -}; - -void __init msp_cic_irq_init(void) -{ - int i; - /* Mask/clear interrupts. */ - *CIC_VPE0_MSK_REG = 0x00000000; - *CIC_VPE1_MSK_REG = 0x00000000; - *CIC_STS_REG = 0xFFFFFFFF; - /* - * The MSP7120 RG and EVBD boards use IRQ[6:4] for PCI. - * These inputs map to EXT_INT_POL[6:4] inside the CIC. - * They are to be active low, level sensitive. - */ - *CIC_EXT_CFG_REG &= 0xFFFF8F8F; - - /* initialize all the IRQ descriptors */ - for (i = MSP_CIC_INTBASE ; i < MSP_CIC_INTBASE + 32 ; i++) { - irq_set_chip_and_handler(i, &msp_cic_irq_controller, - handle_level_irq); - } - - /* Initialize the PER interrupt sub-system */ - msp_per_irq_init(); -} - -/* CIC masked by CIC vector processing before dispatch called */ -void msp_cic_irq_dispatch(void) -{ - volatile u32 *cic_msk_reg = (volatile u32 *)CIC_VPE0_MSK_REG; - u32 cic_mask; - u32 pending; - int cic_status = *CIC_STS_REG; - cic_mask = cic_msk_reg[get_current_vpe()]; - pending = cic_status & cic_mask; - if (pending & (1 << (MSP_INT_VPE0_TIMER - MSP_CIC_INTBASE))) { - do_IRQ(MSP_INT_VPE0_TIMER); - } else if (pending & (1 << (MSP_INT_VPE1_TIMER - MSP_CIC_INTBASE))) { - do_IRQ(MSP_INT_VPE1_TIMER); - } else if (pending & (1 << (MSP_INT_PER - MSP_CIC_INTBASE))) { - msp_per_irq_dispatch(); - } else if (pending) { - do_IRQ(ffs(pending) + MSP_CIC_INTBASE - 1); - } else{ - spurious_interrupt(); - } -} diff --git a/arch/mips/pmcs-msp71xx/msp_irq_per.c b/arch/mips/pmcs-msp71xx/msp_irq_per.c deleted file mode 100644 index b284412b2923..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_irq_per.c +++ /dev/null @@ -1,127 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright 2010 PMC-Sierra, Inc, derived from irq_cpu.c - * - * This file define the irq handler for MSP PER subsystem interrupts. - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/spinlock.h> -#include <linux/bitops.h> - -#include <asm/mipsregs.h> - -#include <msp_cic_int.h> -#include <msp_regs.h> - - -/* - * Convenience Macro. Should be somewhere generic. - */ -#define get_current_vpe() \ - ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE) - -#ifdef CONFIG_SMP -/* - * The PER registers must be protected from concurrent access. - */ - -static DEFINE_SPINLOCK(per_lock); -#endif - -/* ensure writes to per are completed */ - -static inline void per_wmb(void) -{ - const volatile void __iomem *per_mem = PER_INT_MSK_REG; - volatile u32 dummy_read; - - wmb(); - dummy_read = __raw_readl(per_mem); - dummy_read++; -} - -static inline void unmask_per_irq(struct irq_data *d) -{ -#ifdef CONFIG_SMP - unsigned long flags; - spin_lock_irqsave(&per_lock, flags); - *PER_INT_MSK_REG |= (1 << (d->irq - MSP_PER_INTBASE)); - spin_unlock_irqrestore(&per_lock, flags); -#else - *PER_INT_MSK_REG |= (1 << (d->irq - MSP_PER_INTBASE)); -#endif - per_wmb(); -} - -static inline void mask_per_irq(struct irq_data *d) -{ -#ifdef CONFIG_SMP - unsigned long flags; - spin_lock_irqsave(&per_lock, flags); - *PER_INT_MSK_REG &= ~(1 << (d->irq - MSP_PER_INTBASE)); - spin_unlock_irqrestore(&per_lock, flags); -#else - *PER_INT_MSK_REG &= ~(1 << (d->irq - MSP_PER_INTBASE)); -#endif - per_wmb(); -} - -static inline void msp_per_irq_ack(struct irq_data *d) -{ - mask_per_irq(d); - /* - * In the PER interrupt controller, only bits 11 and 10 - * are write-to-clear, (SPI TX complete, SPI RX complete). - * It does nothing for any others. - */ - *PER_INT_STS_REG = (1 << (d->irq - MSP_PER_INTBASE)); -} - -#ifdef CONFIG_SMP -static int msp_per_irq_set_affinity(struct irq_data *d, - const struct cpumask *affinity, bool force) -{ - /* WTF is this doing ????? */ - unmask_per_irq(d); - return 0; -} -#endif - -static struct irq_chip msp_per_irq_controller = { - .name = "MSP_PER", - .irq_enable = unmask_per_irq, - .irq_disable = mask_per_irq, - .irq_ack = msp_per_irq_ack, -#ifdef CONFIG_SMP - .irq_set_affinity = msp_per_irq_set_affinity, -#endif -}; - -void __init msp_per_irq_init(void) -{ - int i; - /* Mask/clear interrupts. */ - *PER_INT_MSK_REG = 0x00000000; - *PER_INT_STS_REG = 0xFFFFFFFF; - /* initialize all the IRQ descriptors */ - for (i = MSP_PER_INTBASE; i < MSP_PER_INTBASE + 32; i++) { - irq_set_chip(i, &msp_per_irq_controller); - } -} - -void msp_per_irq_dispatch(void) -{ - u32 per_mask = *PER_INT_MSK_REG; - u32 per_status = *PER_INT_STS_REG; - u32 pending; - - pending = per_status & per_mask; - if (pending) { - do_IRQ(ffs(pending) + MSP_PER_INTBASE - 1); - } else { - spurious_interrupt(); - } -} diff --git a/arch/mips/pmcs-msp71xx/msp_irq_slp.c b/arch/mips/pmcs-msp71xx/msp_irq_slp.c deleted file mode 100644 index 097a5fd3b06b..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_irq_slp.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * This file define the irq handler for MSP SLM subsystem interrupts. - * - * Copyright 2005-2006 PMC-Sierra, Inc, derived from irq_cpu.c - * Author: Andrew Hughes, Andrew_Hughes@pmc-sierra.com - */ - -#include <linux/init.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/bitops.h> - -#include <asm/mipsregs.h> - -#include <msp_slp_int.h> -#include <msp_regs.h> - -static inline void unmask_msp_slp_irq(struct irq_data *d) -{ - unsigned int irq = d->irq; - - /* check for PER interrupt range */ - if (irq < MSP_PER_INTBASE) - *SLP_INT_MSK_REG |= (1 << (irq - MSP_SLP_INTBASE)); - else - *PER_INT_MSK_REG |= (1 << (irq - MSP_PER_INTBASE)); -} - -static inline void mask_msp_slp_irq(struct irq_data *d) -{ - unsigned int irq = d->irq; - - /* check for PER interrupt range */ - if (irq < MSP_PER_INTBASE) - *SLP_INT_MSK_REG &= ~(1 << (irq - MSP_SLP_INTBASE)); - else - *PER_INT_MSK_REG &= ~(1 << (irq - MSP_PER_INTBASE)); -} - -/* - * While we ack the interrupt interrupts are disabled and thus we don't need - * to deal with concurrency issues. Same for msp_slp_irq_end. - */ -static inline void ack_msp_slp_irq(struct irq_data *d) -{ - unsigned int irq = d->irq; - - /* check for PER interrupt range */ - if (irq < MSP_PER_INTBASE) - *SLP_INT_STS_REG = (1 << (irq - MSP_SLP_INTBASE)); - else - *PER_INT_STS_REG = (1 << (irq - MSP_PER_INTBASE)); -} - -static struct irq_chip msp_slp_irq_controller = { - .name = "MSP_SLP", - .irq_ack = ack_msp_slp_irq, - .irq_mask = mask_msp_slp_irq, - .irq_unmask = unmask_msp_slp_irq, -}; - -void __init msp_slp_irq_init(void) -{ - int i; - - /* Mask/clear interrupts. */ - *SLP_INT_MSK_REG = 0x00000000; - *PER_INT_MSK_REG = 0x00000000; - *SLP_INT_STS_REG = 0xFFFFFFFF; - *PER_INT_STS_REG = 0xFFFFFFFF; - - /* initialize all the IRQ descriptors */ - for (i = MSP_SLP_INTBASE; i < MSP_PER_INTBASE + 32; i++) - irq_set_chip_and_handler(i, &msp_slp_irq_controller, - handle_level_irq); -} - -void msp_slp_irq_dispatch(void) -{ - u32 pending; - int intbase; - - intbase = MSP_SLP_INTBASE; - pending = *SLP_INT_STS_REG & *SLP_INT_MSK_REG; - - /* check for PER interrupt */ - if (pending == (1 << (MSP_INT_PER - MSP_SLP_INTBASE))) { - intbase = MSP_PER_INTBASE; - pending = *PER_INT_STS_REG & *PER_INT_MSK_REG; - } - - /* check for spurious interrupt */ - if (pending == 0x00000000) { - printk(KERN_ERR "Spurious %s interrupt?\n", - (intbase == MSP_SLP_INTBASE) ? "SLP" : "PER"); - return; - } - - /* dispatch the irq */ - do_IRQ(ffs(pending) + intbase - 1); -} diff --git a/arch/mips/pmcs-msp71xx/msp_pci.c b/arch/mips/pmcs-msp71xx/msp_pci.c deleted file mode 100644 index 428dea23c35c..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_pci.c +++ /dev/null @@ -1,50 +0,0 @@ -/* - * The setup file for PCI related hardware on PMC-Sierra MSP processors. - * - * Copyright 2005-2006 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/init.h> - -#include <msp_prom.h> -#include <msp_regs.h> - -extern void msp_pci_init(void); - -static int __init msp_pci_setup(void) -{ -#if 0 /* Linux 2.6 initialization code to be completed */ - if (getdeviceid() & DEV_ID_SINGLE_PC) { - /* If single card mode */ - slmRegs *sreg = (slmRegs *) SREG_BASE; - - sreg->single_pc_enable = SINGLE_PCCARD; - } -#endif - - msp_pci_init(); - - return 0; -} - -subsys_initcall(msp_pci_setup); diff --git a/arch/mips/pmcs-msp71xx/msp_prom.c b/arch/mips/pmcs-msp71xx/msp_prom.c deleted file mode 100644 index 800a21b8b8b0..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_prom.c +++ /dev/null @@ -1,513 +0,0 @@ -/* - * BRIEF MODULE DESCRIPTION - * PROM library initialisation code, assuming a version of - * pmon is the boot code. - * - * Copyright 2000,2001 MontaVista Software Inc. - * Author: MontaVista Software, Inc. - * ppopov@mvista.com or source@mvista.com - * - * This file was derived from Carsten Langgaard's - * arch/mips/mips-boards/xx files. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/export.h> -#include <linux/kernel.h> -#include <linux/init.h> -#include <linux/string.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/slab.h> - -#include <asm/addrspace.h> -#include <asm/bootinfo.h> -#include <asm-generic/sections.h> -#include <asm/page.h> - -#include <msp_prom.h> -#include <msp_regs.h> - -/* global PROM environment variables and pointers */ -int prom_argc; -char **prom_argv, **prom_envp; -int *prom_vec; - -/* debug flag */ -int init_debug = 1; - -/* memory blocks */ -struct prom_pmemblock mdesc[PROM_MAX_PMEMBLOCKS]; - -#define MAX_PROM_MEM 5 -static phys_addr_t prom_mem_base[MAX_PROM_MEM] __initdata; -static phys_addr_t prom_mem_size[MAX_PROM_MEM] __initdata; -static unsigned int nr_prom_mem __initdata; - -/* default feature sets */ -static char msp_default_features[] = -#if defined(CONFIG_PMC_MSP4200_EVAL) \ - || defined(CONFIG_PMC_MSP4200_GW) - "ERER"; -#elif defined(CONFIG_PMC_MSP7120_EVAL) \ - || defined(CONFIG_PMC_MSP7120_GW) - "EMEMSP"; -#elif defined(CONFIG_PMC_MSP7120_FPGA) - "EMEM"; -#endif - -/* conversion functions */ -static inline unsigned char str2hexnum(unsigned char c) -{ - if (c >= '0' && c <= '9') - return c - '0'; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - return 0; /* foo */ -} - -int str2eaddr(unsigned char *ea, unsigned char *str) -{ - int index = 0; - unsigned char num = 0; - - while (*str != '\0') { - if ((*str == '.') || (*str == ':')) { - ea[index++] = num; - num = 0; - str++; - } else { - num = num << 4; - num |= str2hexnum(*str++); - } - } - - if (index == 5) { - ea[index++] = num; - return 0; - } else - return -1; -} -EXPORT_SYMBOL(str2eaddr); - -static inline unsigned long str2hex(unsigned char *str) -{ - int value = 0; - - while (*str) { - value = value << 4; - value |= str2hexnum(*str++); - } - - return value; -} - -/* function to query the system information */ -const char *get_system_type(void) -{ -#if defined(CONFIG_PMC_MSP4200_EVAL) - return "PMC-Sierra MSP4200 Eval Board"; -#elif defined(CONFIG_PMC_MSP4200_GW) - return "PMC-Sierra MSP4200 VoIP Gateway"; -#elif defined(CONFIG_PMC_MSP7120_EVAL) - return "PMC-Sierra MSP7120 Eval Board"; -#elif defined(CONFIG_PMC_MSP7120_GW) - return "PMC-Sierra MSP7120 Residential Gateway"; -#elif defined(CONFIG_PMC_MSP7120_FPGA) - return "PMC-Sierra MSP7120 FPGA"; -#else - #error "What is the type of *your* MSP?" -#endif -} - -int get_ethernet_addr(char *ethaddr_name, char *ethernet_addr) -{ - char *ethaddr_str; - - ethaddr_str = prom_getenv(ethaddr_name); - if (!ethaddr_str) { - printk(KERN_WARNING "%s not set in boot prom\n", ethaddr_name); - return -1; - } - - if (str2eaddr(ethernet_addr, ethaddr_str) == -1) { - printk(KERN_WARNING "%s badly formatted-<%s>\n", - ethaddr_name, ethaddr_str); - return -1; - } - - if (init_debug > 1) { - int i; - printk(KERN_DEBUG "get_ethernet_addr: for %s ", ethaddr_name); - for (i = 0; i < 5; i++) - printk(KERN_DEBUG "%02x:", - (unsigned char)*(ethernet_addr+i)); - printk(KERN_DEBUG "%02x\n", *(ethernet_addr+i)); - } - - return 0; -} -EXPORT_SYMBOL(get_ethernet_addr); - -static char *get_features(void) -{ - char *feature = prom_getenv(FEATURES); - - if (feature == NULL) { - /* default features based on MACHINE_TYPE */ - feature = msp_default_features; - } - - return feature; -} - -static char test_feature(char c) -{ - char *feature = get_features(); - - while (*feature) { - if (*feature++ == c) - return *feature; - feature++; - } - - return FEATURE_NOEXIST; -} - -unsigned long get_deviceid(void) -{ - char *deviceid = prom_getenv(DEVICEID); - - if (deviceid == NULL) - return *DEV_ID_REG; - else - return str2hex(deviceid); -} - -char identify_pci(void) -{ - return test_feature(PCI_KEY); -} -EXPORT_SYMBOL(identify_pci); - -char identify_pcimux(void) -{ - return test_feature(PCIMUX_KEY); -} - -char identify_sec(void) -{ - return test_feature(SEC_KEY); -} -EXPORT_SYMBOL(identify_sec); - -char identify_spad(void) -{ - return test_feature(SPAD_KEY); -} -EXPORT_SYMBOL(identify_spad); - -char identify_tdm(void) -{ - return test_feature(TDM_KEY); -} -EXPORT_SYMBOL(identify_tdm); - -char identify_zsp(void) -{ - return test_feature(ZSP_KEY); -} -EXPORT_SYMBOL(identify_zsp); - -static char identify_enetfeature(char key, unsigned long interface_num) -{ - char *feature = get_features(); - - while (*feature) { - if (*feature++ == key && interface_num-- == 0) - return *feature; - feature++; - } - - return FEATURE_NOEXIST; -} - -char identify_enet(unsigned long interface_num) -{ - return identify_enetfeature(ENET_KEY, interface_num); -} -EXPORT_SYMBOL(identify_enet); - -char identify_enetTxD(unsigned long interface_num) -{ - return identify_enetfeature(ENETTXD_KEY, interface_num); -} -EXPORT_SYMBOL(identify_enetTxD); - -unsigned long identify_family(void) -{ - unsigned long deviceid; - - deviceid = get_deviceid(); - - return deviceid & CPU_DEVID_FAMILY; -} -EXPORT_SYMBOL(identify_family); - -unsigned long identify_revision(void) -{ - unsigned long deviceid; - - deviceid = get_deviceid(); - - return deviceid & CPU_DEVID_REVISION; -} -EXPORT_SYMBOL(identify_revision); - -/* PROM environment functions */ -char *prom_getenv(char *env_name) -{ - /* - * Return a pointer to the given environment variable. prom_envp - * points to a null terminated array of pointers to variables. - * Environment variables are stored in the form of "memsize=64" - */ - - char **var = prom_envp; - int i = strlen(env_name); - - while (*var) { - if (strncmp(env_name, *var, i) == 0) { - return *var + strlen(env_name) + 1; - } - var++; - } - - return NULL; -} - -/* PROM commandline functions */ -void __init prom_init_cmdline(void) -{ - char *cp; - int actr; - - actr = 1; /* Always ignore argv[0] */ - - cp = &(arcs_cmdline[0]); - while (actr < prom_argc) { - strcpy(cp, prom_argv[actr]); - cp += strlen(prom_argv[actr]); - *cp++ = ' '; - actr++; - } - if (cp != &(arcs_cmdline[0])) /* get rid of trailing space */ - --cp; - *cp = '\0'; -} - -/* memory allocation functions */ -static int __init prom_memtype_classify(unsigned int type) -{ - switch (type) { - case yamon_free: - return BOOT_MEM_RAM; - case yamon_prom: - return BOOT_MEM_ROM_DATA; - default: - return BOOT_MEM_RESERVED; - } -} - -void __init prom_meminit(void) -{ - struct prom_pmemblock *p; - - p = prom_getmdesc(); - - while (p->size) { - long type; - unsigned long base, size; - - type = prom_memtype_classify(p->type); - base = p->base; - size = p->size; - - add_memory_region(base, size, type); - p++; - - if (type == BOOT_MEM_ROM_DATA) { - if (nr_prom_mem >= MAX_PROM_MEM) { - pr_err("Too many ROM DATA regions"); - continue; - } - prom_mem_base[nr_prom_mem] = base; - prom_mem_size[nr_prom_mem] = size; - nr_prom_mem++; - } - } -} - -void __init prom_free_prom_memory(void) -{ - int argc; - char **argv; - char **envp; - char *ptr; - int len = 0; - int i; - - /* - * preserve environment variables and command line from pmon/bbload - * first preserve the command line - */ - for (argc = 0; argc < prom_argc; argc++) { - len += sizeof(char *); /* length of pointer */ - len += strlen(prom_argv[argc]) + 1; /* length of string */ - } - len += sizeof(char *); /* plus length of null pointer */ - - argv = kmalloc(len, GFP_KERNEL); - ptr = (char *) &argv[prom_argc + 1]; /* strings follow array */ - - for (argc = 0; argc < prom_argc; argc++) { - argv[argc] = ptr; - strcpy(ptr, prom_argv[argc]); - ptr += strlen(prom_argv[argc]) + 1; - } - argv[prom_argc] = NULL; /* end array with null pointer */ - prom_argv = argv; - - /* next preserve the environment variables */ - len = 0; - i = 0; - for (envp = prom_envp; *envp != NULL; envp++) { - i++; /* count number of environment variables */ - len += sizeof(char *); /* length of pointer */ - len += strlen(*envp) + 1; /* length of string */ - } - len += sizeof(char *); /* plus length of null pointer */ - - envp = kmalloc(len, GFP_KERNEL); - ptr = (char *) &envp[i+1]; - - for (argc = 0; argc < i; argc++) { - envp[argc] = ptr; - strcpy(ptr, prom_envp[argc]); - ptr += strlen(prom_envp[argc]) + 1; - } - envp[i] = NULL; /* end array with null pointer */ - prom_envp = envp; - - for (i = 0; i < nr_prom_mem; i++) { - free_init_pages("prom memory", - prom_mem_base[i], prom_mem_base[i] + prom_mem_size[i]); - } -} - -struct prom_pmemblock *__init prom_getmdesc(void) -{ - static char memsz_env[] __initdata = "memsize"; - static char heaptop_env[] __initdata = "heaptop"; - char *str; - unsigned int memsize; - unsigned int heaptop; - int i; - - str = prom_getenv(memsz_env); - if (!str) { - ppfinit("memsize not set in boot prom, " - "set to default (32Mb)\n"); - memsize = 0x02000000; - } else { - memsize = simple_strtol(str, NULL, 0); - - if (memsize == 0) { - /* if memsize is a bad size, use reasonable default */ - memsize = 0x02000000; - } - - /* convert to physical address (removing caching bits, etc) */ - memsize = CPHYSADDR(memsize); - } - - str = prom_getenv(heaptop_env); - if (!str) { - heaptop = CPHYSADDR((u32)&_text); - ppfinit("heaptop not set in boot prom, " - "set to default 0x%08x\n", heaptop); - } else { - heaptop = simple_strtol(str, NULL, 16); - if (heaptop == 0) { - /* heaptop conversion bad, might have 0xValue */ - heaptop = simple_strtol(str, NULL, 0); - - if (heaptop == 0) { - /* heaptop still bad, use reasonable default */ - heaptop = CPHYSADDR((u32)&_text); - } - } - - /* convert to physical address (removing caching bits, etc) */ - heaptop = CPHYSADDR((u32)heaptop); - } - - /* the base region */ - i = 0; - mdesc[i].type = BOOT_MEM_RESERVED; - mdesc[i].base = 0x00000000; - mdesc[i].size = PAGE_ALIGN(0x300 + 0x80); - /* jtag interrupt vector + sizeof vector */ - - /* PMON data */ - if (heaptop > mdesc[i].base + mdesc[i].size) { - i++; /* 1 */ - mdesc[i].type = BOOT_MEM_ROM_DATA; - mdesc[i].base = mdesc[i-1].base + mdesc[i-1].size; - mdesc[i].size = heaptop - mdesc[i].base; - } - - /* end of PMON data to start of kernel -- probably zero .. */ - if (heaptop != CPHYSADDR((u32)_text)) { - i++; /* 2 */ - mdesc[i].type = BOOT_MEM_RAM; - mdesc[i].base = heaptop; - mdesc[i].size = CPHYSADDR((u32)_text) - mdesc[i].base; - } - - /* kernel proper */ - i++; /* 3 */ - mdesc[i].type = BOOT_MEM_RESERVED; - mdesc[i].base = CPHYSADDR((u32)_text); - mdesc[i].size = CPHYSADDR(PAGE_ALIGN((u32)_end)) - mdesc[i].base; - - /* Remainder of RAM -- under memsize */ - i++; /* 5 */ - mdesc[i].type = yamon_free; - mdesc[i].base = mdesc[i-1].base + mdesc[i-1].size; - mdesc[i].size = memsize - mdesc[i].base; - - return &mdesc[0]; -} diff --git a/arch/mips/pmcs-msp71xx/msp_serial.c b/arch/mips/pmcs-msp71xx/msp_serial.c deleted file mode 100644 index 940c684f6921..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_serial.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * The setup file for serial related hardware on PMC-Sierra MSP processors. - * - * Copyright 2005 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -#include <linux/serial.h> -#include <linux/serial_core.h> -#include <linux/serial_reg.h> -#include <linux/slab.h> - -#include <asm/bootinfo.h> -#include <asm/io.h> -#include <asm/processor.h> -#include <asm/serial.h> -#include <linux/serial_8250.h> - -#include <msp_prom.h> -#include <msp_int.h> -#include <msp_regs.h> - -struct msp_uart_data { - int last_lcr; -}; - -static void msp_serial_out(struct uart_port *p, int offset, int value) -{ - struct msp_uart_data *d = p->private_data; - - if (offset == UART_LCR) - d->last_lcr = value; - - offset <<= p->regshift; - writeb(value, p->membase + offset); -} - -static unsigned int msp_serial_in(struct uart_port *p, int offset) -{ - offset <<= p->regshift; - - return readb(p->membase + offset); -} - -static int msp_serial_handle_irq(struct uart_port *p) -{ - struct msp_uart_data *d = p->private_data; - unsigned int iir = readb(p->membase + (UART_IIR << p->regshift)); - - if (serial8250_handle_irq(p, iir)) { - return 1; - } else if ((iir & UART_IIR_BUSY) == UART_IIR_BUSY) { - /* - * The DesignWare APB UART has an Busy Detect (0x07) interrupt - * meaning an LCR write attempt occurred while the UART was - * busy. The interrupt must be cleared by reading the UART - * status register (USR) and the LCR re-written. - * - * Note: MSP reserves 0x20 bytes of address space for the UART - * and the USR is mapped in a separate block at an offset of - * 0xc0 from the start of the UART. - */ - (void)readb(p->membase + 0xc0); - writeb(d->last_lcr, p->membase + (UART_LCR << p->regshift)); - - return 1; - } - - return 0; -} - -void __init msp_serial_setup(void) -{ - char *s; - char *endp; - struct uart_port up; - unsigned int uartclk; - - memset(&up, 0, sizeof(up)); - - /* Check if clock was specified in environment */ - s = prom_getenv("uartfreqhz"); - if(!(s && *s && (uartclk = simple_strtoul(s, &endp, 10)) && *endp == 0)) - uartclk = MSP_BASE_BAUD; - ppfinit("UART clock set to %d\n", uartclk); - - /* Initialize first serial port */ - up.mapbase = MSP_UART0_BASE; - up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN); - up.irq = MSP_INT_UART0; - up.uartclk = uartclk; - up.regshift = 2; - up.iotype = UPIO_MEM; - up.flags = UPF_BOOT_AUTOCONF | UPF_SKIP_TEST; - up.type = PORT_16550A; - up.line = 0; - up.serial_out = msp_serial_out; - up.serial_in = msp_serial_in; - up.handle_irq = msp_serial_handle_irq; - up.private_data = kzalloc(sizeof(struct msp_uart_data), GFP_KERNEL); - if (!up.private_data) { - pr_err("failed to allocate uart private data\n"); - return; - } - if (early_serial_setup(&up)) { - kfree(up.private_data); - pr_err("Early serial init of port 0 failed\n"); - } - - /* Initialize the second serial port, if one exists */ - switch (mips_machtype) { - case MACH_MSP4200_EVAL: - case MACH_MSP4200_GW: - case MACH_MSP4200_FPGA: - case MACH_MSP7120_EVAL: - case MACH_MSP7120_GW: - case MACH_MSP7120_FPGA: - /* Enable UART1 on MSP4200 and MSP7120 */ - *GPIO_CFG2_REG = 0x00002299; - break; - - default: - return; /* No second serial port, good-bye. */ - } - - up.mapbase = MSP_UART1_BASE; - up.membase = ioremap(up.mapbase, MSP_UART_REG_LEN); - up.irq = MSP_INT_UART1; - up.line = 1; - up.private_data = (void*)UART1_STATUS_REG; - if (early_serial_setup(&up)) { - kfree(up.private_data); - pr_err("Early serial init of port 1 failed\n"); - } -} diff --git a/arch/mips/pmcs-msp71xx/msp_setup.c b/arch/mips/pmcs-msp71xx/msp_setup.c deleted file mode 100644 index d1e59cec116e..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_setup.c +++ /dev/null @@ -1,228 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * The generic setup file for PMC-Sierra MSP processors - * - * Copyright 2005-2007 PMC-Sierra, Inc, - * Author: Jun Sun, jsun@mvista.com or jsun@junsun.net - */ - -#include <linux/delay.h> - -#include <asm/bootinfo.h> -#include <asm/cacheflush.h> -#include <asm/idle.h> -#include <asm/r4kcache.h> -#include <asm/reboot.h> -#include <asm/smp-ops.h> -#include <asm/time.h> - -#include <msp_prom.h> -#include <msp_regs.h> - -#if defined(CONFIG_PMC_MSP7120_GW) -#include <msp_regops.h> -#define MSP_BOARD_RESET_GPIO 9 -#endif - -extern void msp_serial_setup(void); - -#if defined(CONFIG_PMC_MSP7120_EVAL) || \ - defined(CONFIG_PMC_MSP7120_GW) || \ - defined(CONFIG_PMC_MSP7120_FPGA) -/* - * Performs the reset for MSP7120-based boards - */ -void msp7120_reset(void) -{ - void *start, *end, *iptr; - - /* Diasble all interrupts */ - local_irq_disable(); -#ifdef CONFIG_SYS_SUPPORTS_MULTITHREADING - dvpe(); -#endif - - /* Cache the reset code of this function */ - __asm__ __volatile__ ( - " .set push \n" - " .set arch=r4000 \n" - " la %0,startpoint \n" - " la %1,endpoint \n" - " .set pop \n" - : "=r" (start), "=r" (end) - : - ); - - for (iptr = (void *)((unsigned int)start & ~(L1_CACHE_BYTES - 1)); - iptr < end; iptr += L1_CACHE_BYTES) - cache_op(Fill, iptr); - - __asm__ __volatile__ ( - "startpoint: \n" - ); - - /* Put the DDRC into self-refresh mode */ - DDRC_INDIRECT_WRITE(DDRC_CTL(10), 0xb, 1 << 16); - - /* - * IMPORTANT! - * DO NOT do anything from here on out that might even - * think about fetching from RAM - i.e., don't call any - * non-inlined functions, and be VERY sure that any inline - * functions you do call do NOT access any sort of RAM - * anywhere! - */ - - /* Wait a bit for the DDRC to settle */ - mdelay(125); - -#if defined(CONFIG_PMC_MSP7120_GW) - /* - * Set GPIO 9 HI, (tied to board reset logic) - * GPIO 9 is the 4th GPIO of register 3 - * - * NOTE: We cannot use the higher-level msp_gpio_mode()/out() - * as GPIO char driver may not be enabled and it would look up - * data inRAM! - */ - set_value_reg32(GPIO_CFG3_REG, 0xf000, 0x8000); - set_reg32(GPIO_DATA3_REG, 8); - - /* - * In case GPIO9 doesn't reset the board (jumper configurable!) - * fallback to device reset below. - */ -#endif - /* Set bit 1 of the MSP7120 reset register */ - *RST_SET_REG = 0x00000001; - - __asm__ __volatile__ ( - "endpoint: \n" - ); -} -#endif - -void msp_restart(char *command) -{ - printk(KERN_WARNING "Now rebooting .......\n"); - -#if defined(CONFIG_PMC_MSP7120_EVAL) || \ - defined(CONFIG_PMC_MSP7120_GW) || \ - defined(CONFIG_PMC_MSP7120_FPGA) - msp7120_reset(); -#else - /* No chip-specific reset code, just jump to the ROM reset vector */ - set_c0_status(ST0_BEV | ST0_ERL); - change_c0_config(CONF_CM_CMASK, CONF_CM_UNCACHED); - __flush_cache_all(); - write_c0_wired(0); - - __asm__ __volatile__("jr\t%0"::"r"(0xbfc00000)); -#endif -} - -void msp_halt(void) -{ - printk(KERN_WARNING "\n** You can safely turn off the power\n"); - while (1) - /* If possible call official function to get CPU WARs */ - if (cpu_wait) - (*cpu_wait)(); - else - __asm__(".set\tmips3\n\t" "wait\n\t" ".set\tmips0"); -} - -void msp_power_off(void) -{ - msp_halt(); -} - -void __init plat_mem_setup(void) -{ - _machine_restart = msp_restart; - _machine_halt = msp_halt; - pm_power_off = msp_power_off; -} - -void __init prom_init(void) -{ - unsigned long family; - unsigned long revision; - - prom_argc = fw_arg0; - prom_argv = (char **)fw_arg1; - prom_envp = (char **)fw_arg2; - - /* - * Someday we can use this with PMON2000 to get a - * platform call prom routines for output etc. without - * having to use grody hacks. For now it's unused. - * - * struct callvectors *cv = (struct callvectors *) fw_arg3; - */ - family = identify_family(); - revision = identify_revision(); - - switch (family) { - case FAMILY_FPGA: - if (FPGA_IS_MSP4200(revision)) { - /* Old-style revision ID */ - mips_machtype = MACH_MSP4200_FPGA; - } else { - mips_machtype = MACH_MSP_OTHER; - } - break; - - case FAMILY_MSP4200: -#if defined(CONFIG_PMC_MSP4200_EVAL) - mips_machtype = MACH_MSP4200_EVAL; -#elif defined(CONFIG_PMC_MSP4200_GW) - mips_machtype = MACH_MSP4200_GW; -#else - mips_machtype = MACH_MSP_OTHER; -#endif - break; - - case FAMILY_MSP4200_FPGA: - mips_machtype = MACH_MSP4200_FPGA; - break; - - case FAMILY_MSP7100: -#if defined(CONFIG_PMC_MSP7120_EVAL) - mips_machtype = MACH_MSP7120_EVAL; -#elif defined(CONFIG_PMC_MSP7120_GW) - mips_machtype = MACH_MSP7120_GW; -#else - mips_machtype = MACH_MSP_OTHER; -#endif - break; - - case FAMILY_MSP7100_FPGA: - mips_machtype = MACH_MSP7120_FPGA; - break; - - default: - /* we don't recognize the machine */ - mips_machtype = MACH_UNKNOWN; - panic("***Bogosity factor five***, exiting"); - break; - } - - prom_init_cmdline(); - - prom_meminit(); - - /* - * Sub-system setup follows. - * Setup functions can either be called here or using the - * subsys_initcall mechanism (i.e. see msp_pci_setup). The - * order in which they are called can be changed by using the - * link order in arch/mips/pmc-sierra/msp71xx/Makefile. - * - * NOTE: Please keep sub-system specific initialization code - * in separate specific files. - */ - msp_serial_setup(); - - register_vsmp_smp_ops(); -} diff --git a/arch/mips/pmcs-msp71xx/msp_smp.c b/arch/mips/pmcs-msp71xx/msp_smp.c deleted file mode 100644 index 00092e2924ec..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_smp.c +++ /dev/null @@ -1,56 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2000, 2001, 2004 MIPS Technologies, Inc. - * Copyright (C) 2001 Ralf Baechle - * Copyright (C) 2010 PMC-Sierra, Inc. - * - * VSMP support for MSP platforms . Derived from malta vsmp support. - */ -#include <linux/smp.h> -#include <linux/interrupt.h> - -#include <asm/setup.h> - -#ifdef CONFIG_MIPS_MT_SMP -#define MIPS_CPU_IPI_RESCHED_IRQ 0 /* SW int 0 for resched */ -#define MIPS_CPU_IPI_CALL_IRQ 1 /* SW int 1 for call */ - - -static void ipi_resched_dispatch(void) -{ - do_IRQ(MIPS_CPU_IPI_RESCHED_IRQ); -} - -static void ipi_call_dispatch(void) -{ - do_IRQ(MIPS_CPU_IPI_CALL_IRQ); -} - -static irqreturn_t ipi_resched_interrupt(int irq, void *dev_id) -{ - return IRQ_HANDLED; -} - -static irqreturn_t ipi_call_interrupt(int irq, void *dev_id) -{ - generic_smp_call_function_interrupt(); - - return IRQ_HANDLED; -} - -void __init arch_init_ipiirq(int irq, const char *name, irq_handler_t handler) -{ - if (request_irq(irq, handler, IRQF_PERCPU, name, NULL)) - pr_err("Failed to request irq %d (%s)\n", irq, name); - irq_set_handler(irq, handle_percpu_irq); -} - -void __init msp_vsmp_int_init(void) -{ - set_vi_handler(MIPS_CPU_IPI_RESCHED_IRQ, ipi_resched_dispatch); - set_vi_handler(MIPS_CPU_IPI_CALL_IRQ, ipi_call_dispatch); - arch_init_ipiirq(MIPS_CPU_IPI_RESCHED_IRQ, "IPI_resched", - ipi_resched_interrupt); - arch_init_ipiirq(MIPS_CPU_IPI_CALL_IRQ, "IPI_call", ipi_call_interrupt); -} -#endif /* CONFIG_MIPS_MT_SMP */ diff --git a/arch/mips/pmcs-msp71xx/msp_time.c b/arch/mips/pmcs-msp71xx/msp_time.c deleted file mode 100644 index 9c629829f447..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_time.c +++ /dev/null @@ -1,90 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Setting up the clock on MSP SOCs. No RTC typically. - * - * Carsten Langgaard, carstenl@mips.com - * Copyright (C) 1999,2000 MIPS Technologies, Inc. All rights reserved. - * - * ######################################################################## - * - * ######################################################################## - */ - -#include <linux/init.h> -#include <linux/kernel_stat.h> -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/ptrace.h> - -#include <asm/cevt-r4k.h> -#include <asm/mipsregs.h> -#include <asm/time.h> - -#include <msp_prom.h> -#include <msp_int.h> -#include <msp_regs.h> - -#define get_current_vpe() \ - ((read_c0_tcbind() >> TCBIND_CURVPE_SHIFT) & TCBIND_CURVPE) - -static int tim_installed; - -void __init plat_time_init(void) -{ - char *endp, *s; - unsigned long cpu_rate = 0; - - if (cpu_rate == 0) { - s = prom_getenv("clkfreqhz"); - cpu_rate = simple_strtoul(s, &endp, 10); - if (endp != NULL && *endp != 0) { - printk(KERN_ERR - "Clock rate in Hz parse error: %s\n", s); - cpu_rate = 0; - } - } - - if (cpu_rate == 0) { - s = prom_getenv("clkfreq"); - cpu_rate = 1000 * simple_strtoul(s, &endp, 10); - if (endp != NULL && *endp != 0) { - printk(KERN_ERR - "Clock rate in MHz parse error: %s\n", s); - cpu_rate = 0; - } - } - - if (cpu_rate == 0) { -#if defined(CONFIG_PMC_MSP7120_EVAL) \ - || defined(CONFIG_PMC_MSP7120_GW) - cpu_rate = 400000000; -#elif defined(CONFIG_PMC_MSP7120_FPGA) - cpu_rate = 25000000; -#else - cpu_rate = 150000000; -#endif - printk(KERN_ERR - "Failed to determine CPU clock rate, " - "assuming %ld hz ...\n", cpu_rate); - } - - printk(KERN_WARNING "Clock rate set to %ld\n", cpu_rate); - - /* timer frequency is 1/2 clock rate */ - mips_hpt_frequency = cpu_rate/2; -} - -unsigned int get_c0_compare_int(void) -{ - unsigned long flags = IRQF_PERCPU | IRQF_TIMER | IRQF_SHARED; - - /* MIPS_MT modes may want timer for second VPE */ - if ((get_current_vpe()) && !tim_installed) { - if (request_irq(MSP_INT_VPE1_TIMER, c0_compare_interrupt, flags, - "timer", c0_compare_interrupt)) - pr_err("Failed to register timer interrupt\n"); - tim_installed++; - } - - return get_current_vpe() ? MSP_INT_VPE1_TIMER : MSP_INT_VPE0_TIMER; -} diff --git a/arch/mips/pmcs-msp71xx/msp_usb.c b/arch/mips/pmcs-msp71xx/msp_usb.c deleted file mode 100644 index d38ac70b5a2e..000000000000 --- a/arch/mips/pmcs-msp71xx/msp_usb.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * The setup file for USB related hardware on PMC-Sierra MSP processors. - * - * Copyright 2006 PMC-Sierra, Inc. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - * - * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN - * NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, - * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT - * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF - * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON - * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF - * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 675 Mass Ave, Cambridge, MA 02139, USA. - */ -#if defined(CONFIG_USB_EHCI_HCD) || defined(CONFIG_USB_GADGET) - -#include <linux/init.h> -#include <linux/ioport.h> -#include <linux/platform_device.h> - -#include <asm/mipsregs.h> - -#include <msp_regs.h> -#include <msp_int.h> -#include <msp_prom.h> -#include <msp_usb.h> - - -#if defined(CONFIG_USB_EHCI_HCD) -static struct resource msp_usbhost0_resources[] = { - [0] = { /* EHCI-HS operational and capabilities registers */ - .start = MSP_USB0_HS_START, - .end = MSP_USB0_HS_END, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = MSP_INT_USB, - .end = MSP_INT_USB, - .flags = IORESOURCE_IRQ, - }, - [2] = { /* MSBus-to-AMBA bridge register space */ - .start = MSP_USB0_MAB_START, - .end = MSP_USB0_MAB_END, - .flags = IORESOURCE_MEM, - }, - [3] = { /* Identification and general hardware parameters */ - .start = MSP_USB0_ID_START, - .end = MSP_USB0_ID_END, - .flags = IORESOURCE_MEM, - }, -}; - -static u64 msp_usbhost0_dma_mask = 0xffffffffUL; - -static struct mspusb_device msp_usbhost0_device = { - .dev = { - .name = "pmcmsp-ehci", - .id = 0, - .dev = { - .dma_mask = &msp_usbhost0_dma_mask, - .coherent_dma_mask = 0xffffffffUL, - }, - .num_resources = ARRAY_SIZE(msp_usbhost0_resources), - .resource = msp_usbhost0_resources, - }, -}; -#endif /* CONFIG_USB_EHCI_HCD */ - -#if defined(CONFIG_USB_GADGET) -static struct resource msp_usbdev0_resources[] = { - [0] = { /* EHCI-HS operational and capabilities registers */ - .start = MSP_USB0_HS_START, - .end = MSP_USB0_HS_END, - .flags = IORESOURCE_MEM, - }, - [1] = { - .start = MSP_INT_USB, - .end = MSP_INT_USB, - .flags = IORESOURCE_IRQ, - }, - [2] = { /* MSBus-to-AMBA bridge register space */ - .start = MSP_USB0_MAB_START, - .end = MSP_USB0_MAB_END, - .flags = IORESOURCE_MEM, - }, - [3] = { /* Identification and general hardware parameters */ - .start = MSP_USB0_ID_START, - .end = MSP_USB0_ID_END, - .flags = IORESOURCE_MEM, - }, -}; - -static u64 msp_usbdev_dma_mask = 0xffffffffUL; - -/* This may need to be converted to a mspusb_device, too. */ -static struct mspusb_device msp_usbdev0_device = { - .dev = { - .name = "msp71xx_udc", - .id = 0, - .dev = { - .dma_mask = &msp_usbdev_dma_mask, - .coherent_dma_mask = 0xffffffffUL, - }, - .num_resources = ARRAY_SIZE(msp_usbdev0_resources), - .resource = msp_usbdev0_resources, - }, -}; -#endif /* CONFIG_USB_GADGET */ - -static int __init msp_usb_setup(void) -{ - char *strp; - char envstr[32]; - struct platform_device *msp_devs[NUM_USB_DEVS]; - unsigned int val; - - /* construct environment name usbmode */ - /* set usbmode <host/device> as pmon environment var */ - /* - * Could this perhaps be integrated into the "features" env var? - * Use the features key "U", and follow with "H" for host-mode, - * "D" for device-mode. If it works for Ethernet, why not USB... - * -- hammtrev, 2007/03/22 - */ - snprintf(&envstr[0], sizeof(envstr), "usbmode"); - - /* set default host mode */ - val = 1; - - /* get environment string */ - strp = prom_getenv(&envstr[0]); - if (strp) { - /* compare string */ - if (!strcmp(strp, "device")) - val = 0; - } - - if (val) { -#if defined(CONFIG_USB_EHCI_HCD) - msp_devs[0] = &msp_usbhost0_device.dev; - ppfinit("platform add USB HOST done %s.\n", msp_devs[0]->name); -#else - ppfinit("%s: echi_hcd not supported\n", __FILE__); -#endif /* CONFIG_USB_EHCI_HCD */ - } else { -#if defined(CONFIG_USB_GADGET) - /* get device mode structure */ - msp_devs[0] = &msp_usbdev0_device.dev; - ppfinit("platform add USB DEVICE done %s.\n" - , msp_devs[0]->name); -#else - ppfinit("%s: usb_gadget not supported\n", __FILE__); -#endif /* CONFIG_USB_GADGET */ - } - /* add device */ - platform_add_devices(msp_devs, ARRAY_SIZE(msp_devs)); - - return 0; -} - -subsys_initcall(msp_usb_setup); -#endif /* CONFIG_USB_EHCI_HCD || CONFIG_USB_GADGET */ diff --git a/arch/mips/pnx833x/Platform b/arch/mips/pnx833x/Platform index 287260669551..e5286a49fc3e 100644 --- a/arch/mips/pnx833x/Platform +++ b/arch/mips/pnx833x/Platform @@ -1,5 +1,4 @@ # NXP STB225 -platform-$(CONFIG_SOC_PNX833X) += pnx833x/ cflags-$(CONFIG_SOC_PNX833X) += -I$(srctree)/arch/mips/include/asm/mach-pnx833x load-$(CONFIG_NXP_STB220) += 0xffffffff80001000 load-$(CONFIG_NXP_STB225) += 0xffffffff80001000 diff --git a/arch/mips/ralink/Platform b/arch/mips/ralink/Platform index 6095fcc334f4..02ee0791481d 100644 --- a/arch/mips/ralink/Platform +++ b/arch/mips/ralink/Platform @@ -1,7 +1,6 @@ # # Ralink SoC common stuff # -core-$(CONFIG_RALINK) += arch/mips/ralink/ cflags-$(CONFIG_RALINK) += -I$(srctree)/arch/mips/include/asm/mach-ralink # diff --git a/arch/mips/ralink/bootrom.c b/arch/mips/ralink/bootrom.c index 88bcce59beeb..94ca8379b83c 100644 --- a/arch/mips/ralink/bootrom.c +++ b/arch/mips/ralink/bootrom.c @@ -31,7 +31,7 @@ static const struct file_operations bootrom_file_ops = { .release = single_release, }; -static int bootrom_setup(void) +static int __init bootrom_setup(void) { debugfs_create_file("bootrom", 0444, NULL, NULL, &bootrom_file_ops); return 0; diff --git a/arch/mips/ralink/mt7621.c b/arch/mips/ralink/mt7621.c index 0accb80db709..ca0ac607b0f3 100644 --- a/arch/mips/ralink/mt7621.c +++ b/arch/mips/ralink/mt7621.c @@ -20,11 +20,6 @@ #include "common.h" -#define SYSC_REG_SYSCFG 0x10 -#define SYSC_REG_CPLL_CLKCFG0 0x2c -#define SYSC_REG_CUR_CLK_STS 0x44 -#define CPU_CLK_SEL (BIT(30) | BIT(31)) - #define MT7621_GPIO_MODE_UART1 1 #define MT7621_GPIO_MODE_I2C 2 #define MT7621_GPIO_MODE_UART3_MASK 0x3 @@ -115,44 +110,6 @@ phys_addr_t mips_cpc_default_phys_base(void) panic("Cannot detect cpc address"); } -void __init ralink_clk_init(void) -{ - int cpu_fdiv = 0; - int cpu_ffrac = 0; - int fbdiv = 0; - u32 clk_sts, syscfg; - u8 clk_sel = 0, xtal_mode; - u32 cpu_clk; - - if ((rt_sysc_r32(SYSC_REG_CPLL_CLKCFG0) & CPU_CLK_SEL) != 0) - clk_sel = 1; - - switch (clk_sel) { - case 0: - clk_sts = rt_sysc_r32(SYSC_REG_CUR_CLK_STS); - cpu_fdiv = ((clk_sts >> 8) & 0x1F); - cpu_ffrac = (clk_sts & 0x1F); - cpu_clk = (500 * cpu_ffrac / cpu_fdiv) * 1000 * 1000; - break; - - case 1: - fbdiv = ((rt_sysc_r32(0x648) >> 4) & 0x7F) + 1; - syscfg = rt_sysc_r32(SYSC_REG_SYSCFG); - xtal_mode = (syscfg >> 6) & 0x7; - if (xtal_mode >= 6) { - /* 25Mhz Xtal */ - cpu_clk = 25 * fbdiv * 1000 * 1000; - } else if (xtal_mode >= 3) { - /* 40Mhz Xtal */ - cpu_clk = 40 * fbdiv * 1000 * 1000; - } else { - /* 20Mhz Xtal */ - cpu_clk = 20 * fbdiv * 1000 * 1000; - } - break; - } -} - void __init ralink_of_remap(void) { rt_sysc_membase = plat_of_remap_node("mtk,mt7621-sysc"); diff --git a/arch/mips/rb532/Platform b/arch/mips/rb532/Platform index aeec45a7cbb3..12eaa8790b3e 100644 --- a/arch/mips/rb532/Platform +++ b/arch/mips/rb532/Platform @@ -1,7 +1,6 @@ # # Routerboard 532 # -platform-$(CONFIG_MIKROTIK_RB532) += rb532/ cflags-$(CONFIG_MIKROTIK_RB532) += \ -I$(srctree)/arch/mips/include/asm/mach-rc32434 load-$(CONFIG_MIKROTIK_RB532) += 0xffffffff80101000 diff --git a/arch/mips/sgi-ip22/Platform b/arch/mips/sgi-ip22/Platform index e8f6b3a42a48..62fa30bb959e 100644 --- a/arch/mips/sgi-ip22/Platform +++ b/arch/mips/sgi-ip22/Platform @@ -7,7 +7,6 @@ # current variable will break so for 64-bit kernels we have to raise the start # address by 8kb. # -platform-$(CONFIG_SGI_IP22) += sgi-ip22/ cflags-$(CONFIG_SGI_IP22) += -I$(srctree)/arch/mips/include/asm/mach-ip22 ifdef CONFIG_32BIT load-$(CONFIG_SGI_IP22) += 0xffffffff88002000 @@ -29,6 +28,5 @@ ifdef CONFIG_SGI_IP28 $(error gcc doesn't support needed option -mr10k-cache-barrier=store) endif endif -platform-$(CONFIG_SGI_IP28) += sgi-ip22/ cflags-$(CONFIG_SGI_IP28) += -mr10k-cache-barrier=store -I$(srctree)/arch/mips/include/asm/mach-ip28 load-$(CONFIG_SGI_IP28) += 0xa800000020004000 diff --git a/arch/mips/sgi-ip27/Platform b/arch/mips/sgi-ip27/Platform index 1fb9c2ea7c8f..e734ee6abd44 100644 --- a/arch/mips/sgi-ip27/Platform +++ b/arch/mips/sgi-ip27/Platform @@ -5,8 +5,6 @@ # symmon, 0xc00000000001c000 for production kernels. Note that the value must # be 16kb aligned or the handling of the current variable will break. # -ifdef CONFIG_SGI_IP27 -platform-$(CONFIG_SGI_IP27) += sgi-ip27/ cflags-$(CONFIG_SGI_IP27) += -I$(srctree)/arch/mips/include/asm/mach-ip27 ifdef CONFIG_MAPPED_KERNEL load-$(CONFIG_SGI_IP27) += 0xc00000004001c000 @@ -16,4 +14,3 @@ else load-$(CONFIG_SGI_IP27) += 0xa80000000001c000 OBJCOPYFLAGS := --change-addresses=0x57ffffff80000000 endif -endif diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 61f3565f3645..c0e33632bc37 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -21,7 +21,6 @@ #include <asm/time.h> #include <asm/pgtable.h> #include <asm/sgialib.h> -#include <asm/sn/ioc3.h> #include <asm/sn/klconfig.h> #include <asm/sn/arch.h> #include <asm/sn/addrs.h> @@ -29,14 +28,6 @@ #include "ip27-common.h" -#define TICK_SIZE (tick_nsec / 1000) - -/* Includes for ioc3_init(). */ -#include <asm/sn/types.h> -#include <asm/pci/bridge.h> - -#include "ip27-common.h" - static int rt_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned int cpu = smp_processor_id(); diff --git a/arch/mips/sgi-ip30/Platform b/arch/mips/sgi-ip30/Platform index 2b5695c2049a..f6f11517e091 100644 --- a/arch/mips/sgi-ip30/Platform +++ b/arch/mips/sgi-ip30/Platform @@ -1,8 +1,5 @@ # # SGI-IP30 (Octane/Octane2) # -ifdef CONFIG_SGI_IP30 -platform-$(CONFIG_SGI_IP30) += sgi-ip30/ cflags-$(CONFIG_SGI_IP30) += -I$(srctree)/arch/mips/include/asm/mach-ip30 load-$(CONFIG_SGI_IP30) += 0xa800000020004000 -endif diff --git a/arch/mips/sgi-ip32/Platform b/arch/mips/sgi-ip32/Platform index 0fea556f3641..f58a7a02b4ca 100644 --- a/arch/mips/sgi-ip32/Platform +++ b/arch/mips/sgi-ip32/Platform @@ -6,6 +6,5 @@ # a multiple of the kernel stack size or the handling of the current variable # will break. # -platform-$(CONFIG_SGI_IP32) += sgi-ip32/ cflags-$(CONFIG_SGI_IP32) += -I$(srctree)/arch/mips/include/asm/mach-ip32 load-$(CONFIG_SGI_IP32) += 0xffffffff80004000 diff --git a/arch/mips/sibyte/Platform b/arch/mips/sibyte/Platform index af117330ce14..65b2225b76b2 100644 --- a/arch/mips/sibyte/Platform +++ b/arch/mips/sibyte/Platform @@ -1,10 +1,6 @@ # # These are all rather similar so we consider them a single platform # -platform-$(CONFIG_SIBYTE_BCM112X) += sibyte/ -platform-$(CONFIG_SIBYTE_SB1250) += sibyte/ -platform-$(CONFIG_SIBYTE_BCM1x55) += sibyte/ -platform-$(CONFIG_SIBYTE_BCM1x80) += sibyte/ # # Sibyte SB1250 / BCM1480 family of SOCs diff --git a/arch/mips/sni/Platform b/arch/mips/sni/Platform index 2644a9d63c0f..b0b3dde0bef8 100644 --- a/arch/mips/sni/Platform +++ b/arch/mips/sni/Platform @@ -1,7 +1,6 @@ # # SNI RM # -platform-$(CONFIG_SNI_RM) += sni/ cflags-$(CONFIG_SNI_RM) += -I$(srctree)/arch/mips/include/asm/mach-rm ifdef CONFIG_CPU_LITTLE_ENDIAN load-$(CONFIG_SNI_RM) += 0xffffffff80600000 diff --git a/arch/mips/tools/elf-entry.c b/arch/mips/tools/elf-entry.c index adde79ce7fc0..dbd14ff05b4c 100644 --- a/arch/mips/tools/elf-entry.c +++ b/arch/mips/tools/elf-entry.c @@ -51,11 +51,14 @@ int main(int argc, const char *argv[]) nread = fread(&hdr, 1, sizeof(hdr), file); if (nread != sizeof(hdr)) { perror("Unable to read input file"); + fclose(file); return EXIT_FAILURE; } - if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) + if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG)) { + fclose(file); die("Input is not an ELF\n"); + } switch (hdr.ehdr32.e_ident[EI_CLASS]) { case ELFCLASS32: @@ -67,6 +70,7 @@ int main(int argc, const char *argv[]) entry = be32toh(hdr.ehdr32.e_entry); break; default: + fclose(file); die("Invalid ELF encoding\n"); } @@ -83,14 +87,17 @@ int main(int argc, const char *argv[]) entry = be64toh(hdr.ehdr64.e_entry); break; default: + fclose(file); die("Invalid ELF encoding\n"); } break; default: + fclose(file); die("Invalid ELF class\n"); } printf("0x%016" PRIx64 "\n", entry); + fclose(file); return EXIT_SUCCESS; } diff --git a/arch/mips/tools/loongson3-llsc-check.c b/arch/mips/tools/loongson3-llsc-check.c index 0ebddd0ae46f..bdbc7b4324ec 100644 --- a/arch/mips/tools/loongson3-llsc-check.c +++ b/arch/mips/tools/loongson3-llsc-check.c @@ -303,5 +303,7 @@ out_munmap: out_close: close(vmlinux_fd); out_ret: + fprintf(stdout, "loongson3-llsc-check returns %s\n", + status ? "failure" : "success"); return status; } diff --git a/arch/mips/txx9/Kconfig b/arch/mips/txx9/Kconfig index 9a22a182b7a4..85c4c121c71f 100644 --- a/arch/mips/txx9/Kconfig +++ b/arch/mips/txx9/Kconfig @@ -58,7 +58,7 @@ config TOSHIBA_RBTX4939 config SOC_TX3927 bool select CEVT_TXX9 - select HAS_TXX9_SERIAL + imply HAS_TXX9_SERIAL select HAVE_PCI select IRQ_TXX9 select GPIO_TXX9 @@ -66,30 +66,30 @@ config SOC_TX3927 config SOC_TX4927 bool select CEVT_TXX9 - select HAS_TXX9_SERIAL + imply HAS_TXX9_SERIAL select HAVE_PCI select IRQ_TXX9 select PCI_TX4927 select GPIO_TXX9 - select HAS_TXX9_ACLC + imply HAS_TXX9_ACLC config SOC_TX4938 bool select CEVT_TXX9 - select HAS_TXX9_SERIAL + imply HAS_TXX9_SERIAL select HAVE_PCI select IRQ_TXX9 select PCI_TX4927 select GPIO_TXX9 - select HAS_TXX9_ACLC + imply HAS_TXX9_ACLC config SOC_TX4939 bool select CEVT_TXX9 - select HAS_TXX9_SERIAL + imply HAS_TXX9_SERIAL select HAVE_PCI select PCI_TX4927 - select HAS_TXX9_ACLC + imply HAS_TXX9_ACLC config TXX9_7SEGLED bool diff --git a/arch/mips/txx9/Platform b/arch/mips/txx9/Platform index a176d1fd5799..7f4429ba22eb 100644 --- a/arch/mips/txx9/Platform +++ b/arch/mips/txx9/Platform @@ -1,6 +1,3 @@ -platform-$(CONFIG_MACH_TX39XX) += txx9/ -platform-$(CONFIG_MACH_TX49XX) += txx9/ - cflags-$(CONFIG_MACH_TX39XX) += \ -I$(srctree)/arch/mips/include/asm/mach-tx39xx cflags-$(CONFIG_MACH_TX49XX) += \ diff --git a/arch/mips/vdso/Kconfig b/arch/mips/vdso/Kconfig new file mode 100644 index 000000000000..7aec721398d5 --- /dev/null +++ b/arch/mips/vdso/Kconfig @@ -0,0 +1,18 @@ +# For the pre-R6 code in arch/mips/vdso/vdso.h for locating +# the base address of VDSO, the linker will emit a R_MIPS_PC32 +# relocation in binutils > 2.25 but it will fail with older versions +# because that relocation is not supported for that symbol. As a result +# of which we are forced to disable the VDSO symbols when building +# with < 2.25 binutils on pre-R6 kernels. For more references on why we +# can't use other methods to get the base address of VDSO please refer to +# the comments on that file. +# +# GCC (at least up to version 9.2) appears to emit function calls that make use +# of the GOT when targeting microMIPS, which we can't use in the VDSO due to +# the lack of relocations. As such, we disable the VDSO for microMIPS builds. + +config MIPS_LD_CAN_LINK_VDSO + def_bool LD_VERSION >= 225000000 || LD_IS_LLD + +config MIPS_DISABLE_VDSO + def_bool CPU_MICROMIPS || (!CPU_MIPSR6 && !MIPS_LD_CAN_LINK_VDSO) diff --git a/arch/mips/vdso/Makefile b/arch/mips/vdso/Makefile index d7fe8408603e..2e64c7600eea 100644 --- a/arch/mips/vdso/Makefile +++ b/arch/mips/vdso/Makefile @@ -52,44 +52,17 @@ endif CFLAGS_REMOVE_vgettimeofday.o = -pg -DISABLE_VDSO := n - -# -# For the pre-R6 code in arch/mips/vdso/vdso.h for locating -# the base address of VDSO, the linker will emit a R_MIPS_PC32 -# relocation in binutils > 2.25 but it will fail with older versions -# because that relocation is not supported for that symbol. As a result -# of which we are forced to disable the VDSO symbols when building -# with < 2.25 binutils on pre-R6 kernels. For more references on why we -# can't use other methods to get the base address of VDSO please refer to -# the comments on that file. -# -ifndef CONFIG_CPU_MIPSR6 - ifeq ($(call ld-ifversion, -lt, 225000000, y),y) +ifdef CONFIG_MIPS_DISABLE_VDSO + ifndef CONFIG_MIPS_LD_CAN_LINK_VDSO $(warning MIPS VDSO requires binutils >= 2.25) - DISABLE_VDSO := y endif -endif - -# -# GCC (at least up to version 9.2) appears to emit function calls that make use -# of the GOT when targeting microMIPS, which we can't use in the VDSO due to -# the lack of relocations. As such, we disable the VDSO for microMIPS builds. -# -ifdef CONFIG_CPU_MICROMIPS - DISABLE_VDSO := y -endif - -ifeq ($(DISABLE_VDSO),y) obj-vdso-y := $(filter-out vgettimeofday.o, $(obj-vdso-y)) - ccflags-vdso += -DDISABLE_MIPS_VDSO endif # VDSO linker flags. -VDSO_LDFLAGS := \ - -Wl,-Bsymbolic -Wl,--no-undefined -Wl,-soname=linux-vdso.so.1 \ - $(addprefix -Wl$(comma),$(filter -E%,$(KBUILD_CFLAGS))) \ - -nostdlib -shared -Wl,--hash-style=sysv -Wl,--build-id +ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ + $(filter -E%,$(KBUILD_CFLAGS)) -nostdlib -shared \ + -G 0 --eh-frame-hdr --hash-style=sysv --build-id -T CFLAGS_REMOVE_vdso.o = -pg @@ -108,11 +81,7 @@ quiet_cmd_vdso_mips_check = VDSOCHK $@ # quiet_cmd_vdsold_and_vdso_check = LD $@ - cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check); $(cmd_vdso_mips_check) - -quiet_cmd_vdsold = VDSO $@ - cmd_vdsold = $(CC) $(c_flags) $(VDSO_LDFLAGS) \ - -Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@ + cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check); $(cmd_vdso_mips_check) quiet_cmd_vdsoas_o_S = AS $@ cmd_vdsoas_o_S = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/mips/vdso/vdso.lds.S b/arch/mips/vdso/vdso.lds.S index da4627430aba..d90b65724d78 100644 --- a/arch/mips/vdso/vdso.lds.S +++ b/arch/mips/vdso/vdso.lds.S @@ -91,7 +91,7 @@ PHDRS VERSION { LINUX_2.6 { -#ifndef DISABLE_MIPS_VDSO +#ifndef CONFIG_MIPS_DISABLE_VDSO global: __vdso_clock_gettime; __vdso_gettimeofday; diff --git a/arch/mips/vr41xx/Makefile b/arch/mips/vr41xx/Makefile new file mode 100644 index 000000000000..765020d5ee4d --- /dev/null +++ b/arch/mips/vr41xx/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0 +# +obj-$(CONFIG_MACH_VR41XX) += common/ +obj-$(CONFIG_CASIO_E55) += casio-e55/ +obj-$(CONFIG_IBM_WORKPAD) += ibm-workpad/ diff --git a/arch/mips/vr41xx/Platform b/arch/mips/vr41xx/Platform index b6c8d5c08ddb..3f593a3e5678 100644 --- a/arch/mips/vr41xx/Platform +++ b/arch/mips/vr41xx/Platform @@ -1,19 +1,16 @@ # # NEC VR4100 series based machines # -platform-$(CONFIG_MACH_VR41XX) += vr41xx/common/ cflags-$(CONFIG_MACH_VR41XX) += -I$(srctree)/arch/mips/include/asm/mach-vr41xx # # CASIO CASSIPEIA E-55/65 (VR4111) # -platform-$(CONFIG_CASIO_E55) += vr41xx/casio-e55/ load-$(CONFIG_CASIO_E55) += 0xffffffff80004000 # # IBM WorkPad z50 (VR4121) # -platform-$(CONFIG_IBM_WORKPAD) += vr41xx/ibm-workpad/ load-$(CONFIG_IBM_WORKPAD) += 0xffffffff80004000 # diff --git a/arch/nios2/include/asm/checksum.h b/arch/nios2/include/asm/checksum.h index 703c5ee63421..ec39698d3bea 100644 --- a/arch/nios2/include/asm/checksum.h +++ b/arch/nios2/include/asm/checksum.h @@ -14,8 +14,6 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum); extern __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum); -extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *csum_err); #define csum_partial_copy_nocheck(src, dst, len, sum) \ csum_partial_copy((src), (dst), (len), (sum)) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3801a2ef9bca..92128f9164ce 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -68,7 +68,7 @@ config PARISC The PA-RISC microprocessor is designed by Hewlett-Packard and used in many of their workstations & servers (HP9000 700 and 800 series, and later HP3000 series). The PA-RISC Linux project home page is - at <http://www.parisc-linux.org/>. + at <https://parisc.wiki.kernel.org>. config CPU_BIG_ENDIAN def_bool y diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 628cd8bb7ad8..fadbbd010337 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -21,8 +21,6 @@ KBUILD_IMAGE := vmlinuz NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 -LIBGCC = $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name) -export LIBGCC ifdef CONFIG_64BIT UTS_MACHINE := parisc64 @@ -110,6 +108,8 @@ cflags-$(CONFIG_PA8X00) += -march=2.0 -mschedule=8000 head-y := arch/parisc/kernel/head.o KBUILD_CFLAGS += $(cflags-y) +LIBGCC := $(shell $(CC) -print-libgcc-file-name) +export LIBGCC kernel-y := mm/ kernel/ math-emu/ diff --git a/arch/parisc/boot/compressed/Makefile b/arch/parisc/boot/compressed/Makefile index 1e5879c6a752..dff453687530 100644 --- a/arch/parisc/boot/compressed/Makefile +++ b/arch/parisc/boot/compressed/Makefile @@ -16,6 +16,7 @@ targets += real2.S firmware.c KBUILD_CFLAGS := -D__KERNEL__ -O2 -DBOOTLOADER KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING +KBUILD_CFLAGS += -fno-strict-aliasing KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -fno-builtin-printf KBUILD_CFLAGS += -fno-PIE -mno-space-regs -mdisable-fpregs -Os ifndef CONFIG_64BIT diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h index c1c22819a04d..fe8c63b2d2c3 100644 --- a/arch/parisc/include/asm/checksum.h +++ b/arch/parisc/include/asm/checksum.h @@ -27,13 +27,6 @@ extern __wsum csum_partial(const void *, int, __wsum); extern __wsum csum_partial_copy_nocheck(const void *, void *, int, __wsum); /* - * this is a new version of the above that records errors it finds in *errp, - * but continues and zeros the rest of the buffer. - */ -extern __wsum csum_partial_copy_from_user(const void __user *src, - void *dst, int len, __wsum sum, int *errp); - -/* * Optimized for IP headers, which always checksum on 4 octet boundaries. * * Written by Randolph Chung <tausq@debian.org>, and then mucked with by diff --git a/arch/parisc/include/asm/floppy.h b/arch/parisc/include/asm/floppy.h index 09b6f4c1687e..762cfe7778c0 100644 --- a/arch/parisc/include/asm/floppy.h +++ b/arch/parisc/include/asm/floppy.h @@ -29,8 +29,8 @@ #define CSW fd_routine[can_use_virtual_dma & 1] -#define fd_inb(port) readb(port) -#define fd_outb(value, port) writeb(value, port) +#define fd_inb(base, reg) readb((base) + (reg)) +#define fd_outb(value, base, reg) writeb(value, (base) + (reg)) #define fd_request_dma() CSW._request_dma(FLOPPY_DMA,"floppy") #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) @@ -75,27 +75,28 @@ static void floppy_hardint(int irq, void *dev_id, struct pt_regs * regs) register char *lptr = virtual_dma_addr; for (lcount = virtual_dma_count; lcount; lcount--) { - st = fd_inb(virtual_dma_port+4) & 0xa0 ; - if (st != 0xa0) + st = fd_inb(virtual_dma_port, FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) { - fd_outb(*lptr, virtual_dma_port+5); + fd_outb(*lptr, virtual_dma_port, FD_DATA); } else { - *lptr = fd_inb(virtual_dma_port+5); + *lptr = fd_inb(virtual_dma_port, FD_DATA); } lptr++; } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = fd_inb(virtual_dma_port+4); + st = fd_inb(virtual_dma_port, FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if (st == 0x20) + if (st == STATUS_DMA) return; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 9832c73a7021..cd7df48dc874 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -93,10 +93,8 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) #define set_pte_at(mm, addr, ptep, pteval) \ do { \ - pte_t old_pte; \ unsigned long flags; \ spin_lock_irqsave(pgd_spinlock((mm)->pgd), flags);\ - old_pte = *ptep; \ set_pte(ptep, pteval); \ purge_tlb_entries(mm, addr); \ spin_unlock_irqrestore(pgd_spinlock((mm)->pgd), flags);\ diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 1d976f2ebff0..665b70086685 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -4,7 +4,8 @@ * * PDC == Processor Dependent Code * - * See http://www.parisc-linux.org/documentation/index.html + * See PDC documentation at + * https://parisc.wiki.kernel.org/index.php/Technical_Documentation * for documentation describing the entry points and calling * conventions defined below. * diff --git a/arch/parisc/kernel/hardware.c b/arch/parisc/kernel/hardware.c index 98c5203c1ab0..17161e72ea29 100644 --- a/arch/parisc/kernel/hardware.c +++ b/arch/parisc/kernel/hardware.c @@ -6,7 +6,8 @@ * * Based on the document "PA-RISC 1.1 I/O Firmware Architecture * Reference Specification", March 7, 1999, version 0.96. This - * is available at http://parisc-linux.org/documentation/ + * is available at + * https://parisc.wiki.kernel.org/index.php/Technical_Documentation * * Copyright 1999 by Alex deVries <alex@onefishtwo.ca> * and copyright 1999 The Puffin Group Inc. diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index 1c50093e2ebe..fac18c623d16 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -3,9 +3,9 @@ * * The best reference for this stuff is probably the Processor- * Specific ELF Supplement for PA-RISC: - * http://ftp.parisc-linux.org/docs/arch/elf-pa-hp.pdf + * https://parisc.wiki.kernel.org/index.php/File:Elf-pa-hp.pdf * - * Linux/PA-RISC Project (http://www.parisc-linux.org/) + * Linux/PA-RISC Project * Copyright (C) 2003 Randolph Chung <tausq at debian . org> * Copyright (C) 2008 Helge Deller <deller@gmx.de> * diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index e1a8fee3ad49..d46b6709ec56 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -300,7 +300,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, else return -EFAULT; - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return -EACCES; if (count != sizeof(uint32_t)) diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 52a15f5cd130..5a758fa6ec52 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -435,3 +435,4 @@ 435 common clone3 sys_clone3_wrapper 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c index 256322c7b648..c6f161583549 100644 --- a/arch/parisc/lib/checksum.c +++ b/arch/parisc/lib/checksum.c @@ -123,23 +123,3 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst, return sum; } EXPORT_SYMBOL(csum_partial_copy_nocheck); - -/* - * Copy from userspace and compute checksum. If we catch an exception - * then zero the rest of the buffer. - */ -__wsum csum_partial_copy_from_user(const void __user *src, - void *dst, int len, - __wsum sum, int *err_ptr) -{ - int missing; - - missing = copy_from_user(dst, src, len); - if (missing) { - memset(dst + len - missing, 0, missing); - *err_ptr = -EFAULT; - } - - return csum_partial(dst, len, sum); -} -EXPORT_SYMBOL(csum_partial_copy_from_user); diff --git a/arch/powerpc/crypto/md5-glue.c b/arch/powerpc/crypto/md5-glue.c index 7d1bf2fcf668..c24f605033bd 100644 --- a/arch/powerpc/crypto/md5-glue.c +++ b/arch/powerpc/crypto/md5-glue.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/md5.h> #include <asm/byteorder.h> diff --git a/arch/powerpc/crypto/sha1-spe-glue.c b/arch/powerpc/crypto/sha1-spe-glue.c index 6379990bd604..cb57be4ada61 100644 --- a/arch/powerpc/crypto/sha1-spe-glue.c +++ b/arch/powerpc/crypto/sha1-spe-glue.c @@ -11,7 +11,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> diff --git a/arch/powerpc/crypto/sha1.c b/arch/powerpc/crypto/sha1.c index 7b43fc352089..b40dc50a6908 100644 --- a/arch/powerpc/crypto/sha1.c +++ b/arch/powerpc/crypto/sha1.c @@ -16,14 +16,13 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> -extern void powerpc_sha_transform(u32 *state, const u8 *src, u32 *temp); +void powerpc_sha_transform(u32 *state, const u8 *src); -static int sha1_init(struct shash_desc *desc) +static int powerpc_sha1_init(struct shash_desc *desc) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -34,8 +33,8 @@ static int sha1_init(struct shash_desc *desc) return 0; } -static int sha1_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int powerpc_sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { struct sha1_state *sctx = shash_desc_ctx(desc); unsigned int partial, done; @@ -47,7 +46,6 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, src = data; if ((partial + len) > 63) { - u32 temp[SHA_WORKSPACE_WORDS]; if (partial) { done = -partial; @@ -56,12 +54,11 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, } do { - powerpc_sha_transform(sctx->state, src, temp); + powerpc_sha_transform(sctx->state, src); done += 64; src = data + done; } while (done + 63 < len); - memzero_explicit(temp, sizeof(temp)); partial = 0; } memcpy(sctx->buffer + partial, src, len - done); @@ -71,7 +68,7 @@ static int sha1_update(struct shash_desc *desc, const u8 *data, /* Add padding and return the message digest. */ -static int sha1_final(struct shash_desc *desc, u8 *out) +static int powerpc_sha1_final(struct shash_desc *desc, u8 *out) { struct sha1_state *sctx = shash_desc_ctx(desc); __be32 *dst = (__be32 *)out; @@ -84,10 +81,10 @@ static int sha1_final(struct shash_desc *desc, u8 *out) /* Pad out to 56 mod 64 */ index = sctx->count & 0x3f; padlen = (index < 56) ? (56 - index) : ((64+56) - index); - sha1_update(desc, padding, padlen); + powerpc_sha1_update(desc, padding, padlen); /* Append length */ - sha1_update(desc, (const u8 *)&bits, sizeof(bits)); + powerpc_sha1_update(desc, (const u8 *)&bits, sizeof(bits)); /* Store state in digest */ for (i = 0; i < 5; i++) @@ -99,7 +96,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out) return 0; } -static int sha1_export(struct shash_desc *desc, void *out) +static int powerpc_sha1_export(struct shash_desc *desc, void *out) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -107,7 +104,7 @@ static int sha1_export(struct shash_desc *desc, void *out) return 0; } -static int sha1_import(struct shash_desc *desc, const void *in) +static int powerpc_sha1_import(struct shash_desc *desc, const void *in) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -117,11 +114,11 @@ static int sha1_import(struct shash_desc *desc, const void *in) static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_init, - .update = sha1_update, - .final = sha1_final, - .export = sha1_export, - .import = sha1_import, + .init = powerpc_sha1_init, + .update = powerpc_sha1_update, + .final = powerpc_sha1_final, + .export = powerpc_sha1_export, + .import = powerpc_sha1_import, .descsize = sizeof(struct sha1_state), .statesize = sizeof(struct sha1_state), .base = { diff --git a/arch/powerpc/crypto/sha256-spe-glue.c b/arch/powerpc/crypto/sha256-spe-glue.c index 84939e563b81..ceb0b6c980b3 100644 --- a/arch/powerpc/crypto/sha256-spe-glue.c +++ b/arch/powerpc/crypto/sha256-spe-glue.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <asm/byteorder.h> diff --git a/arch/powerpc/include/asm/floppy.h b/arch/powerpc/include/asm/floppy.h index 167c44b58848..7af9a68fd949 100644 --- a/arch/powerpc/include/asm/floppy.h +++ b/arch/powerpc/include/asm/floppy.h @@ -13,8 +13,8 @@ #include <asm/machdep.h> -#define fd_inb(port) inb_p(port) -#define fd_outb(value,port) outb_p(value,port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_enable_dma() enable_dma(FLOPPY_DMA) #define fd_disable_dma() fd_ops->_disable_dma(FLOPPY_DMA) @@ -61,21 +61,22 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) st = 1; for (lcount=virtual_dma_count, lptr=virtual_dma_addr; lcount; lcount--, lptr++) { - st=inb(virtual_dma_port+4) & 0xa0 ; - if (st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) - outb_p(*lptr, virtual_dma_port+5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port+5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port+4); + st = inb(virtual_dma_port + FD_STATUS); - if (st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count=0; doing_vdma = 0; diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 635969b5b58e..13f90dd03450 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -699,10 +699,6 @@ static inline void iosync(void) * * * iounmap undoes such a mapping and can be hooked * - * * __ioremap_at (and the pending __iounmap_at) are low level functions to - * create hand-made mappings for use only by the PCI code and cannot - * currently be hooked. Must be page aligned. - * * * __ioremap_caller is the same as above but takes an explicit caller * reference rather than using __builtin_return_address(0) * @@ -719,6 +715,8 @@ void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size); extern void iounmap(volatile void __iomem *addr); +void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size); + int early_ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot); void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, @@ -727,10 +725,6 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size, extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size, pgprot_t prot, void *caller); -extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea, - unsigned long size, pgprot_t prot); -extern void __iounmap_at(void *ea, unsigned long size); - /* * When CONFIG_PPC_INDIRECT_PIO is set, we use the generic iomap implementation * which needs some additional definitions here. They basically allow PIO diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 506e4df2d730..6e5d85ba588d 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -78,7 +78,7 @@ struct kvmppc_vcore { struct kvm_vcpu *runnable_threads[MAX_SMT_THREADS]; struct list_head preempt_list; spinlock_t lock; - struct swait_queue_head wq; + struct rcuwait wait; spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */ u64 stolen_tb; u64 preempt_tb; diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1dc63101ffe1..337047ba4a56 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -751,7 +751,7 @@ struct kvm_vcpu_arch { u8 irq_pending; /* Used by XIVE to signal pending guest irqs */ u32 last_inst; - struct swait_queue_head *wqp; + struct rcuwait *waitp; struct kvmppc_vcore *vcore; int ret; int trap; diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h index 69f4cb3b7c56..b92e81b256e5 100644 --- a/arch/powerpc/include/asm/pci-bridge.h +++ b/arch/powerpc/include/asm/pci-bridge.h @@ -66,7 +66,7 @@ struct pci_controller { void __iomem *io_base_virt; #ifdef CONFIG_PPC64 - void *io_base_alloc; + void __iomem *io_base_alloc; #endif resource_size_t io_base_phys; resource_size_t pci_io_size; diff --git a/arch/powerpc/include/uapi/asm/kvm_para.h b/arch/powerpc/include/uapi/asm/kvm_para.h index be48c2215fa2..a809b1b44ddf 100644 --- a/arch/powerpc/include/uapi/asm/kvm_para.h +++ b/arch/powerpc/include/uapi/asm/kvm_para.h @@ -31,7 +31,7 @@ * Struct fields are always 32 or 64 bit aligned, depending on them being 32 * or 64 bit wide respectively. * - * See Documentation/virt/kvm/ppc-pv.txt + * See Documentation/virt/kvm/ppc-pv.rst */ struct kvm_vcpu_arch_shared { __u64 scratch1; diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 1f1169856dc8..112d150354b2 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -748,9 +748,8 @@ void do_IRQ(struct pt_regs *regs) static void *__init alloc_vm_stack(void) { - return __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN, VMALLOC_START, - VMALLOC_END, THREADINFO_GFP, PAGE_KERNEL, - 0, NUMA_NO_NODE, (void*)_RET_IP_); + return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP, + NUMA_NO_NODE, (void *)_RET_IP_); } static void __init vmap_irqstack_init(void) diff --git a/arch/powerpc/kernel/isa-bridge.c b/arch/powerpc/kernel/isa-bridge.c index 773671b512df..2257d24e6a26 100644 --- a/arch/powerpc/kernel/isa-bridge.c +++ b/arch/powerpc/kernel/isa-bridge.c @@ -18,6 +18,7 @@ #include <linux/init.h> #include <linux/mm.h> #include <linux/notifier.h> +#include <linux/vmalloc.h> #include <asm/processor.h> #include <asm/io.h> @@ -38,6 +39,22 @@ EXPORT_SYMBOL_GPL(isa_bridge_pcidev); #define ISA_SPACE_MASK 0x1 #define ISA_SPACE_IO 0x1 +static void remap_isa_base(phys_addr_t pa, unsigned long size) +{ + WARN_ON_ONCE(ISA_IO_BASE & ~PAGE_MASK); + WARN_ON_ONCE(pa & ~PAGE_MASK); + WARN_ON_ONCE(size & ~PAGE_MASK); + + if (slab_is_available()) { + if (ioremap_page_range(ISA_IO_BASE, ISA_IO_BASE + size, pa, + pgprot_noncached(PAGE_KERNEL))) + unmap_kernel_range(ISA_IO_BASE, size); + } else { + early_ioremap_range(ISA_IO_BASE, pa, size, + pgprot_noncached(PAGE_KERNEL)); + } +} + static void pci_process_ISA_OF_ranges(struct device_node *isa_node, unsigned long phb_io_base_phys) { @@ -105,15 +122,13 @@ static void pci_process_ISA_OF_ranges(struct device_node *isa_node, if (size > 0x10000) size = 0x10000; - __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, - size, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(phb_io_base_phys, size); return; inval_range: printk(KERN_ERR "no ISA IO ranges or unexpected isa range, " "mapping 64k\n"); - __ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE, - 0x10000, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(phb_io_base_phys, 0x10000); } @@ -248,8 +263,7 @@ void __init isa_bridge_init_non_pci(struct device_node *np) * and map it */ isa_io_base = ISA_IO_BASE; - __ioremap_at(pbase, (void *)ISA_IO_BASE, - size, pgprot_noncached(PAGE_KERNEL)); + remap_isa_base(pbase, size); pr_debug("ISA: Non-PCI bridge is %pOF\n", np); } @@ -297,7 +311,7 @@ static void isa_bridge_remove(void) isa_bridge_pcidev = NULL; /* Unmap the ISA area */ - __iounmap_at((void *)ISA_IO_BASE, 0x10000); + unmap_kernel_range(ISA_IO_BASE, 0x10000); } /** diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index fb4f61096613..0cd1c88bfc8b 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -655,9 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, int rc = -1; switch (reason) { - case KMSG_DUMP_RESTART: - case KMSG_DUMP_HALT: - case KMSG_DUMP_POWEROFF: + case KMSG_DUMP_SHUTDOWN: /* These are almost always orderly shutdowns. */ return; case KMSG_DUMP_OOPS: diff --git a/arch/powerpc/kernel/pci_64.c b/arch/powerpc/kernel/pci_64.c index f83d1f69b1dd..d9ac980c398c 100644 --- a/arch/powerpc/kernel/pci_64.c +++ b/arch/powerpc/kernel/pci_64.c @@ -109,23 +109,47 @@ int pcibios_unmap_io_space(struct pci_bus *bus) /* Get the host bridge */ hose = pci_bus_to_host(bus); - /* Check if we have IOs allocated */ - if (hose->io_base_alloc == NULL) - return 0; - pr_debug("IO unmapping for PHB %pOF\n", hose->dn); pr_debug(" alloc=0x%p\n", hose->io_base_alloc); - /* This is a PHB, we fully unmap the IO area */ - vunmap(hose->io_base_alloc); - + iounmap(hose->io_base_alloc); return 0; } EXPORT_SYMBOL_GPL(pcibios_unmap_io_space); -static int pcibios_map_phb_io_space(struct pci_controller *hose) +void __iomem *ioremap_phb(phys_addr_t paddr, unsigned long size) { struct vm_struct *area; + unsigned long addr; + + WARN_ON_ONCE(paddr & ~PAGE_MASK); + WARN_ON_ONCE(size & ~PAGE_MASK); + + /* + * Let's allocate some IO space for that guy. We don't pass VM_IOREMAP + * because we don't care about alignment tricks that the core does in + * that case. Maybe we should due to stupid card with incomplete + * address decoding but I'd rather not deal with those outside of the + * reserved 64K legacy region. + */ + area = __get_vm_area_caller(size, 0, PHB_IO_BASE, PHB_IO_END, + __builtin_return_address(0)); + if (!area) + return NULL; + + addr = (unsigned long)area->addr; + if (ioremap_page_range(addr, addr + size, paddr, + pgprot_noncached(PAGE_KERNEL))) { + unmap_kernel_range(addr, size); + return NULL; + } + + return (void __iomem *)addr; +} +EXPORT_SYMBOL_GPL(ioremap_phb); + +static int pcibios_map_phb_io_space(struct pci_controller *hose) +{ unsigned long phys_page; unsigned long size_page; unsigned long io_virt_offset; @@ -146,12 +170,11 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) * with incomplete address decoding but I'd rather not deal with * those outside of the reserved 64K legacy region. */ - area = __get_vm_area(size_page, 0, PHB_IO_BASE, PHB_IO_END); - if (area == NULL) + hose->io_base_alloc = ioremap_phb(phys_page, size_page); + if (!hose->io_base_alloc) return -ENOMEM; - hose->io_base_alloc = area->addr; - hose->io_base_virt = (void __iomem *)(area->addr + - hose->io_base_phys - phys_page); + hose->io_base_virt = hose->io_base_alloc + + hose->io_base_phys - phys_page; pr_debug("IO mapping for PHB %pOF\n", hose->dn); pr_debug(" phys=0x%016llx, virt=0x%p (alloc=0x%p)\n", @@ -159,11 +182,6 @@ static int pcibios_map_phb_io_space(struct pci_controller *hose) pr_debug(" size=0x%016llx (alloc=0x%016lx)\n", hose->pci_io_size, size_page); - /* Establish the mapping */ - if (__ioremap_at(phys_page, area->addr, size_page, - pgprot_noncached(PAGE_KERNEL)) == NULL) - return -ENOMEM; - /* Fixup hose IO resource */ io_virt_offset = pcibios_io_space_offset(hose); hose->io_resource.start += io_virt_offset; diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 220ae11555f2..f833a3190822 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -527,3 +527,4 @@ 435 spu clone3 sys_ni_syscall 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 3fca22276bb1..b44dd75de517 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -441,15 +441,9 @@ nonrecoverable: void system_reset_exception(struct pt_regs *regs) { unsigned long hsrr0, hsrr1; - bool nested = in_nmi(); bool saved_hsrrs = false; - /* - * Avoid crashes in case of nested NMI exceptions. Recoverability - * is determined by RI and in_nmi - */ - if (!nested) - nmi_enter(); + nmi_enter(); /* * System reset can interrupt code where HSRRs are live and MSR[RI]=1. @@ -521,8 +515,7 @@ out: mtspr(SPRN_HSRR1, hsrr1); } - if (!nested) - nmi_exit(); + nmi_exit(); /* What should we do here? We could issue a shutdown or hard reset. */ } @@ -823,9 +816,8 @@ int machine_check_generic(struct pt_regs *regs) void machine_check_exception(struct pt_regs *regs) { int recover = 0; - bool nested = in_nmi(); - if (!nested) - nmi_enter(); + + nmi_enter(); __this_cpu_inc(irq_stat.mce_exceptions); @@ -851,8 +843,7 @@ void machine_check_exception(struct pt_regs *regs) if (check_io_access(regs)) goto bail; - if (!nested) - nmi_exit(); + nmi_exit(); die("Machine check", regs, SIGBUS); @@ -863,8 +854,7 @@ void machine_check_exception(struct pt_regs *regs) return; bail: - if (!nested) - nmi_exit(); + nmi_exit(); } void SMIException(struct pt_regs *regs) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 31a0f201fb6f..a1706b63b82d 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -90,6 +90,7 @@ SECTIONS #ifdef CONFIG_PPC64 *(.tramp.ftrace.text); #endif + NOINSTR_TEXT SCHED_TEXT CPUIDLE_TEXT LOCK_TEXT diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 5690a1f9b976..37508a356f28 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -36,41 +36,38 @@ #include "book3s.h" #include "trace.h" -#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ -#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ - /* #define EXIT_DEBUG */ struct kvm_stats_debugfs_item debugfs_entries[] = { - { "exits", VCPU_STAT(sum_exits) }, - { "mmio", VCPU_STAT(mmio_exits) }, - { "sig", VCPU_STAT(signal_exits) }, - { "sysc", VCPU_STAT(syscall_exits) }, - { "inst_emu", VCPU_STAT(emulated_inst_exits) }, - { "dec", VCPU_STAT(dec_exits) }, - { "ext_intr", VCPU_STAT(ext_intr_exits) }, - { "queue_intr", VCPU_STAT(queue_intr) }, - { "halt_poll_success_ns", VCPU_STAT(halt_poll_success_ns) }, - { "halt_poll_fail_ns", VCPU_STAT(halt_poll_fail_ns) }, - { "halt_wait_ns", VCPU_STAT(halt_wait_ns) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll), }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll), }, - { "halt_successful_wait", VCPU_STAT(halt_successful_wait) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "pf_storage", VCPU_STAT(pf_storage) }, - { "sp_storage", VCPU_STAT(sp_storage) }, - { "pf_instruc", VCPU_STAT(pf_instruc) }, - { "sp_instruc", VCPU_STAT(sp_instruc) }, - { "ld", VCPU_STAT(ld) }, - { "ld_slow", VCPU_STAT(ld_slow) }, - { "st", VCPU_STAT(st) }, - { "st_slow", VCPU_STAT(st_slow) }, - { "pthru_all", VCPU_STAT(pthru_all) }, - { "pthru_host", VCPU_STAT(pthru_host) }, - { "pthru_bad_aff", VCPU_STAT(pthru_bad_aff) }, - { "largepages_2M", VM_STAT(num_2M_pages, .mode = 0444) }, - { "largepages_1G", VM_STAT(num_1G_pages, .mode = 0444) }, + VCPU_STAT("exits", sum_exits), + VCPU_STAT("mmio", mmio_exits), + VCPU_STAT("sig", signal_exits), + VCPU_STAT("sysc", syscall_exits), + VCPU_STAT("inst_emu", emulated_inst_exits), + VCPU_STAT("dec", dec_exits), + VCPU_STAT("ext_intr", ext_intr_exits), + VCPU_STAT("queue_intr", queue_intr), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("halt_wait_ns", halt_wait_ns), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_successful_wait", halt_successful_wait), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("pf_storage", pf_storage), + VCPU_STAT("sp_storage", sp_storage), + VCPU_STAT("pf_instruc", pf_instruc), + VCPU_STAT("sp_instruc", sp_instruc), + VCPU_STAT("ld", ld), + VCPU_STAT("ld_slow", ld_slow), + VCPU_STAT("st", st), + VCPU_STAT("st_slow", st_slow), + VCPU_STAT("pthru_all", pthru_all), + VCPU_STAT("pthru_host", pthru_host), + VCPU_STAT("pthru_bad_aff", pthru_bad_aff), + VM_STAT("largepages_2M", num_2M_pages, .mode = 0444), + VM_STAT("largepages_1G", num_1G_pages, .mode = 0444), { NULL } }; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 93493f0cbfe8..7f59c47a5b9d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -230,13 +230,11 @@ static bool kvmppc_ipi_thread(int cpu) static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu) { int cpu; - struct swait_queue_head *wqp; + struct rcuwait *waitp; - wqp = kvm_arch_vcpu_wq(vcpu); - if (swq_has_sleeper(wqp)) { - swake_up_one(wqp); + waitp = kvm_arch_vcpu_get_wait(vcpu); + if (rcuwait_wake_up(waitp)) ++vcpu->stat.halt_wakeup; - } cpu = READ_ONCE(vcpu->arch.thread_cpu); if (cpu >= 0 && kvmppc_ipi_thread(cpu)) @@ -2125,7 +2123,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int id) spin_lock_init(&vcore->lock); spin_lock_init(&vcore->stoltb_lock); - init_swait_queue_head(&vcore->wq); + rcuwait_init(&vcore->wait); vcore->preempt_tb = TB_NIL; vcore->lpcr = kvm->arch.lpcr; vcore->first_vcpuid = id; @@ -3784,7 +3782,6 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) ktime_t cur, start_poll, start_wait; int do_sleep = 1; u64 block_ns; - DECLARE_SWAITQUEUE(wait); /* Poll for pending exceptions and ceded state */ cur = start_poll = ktime_get(); @@ -3812,10 +3809,10 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) } } - prepare_to_swait_exclusive(&vc->wq, &wait, TASK_INTERRUPTIBLE); - + prepare_to_rcuwait(&vc->wait); + set_current_state(TASK_INTERRUPTIBLE); if (kvmppc_vcore_check_block(vc)) { - finish_swait(&vc->wq, &wait); + finish_rcuwait(&vc->wait); do_sleep = 0; /* If we polled, count this as a successful poll */ if (vc->halt_poll_ns) @@ -3829,7 +3826,7 @@ static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc) trace_kvmppc_vcore_blocked(vc, 0); spin_unlock(&vc->lock); schedule(); - finish_swait(&vc->wq, &wait); + finish_rcuwait(&vc->wait); spin_lock(&vc->lock); vc->vcore_state = VCORE_INACTIVE; trace_kvmppc_vcore_blocked(vc, 1); @@ -3940,7 +3937,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) kvmppc_start_thread(vcpu, vc); trace_kvm_guest_enter(vcpu); } else if (vc->vcore_state == VCORE_SLEEPING) { - swake_up_one(&vc->wq); + rcuwait_wake_up(&vc->wait); } } @@ -4279,7 +4276,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu) } user_vrsave = mfspr(SPRN_VRSAVE); - vcpu->arch.wqp = &vcpu->arch.vcore->wq; + vcpu->arch.waitp = &vcpu->arch.vcore->wait; vcpu->arch.pgdir = kvm->mm->pgd; vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST; diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 6c18ea88fd25..888afe8d35cc 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -35,29 +35,28 @@ unsigned long kvmppc_booke_handlers; -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU - struct kvm_stats_debugfs_item debugfs_entries[] = { - { "mmio", VCPU_STAT(mmio_exits) }, - { "sig", VCPU_STAT(signal_exits) }, - { "itlb_r", VCPU_STAT(itlb_real_miss_exits) }, - { "itlb_v", VCPU_STAT(itlb_virt_miss_exits) }, - { "dtlb_r", VCPU_STAT(dtlb_real_miss_exits) }, - { "dtlb_v", VCPU_STAT(dtlb_virt_miss_exits) }, - { "sysc", VCPU_STAT(syscall_exits) }, - { "isi", VCPU_STAT(isi_exits) }, - { "dsi", VCPU_STAT(dsi_exits) }, - { "inst_emu", VCPU_STAT(emulated_inst_exits) }, - { "dec", VCPU_STAT(dec_exits) }, - { "ext_intr", VCPU_STAT(ext_intr_exits) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "doorbell", VCPU_STAT(dbell_exits) }, - { "guest doorbell", VCPU_STAT(gdbell_exits) }, - { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, + VCPU_STAT("mmio", mmio_exits), + VCPU_STAT("sig", signal_exits), + VCPU_STAT("itlb_r", itlb_real_miss_exits), + VCPU_STAT("itlb_v", itlb_virt_miss_exits), + VCPU_STAT("dtlb_r", dtlb_real_miss_exits), + VCPU_STAT("dtlb_v", dtlb_virt_miss_exits), + VCPU_STAT("sysc", syscall_exits), + VCPU_STAT("isi", isi_exits), + VCPU_STAT("dsi", dsi_exits), + VCPU_STAT("inst_emu", emulated_inst_exits), + VCPU_STAT("dec", dec_exits), + VCPU_STAT("ext_intr", ext_intr_exits), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("doorbell", dbell_exits), + VCPU_STAT("guest doorbell", gdbell_exits), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VM_STAT("remote_tlb_flush", remote_tlb_flush), { NULL } }; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index ad2f172c26a6..27ccff612903 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -752,7 +752,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) if (err) goto out_vcpu_uninit; - vcpu->arch.wqp = &vcpu->wq; + vcpu->arch.waitp = &vcpu->wait; kvmppc_create_vcpu_debugfs(vcpu, vcpu->vcpu_id); return 0; @@ -1765,8 +1765,9 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) return r; } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *run = vcpu->run; int r; vcpu_load(vcpu); diff --git a/arch/powerpc/mm/ioremap_64.c b/arch/powerpc/mm/ioremap_64.c index 50a99d9684f7..ba5cbb0d66bd 100644 --- a/arch/powerpc/mm/ioremap_64.c +++ b/arch/powerpc/mm/ioremap_64.c @@ -4,56 +4,6 @@ #include <linux/slab.h> #include <linux/vmalloc.h> -/** - * Low level function to establish the page tables for an IO mapping - */ -void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot) -{ - int ret; - unsigned long va = (unsigned long)ea; - - /* We don't support the 4K PFN hack with ioremap */ - if (pgprot_val(prot) & H_PAGE_4K_PFN) - return NULL; - - if ((ea + size) >= (void *)IOREMAP_END) { - pr_warn("Outside the supported range\n"); - return NULL; - } - - WARN_ON(pa & ~PAGE_MASK); - WARN_ON(((unsigned long)ea) & ~PAGE_MASK); - WARN_ON(size & ~PAGE_MASK); - - if (slab_is_available()) { - ret = ioremap_page_range(va, va + size, pa, prot); - if (ret) - unmap_kernel_range(va, size); - } else { - ret = early_ioremap_range(va, pa, size, prot); - } - - if (ret) - return NULL; - - return (void __iomem *)ea; -} -EXPORT_SYMBOL(__ioremap_at); - -/** - * Low level function to tear down the page tables for an IO mapping. This is - * used for mappings that are manipulated manually, like partial unmapping of - * PCI IOs or ISA space. - */ -void __iounmap_at(void *ea, unsigned long size) -{ - WARN_ON(((unsigned long)ea) & ~PAGE_MASK); - WARN_ON(size & ~PAGE_MASK); - - unmap_kernel_range((unsigned long)ea, size); -} -EXPORT_SYMBOL(__iounmap_at); - void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller) { diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index eb82dda884e5..0edcfd0b491d 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -976,7 +976,7 @@ static int thread_imc_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return -EACCES; /* Sampling not supported */ @@ -1412,7 +1412,7 @@ static int trace_imc_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (!capable(CAP_SYS_ADMIN)) + if (!perfmon_capable()) return -EACCES; /* Return if this is a couting event */ diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c index 8b3296b62f65..3b75e8f60609 100644 --- a/arch/powerpc/platforms/cell/spufs/coredump.c +++ b/arch/powerpc/platforms/cell/spufs/coredump.c @@ -21,22 +21,6 @@ #include "spufs.h" -static ssize_t do_coredump_read(int num, struct spu_context *ctx, void *buffer, - size_t size, loff_t *off) -{ - u64 data; - int ret; - - if (spufs_coredump_read[num].read) - return spufs_coredump_read[num].read(ctx, buffer, size, off); - - data = spufs_coredump_read[num].get(ctx); - ret = snprintf(buffer, size, "0x%.16llx", data); - if (ret >= size) - return size; - return ++ret; /* count trailing NULL */ -} - static int spufs_ctx_note_size(struct spu_context *ctx, int dfd) { int i, sz, total = 0; @@ -118,58 +102,43 @@ int spufs_coredump_extra_notes_size(void) static int spufs_arch_write_note(struct spu_context *ctx, int i, struct coredump_params *cprm, int dfd) { - loff_t pos = 0; - int sz, rc, total = 0; - const int bufsz = PAGE_SIZE; - char *name; - char fullname[80], *buf; + size_t sz = spufs_coredump_read[i].size; + char fullname[80]; struct elf_note en; - size_t skip; - - buf = (void *)get_zeroed_page(GFP_KERNEL); - if (!buf) - return -ENOMEM; + size_t ret; - name = spufs_coredump_read[i].name; - sz = spufs_coredump_read[i].size; - - sprintf(fullname, "SPU/%d/%s", dfd, name); + sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name); en.n_namesz = strlen(fullname) + 1; en.n_descsz = sz; en.n_type = NT_SPU; if (!dump_emit(cprm, &en, sizeof(en))) - goto Eio; - + return -EIO; if (!dump_emit(cprm, fullname, en.n_namesz)) - goto Eio; - + return -EIO; if (!dump_align(cprm, 4)) - goto Eio; - - do { - rc = do_coredump_read(i, ctx, buf, bufsz, &pos); - if (rc > 0) { - if (!dump_emit(cprm, buf, rc)) - goto Eio; - total += rc; - } - } while (rc == bufsz && total < sz); - - if (rc < 0) - goto out; - - skip = roundup(cprm->pos - total + sz, 4) - cprm->pos; - if (!dump_skip(cprm, skip)) - goto Eio; - - rc = 0; -out: - free_page((unsigned long)buf); - return rc; -Eio: - free_page((unsigned long)buf); - return -EIO; + return -EIO; + + if (spufs_coredump_read[i].dump) { + ret = spufs_coredump_read[i].dump(ctx, cprm); + if (ret < 0) + return ret; + } else { + char buf[32]; + + ret = snprintf(buf, sizeof(buf), "0x%.16llx", + spufs_coredump_read[i].get(ctx)); + if (ret >= sizeof(buf)) + return sizeof(buf); + + /* count trailing the NULL: */ + if (!dump_emit(cprm, buf, ret + 1)) + return -EIO; + } + + if (!dump_skip(cprm, roundup(cprm->pos - ret + sz, 4) - cprm->pos)) + return -EIO; + return 0; } int spufs_coredump_extra_notes_write(struct coredump_params *cprm) diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c index c0f950a3f4e1..e44427c24585 100644 --- a/arch/powerpc/platforms/cell/spufs/file.c +++ b/arch/powerpc/platforms/cell/spufs/file.c @@ -9,6 +9,7 @@ #undef DEBUG +#include <linux/coredump.h> #include <linux/fs.h> #include <linux/ioctl.h> #include <linux/export.h> @@ -129,6 +130,14 @@ out: return ret; } +static ssize_t spufs_dump_emit(struct coredump_params *cprm, void *buf, + size_t size) +{ + if (!dump_emit(cprm, buf, size)) + return -EIO; + return size; +} + #define DEFINE_SPUFS_SIMPLE_ATTRIBUTE(__fops, __get, __set, __fmt) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ @@ -172,12 +181,9 @@ spufs_mem_release(struct inode *inode, struct file *file) } static ssize_t -__spufs_mem_read(struct spu_context *ctx, char __user *buffer, - size_t size, loff_t *pos) +spufs_mem_dump(struct spu_context *ctx, struct coredump_params *cprm) { - char *local_store = ctx->ops->get_ls(ctx); - return simple_read_from_buffer(buffer, size, pos, local_store, - LS_SIZE); + return spufs_dump_emit(cprm, ctx->ops->get_ls(ctx), LS_SIZE); } static ssize_t @@ -190,7 +196,8 @@ spufs_mem_read(struct file *file, char __user *buffer, ret = spu_acquire(ctx); if (ret) return ret; - ret = __spufs_mem_read(ctx, buffer, size, pos); + ret = simple_read_from_buffer(buffer, size, pos, ctx->ops->get_ls(ctx), + LS_SIZE); spu_release(ctx); return ret; @@ -459,12 +466,10 @@ spufs_regs_open(struct inode *inode, struct file *file) } static ssize_t -__spufs_regs_read(struct spu_context *ctx, char __user *buffer, - size_t size, loff_t *pos) +spufs_regs_dump(struct spu_context *ctx, struct coredump_params *cprm) { - struct spu_lscsa *lscsa = ctx->csa.lscsa; - return simple_read_from_buffer(buffer, size, pos, - lscsa->gprs, sizeof lscsa->gprs); + return spufs_dump_emit(cprm, ctx->csa.lscsa->gprs, + sizeof(ctx->csa.lscsa->gprs)); } static ssize_t @@ -482,7 +487,8 @@ spufs_regs_read(struct file *file, char __user *buffer, ret = spu_acquire_saved(ctx); if (ret) return ret; - ret = __spufs_regs_read(ctx, buffer, size, pos); + ret = simple_read_from_buffer(buffer, size, pos, ctx->csa.lscsa->gprs, + sizeof(ctx->csa.lscsa->gprs)); spu_release_saved(ctx); return ret; } @@ -517,12 +523,10 @@ static const struct file_operations spufs_regs_fops = { }; static ssize_t -__spufs_fpcr_read(struct spu_context *ctx, char __user * buffer, - size_t size, loff_t * pos) +spufs_fpcr_dump(struct spu_context *ctx, struct coredump_params *cprm) { - struct spu_lscsa *lscsa = ctx->csa.lscsa; - return simple_read_from_buffer(buffer, size, pos, - &lscsa->fpcr, sizeof(lscsa->fpcr)); + return spufs_dump_emit(cprm, &ctx->csa.lscsa->fpcr, + sizeof(ctx->csa.lscsa->fpcr)); } static ssize_t @@ -535,7 +539,8 @@ spufs_fpcr_read(struct file *file, char __user * buffer, ret = spu_acquire_saved(ctx); if (ret) return ret; - ret = __spufs_fpcr_read(ctx, buffer, size, pos); + ret = simple_read_from_buffer(buffer, size, pos, &ctx->csa.lscsa->fpcr, + sizeof(ctx->csa.lscsa->fpcr)); spu_release_saved(ctx); return ret; } @@ -590,17 +595,12 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 mbox_data, __user *udata; + u32 mbox_data, __user *udata = (void __user *)buf; ssize_t count; if (len < 4) return -EINVAL; - if (!access_ok(buf, len)) - return -EFAULT; - - udata = (void __user *)buf; - count = spu_acquire(ctx); if (count) return count; @@ -616,7 +616,7 @@ static ssize_t spufs_mbox_read(struct file *file, char __user *buf, * but still need to return the data we have * read successfully so far. */ - ret = __put_user(mbox_data, udata); + ret = put_user(mbox_data, udata); if (ret) { if (!count) count = -EFAULT; @@ -698,17 +698,12 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 ibox_data, __user *udata; + u32 ibox_data, __user *udata = (void __user *)buf; ssize_t count; if (len < 4) return -EINVAL; - if (!access_ok(buf, len)) - return -EFAULT; - - udata = (void __user *)buf; - count = spu_acquire(ctx); if (count) goto out; @@ -727,7 +722,7 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, } /* if we can't write at all, return -EFAULT */ - count = __put_user(ibox_data, udata); + count = put_user(ibox_data, udata); if (count) goto out_unlock; @@ -741,7 +736,7 @@ static ssize_t spufs_ibox_read(struct file *file, char __user *buf, * but still need to return the data we have * read successfully so far. */ - ret = __put_user(ibox_data, udata); + ret = put_user(ibox_data, udata); if (ret) break; } @@ -836,17 +831,13 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - u32 wbox_data, __user *udata; + u32 wbox_data, __user *udata = (void __user *)buf; ssize_t count; if (len < 4) return -EINVAL; - udata = (void __user *)buf; - if (!access_ok(buf, len)) - return -EFAULT; - - if (__get_user(wbox_data, udata)) + if (get_user(wbox_data, udata)) return -EFAULT; count = spu_acquire(ctx); @@ -873,7 +864,7 @@ static ssize_t spufs_wbox_write(struct file *file, const char __user *buf, /* write as much as possible */ for (count = 4, udata++; (count + 4) <= len; count += 4, udata++) { int ret; - ret = __get_user(wbox_data, udata); + ret = get_user(wbox_data, udata); if (ret) break; @@ -967,28 +958,26 @@ spufs_signal1_release(struct inode *inode, struct file *file) return 0; } -static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, - size_t len, loff_t *pos) +static ssize_t spufs_signal1_dump(struct spu_context *ctx, + struct coredump_params *cprm) { - int ret = 0; - u32 data; + if (!ctx->csa.spu_chnlcnt_RW[3]) + return 0; + return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[3], + sizeof(ctx->csa.spu_chnldata_RW[3])); +} - if (len < 4) +static ssize_t __spufs_signal1_read(struct spu_context *ctx, char __user *buf, + size_t len) +{ + if (len < sizeof(ctx->csa.spu_chnldata_RW[3])) return -EINVAL; - - if (ctx->csa.spu_chnlcnt_RW[3]) { - data = ctx->csa.spu_chnldata_RW[3]; - ret = 4; - } - - if (!ret) - goto out; - - if (copy_to_user(buf, &data, 4)) + if (!ctx->csa.spu_chnlcnt_RW[3]) + return 0; + if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[3], + sizeof(ctx->csa.spu_chnldata_RW[3]))) return -EFAULT; - -out: - return ret; + return sizeof(ctx->csa.spu_chnldata_RW[3]); } static ssize_t spufs_signal1_read(struct file *file, char __user *buf, @@ -1000,7 +989,7 @@ static ssize_t spufs_signal1_read(struct file *file, char __user *buf, ret = spu_acquire_saved(ctx); if (ret) return ret; - ret = __spufs_signal1_read(ctx, buf, len, pos); + ret = __spufs_signal1_read(ctx, buf, len); spu_release_saved(ctx); return ret; @@ -1104,28 +1093,26 @@ spufs_signal2_release(struct inode *inode, struct file *file) return 0; } -static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, - size_t len, loff_t *pos) +static ssize_t spufs_signal2_dump(struct spu_context *ctx, + struct coredump_params *cprm) { - int ret = 0; - u32 data; + if (!ctx->csa.spu_chnlcnt_RW[4]) + return 0; + return spufs_dump_emit(cprm, &ctx->csa.spu_chnldata_RW[4], + sizeof(ctx->csa.spu_chnldata_RW[4])); +} - if (len < 4) +static ssize_t __spufs_signal2_read(struct spu_context *ctx, char __user *buf, + size_t len) +{ + if (len < sizeof(ctx->csa.spu_chnldata_RW[4])) return -EINVAL; - - if (ctx->csa.spu_chnlcnt_RW[4]) { - data = ctx->csa.spu_chnldata_RW[4]; - ret = 4; - } - - if (!ret) - goto out; - - if (copy_to_user(buf, &data, 4)) + if (!ctx->csa.spu_chnlcnt_RW[4]) + return 0; + if (copy_to_user(buf, &ctx->csa.spu_chnldata_RW[4], + sizeof(ctx->csa.spu_chnldata_RW[4]))) return -EFAULT; - -out: - return ret; + return sizeof(ctx->csa.spu_chnldata_RW[4]); } static ssize_t spufs_signal2_read(struct file *file, char __user *buf, @@ -1137,7 +1124,7 @@ static ssize_t spufs_signal2_read(struct file *file, char __user *buf, ret = spu_acquire_saved(ctx); if (ret) return ret; - ret = __spufs_signal2_read(ctx, buf, len, pos); + ret = __spufs_signal2_read(ctx, buf, len); spu_release_saved(ctx); return ret; @@ -1961,38 +1948,36 @@ static const struct file_operations spufs_caps_fops = { .release = single_release, }; -static ssize_t __spufs_mbox_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static ssize_t spufs_mbox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) { - u32 data; - - /* EOF if there's no entry in the mbox */ if (!(ctx->csa.prob.mb_stat_R & 0x0000ff)) return 0; - - data = ctx->csa.prob.pu_mb_R; - - return simple_read_from_buffer(buf, len, pos, &data, sizeof data); + return spufs_dump_emit(cprm, &ctx->csa.prob.pu_mb_R, + sizeof(ctx->csa.prob.pu_mb_R)); } static ssize_t spufs_mbox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { - int ret; struct spu_context *ctx = file->private_data; - - if (!access_ok(buf, len)) - return -EFAULT; + u32 stat, data; + int ret; ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_mbox_info_read(ctx, buf, len, pos); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.prob.pu_mb_R; spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + /* EOF if there's no entry in the mbox */ + if (!(stat & 0x0000ff)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); } static const struct file_operations spufs_mbox_info_fops = { @@ -2001,38 +1986,36 @@ static const struct file_operations spufs_mbox_info_fops = { .llseek = generic_file_llseek, }; -static ssize_t __spufs_ibox_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static ssize_t spufs_ibox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) { - u32 data; - - /* EOF if there's no entry in the ibox */ if (!(ctx->csa.prob.mb_stat_R & 0xff0000)) return 0; - - data = ctx->csa.priv2.puint_mb_R; - - return simple_read_from_buffer(buf, len, pos, &data, sizeof data); + return spufs_dump_emit(cprm, &ctx->csa.priv2.puint_mb_R, + sizeof(ctx->csa.priv2.puint_mb_R)); } static ssize_t spufs_ibox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + u32 stat, data; int ret; - if (!access_ok(buf, len)) - return -EFAULT; - ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_ibox_info_read(ctx, buf, len, pos); + stat = ctx->csa.prob.mb_stat_R; + data = ctx->csa.priv2.puint_mb_R; spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + /* EOF if there's no entry in the ibox */ + if (!(stat & 0xff0000)) + return 0; + + return simple_read_from_buffer(buf, len, pos, &data, sizeof(data)); } static const struct file_operations spufs_ibox_info_fops = { @@ -2041,41 +2024,36 @@ static const struct file_operations spufs_ibox_info_fops = { .llseek = generic_file_llseek, }; -static ssize_t __spufs_wbox_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static size_t spufs_wbox_info_cnt(struct spu_context *ctx) { - int i, cnt; - u32 data[4]; - u32 wbox_stat; - - wbox_stat = ctx->csa.prob.mb_stat_R; - cnt = 4 - ((wbox_stat & 0x00ff00) >> 8); - for (i = 0; i < cnt; i++) { - data[i] = ctx->csa.spu_mailbox_data[i]; - } + return (4 - ((ctx->csa.prob.mb_stat_R & 0x00ff00) >> 8)) * sizeof(u32); +} - return simple_read_from_buffer(buf, len, pos, &data, - cnt * sizeof(u32)); +static ssize_t spufs_wbox_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + return spufs_dump_emit(cprm, &ctx->csa.spu_mailbox_data, + spufs_wbox_info_cnt(ctx)); } static ssize_t spufs_wbox_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; - int ret; - - if (!access_ok(buf, len)) - return -EFAULT; + u32 data[ARRAY_SIZE(ctx->csa.spu_mailbox_data)]; + int ret, count; ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_wbox_info_read(ctx, buf, len, pos); + count = spufs_wbox_info_cnt(ctx); + memcpy(&data, &ctx->csa.spu_mailbox_data, sizeof(data)); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &data, + count * sizeof(u32)); } static const struct file_operations spufs_wbox_info_fops = { @@ -2084,50 +2062,53 @@ static const struct file_operations spufs_wbox_info_fops = { .llseek = generic_file_llseek, }; -static ssize_t __spufs_dma_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static void spufs_get_dma_info(struct spu_context *ctx, + struct spu_dma_info *info) { - struct spu_dma_info info; - struct mfc_cq_sr *qp, *spuqp; int i; - info.dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; - info.dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; - info.dma_info_status = ctx->csa.spu_chnldata_RW[24]; - info.dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; - info.dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; + info->dma_info_type = ctx->csa.priv2.spu_tag_status_query_RW; + info->dma_info_mask = ctx->csa.lscsa->tag_mask.slot[0]; + info->dma_info_status = ctx->csa.spu_chnldata_RW[24]; + info->dma_info_stall_and_notify = ctx->csa.spu_chnldata_RW[25]; + info->dma_info_atomic_command_status = ctx->csa.spu_chnldata_RW[27]; for (i = 0; i < 16; i++) { - qp = &info.dma_info_command_data[i]; - spuqp = &ctx->csa.priv2.spuq[i]; + struct mfc_cq_sr *qp = &info->dma_info_command_data[i]; + struct mfc_cq_sr *spuqp = &ctx->csa.priv2.spuq[i]; qp->mfc_cq_data0_RW = spuqp->mfc_cq_data0_RW; qp->mfc_cq_data1_RW = spuqp->mfc_cq_data1_RW; qp->mfc_cq_data2_RW = spuqp->mfc_cq_data2_RW; qp->mfc_cq_data3_RW = spuqp->mfc_cq_data3_RW; } +} - return simple_read_from_buffer(buf, len, pos, &info, - sizeof info); +static ssize_t spufs_dma_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + struct spu_dma_info info; + + spufs_get_dma_info(ctx, &info); + return spufs_dump_emit(cprm, &info, sizeof(info)); } static ssize_t spufs_dma_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + struct spu_dma_info info; int ret; - if (!access_ok(buf, len)) - return -EFAULT; - ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_dma_info_read(ctx, buf, len, pos); + spufs_get_dma_info(ctx, &info); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); } static const struct file_operations spufs_dma_info_fops = { @@ -2136,52 +2117,55 @@ static const struct file_operations spufs_dma_info_fops = { .llseek = no_llseek, }; -static ssize_t __spufs_proxydma_info_read(struct spu_context *ctx, - char __user *buf, size_t len, loff_t *pos) +static void spufs_get_proxydma_info(struct spu_context *ctx, + struct spu_proxydma_info *info) { - struct spu_proxydma_info info; - struct mfc_cq_sr *qp, *puqp; - int ret = sizeof info; int i; - if (len < ret) - return -EINVAL; - - if (!access_ok(buf, len)) - return -EFAULT; + info->proxydma_info_type = ctx->csa.prob.dma_querytype_RW; + info->proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; + info->proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; - info.proxydma_info_type = ctx->csa.prob.dma_querytype_RW; - info.proxydma_info_mask = ctx->csa.prob.dma_querymask_RW; - info.proxydma_info_status = ctx->csa.prob.dma_tagstatus_R; for (i = 0; i < 8; i++) { - qp = &info.proxydma_info_command_data[i]; - puqp = &ctx->csa.priv2.puq[i]; + struct mfc_cq_sr *qp = &info->proxydma_info_command_data[i]; + struct mfc_cq_sr *puqp = &ctx->csa.priv2.puq[i]; qp->mfc_cq_data0_RW = puqp->mfc_cq_data0_RW; qp->mfc_cq_data1_RW = puqp->mfc_cq_data1_RW; qp->mfc_cq_data2_RW = puqp->mfc_cq_data2_RW; qp->mfc_cq_data3_RW = puqp->mfc_cq_data3_RW; } +} - return simple_read_from_buffer(buf, len, pos, &info, - sizeof info); +static ssize_t spufs_proxydma_info_dump(struct spu_context *ctx, + struct coredump_params *cprm) +{ + struct spu_proxydma_info info; + + spufs_get_proxydma_info(ctx, &info); + return spufs_dump_emit(cprm, &info, sizeof(info)); } static ssize_t spufs_proxydma_info_read(struct file *file, char __user *buf, size_t len, loff_t *pos) { struct spu_context *ctx = file->private_data; + struct spu_proxydma_info info; int ret; + if (len < sizeof(info)) + return -EINVAL; + ret = spu_acquire_saved(ctx); if (ret) return ret; spin_lock(&ctx->csa.register_lock); - ret = __spufs_proxydma_info_read(ctx, buf, len, pos); + spufs_get_proxydma_info(ctx, &info); spin_unlock(&ctx->csa.register_lock); spu_release_saved(ctx); - return ret; + return simple_read_from_buffer(buf, len, pos, &info, + sizeof(info)); } static const struct file_operations spufs_proxydma_info_fops = { @@ -2625,23 +2609,23 @@ const struct spufs_tree_descr spufs_dir_debug_contents[] = { }; const struct spufs_coredump_reader spufs_coredump_read[] = { - { "regs", __spufs_regs_read, NULL, sizeof(struct spu_reg128[128])}, - { "fpcr", __spufs_fpcr_read, NULL, sizeof(struct spu_reg128) }, + { "regs", spufs_regs_dump, NULL, sizeof(struct spu_reg128[128])}, + { "fpcr", spufs_fpcr_dump, NULL, sizeof(struct spu_reg128) }, { "lslr", NULL, spufs_lslr_get, 19 }, { "decr", NULL, spufs_decr_get, 19 }, { "decr_status", NULL, spufs_decr_status_get, 19 }, - { "mem", __spufs_mem_read, NULL, LS_SIZE, }, - { "signal1", __spufs_signal1_read, NULL, sizeof(u32) }, + { "mem", spufs_mem_dump, NULL, LS_SIZE, }, + { "signal1", spufs_signal1_dump, NULL, sizeof(u32) }, { "signal1_type", NULL, spufs_signal1_type_get, 19 }, - { "signal2", __spufs_signal2_read, NULL, sizeof(u32) }, + { "signal2", spufs_signal2_dump, NULL, sizeof(u32) }, { "signal2_type", NULL, spufs_signal2_type_get, 19 }, { "event_mask", NULL, spufs_event_mask_get, 19 }, { "event_status", NULL, spufs_event_status_get, 19 }, - { "mbox_info", __spufs_mbox_info_read, NULL, sizeof(u32) }, - { "ibox_info", __spufs_ibox_info_read, NULL, sizeof(u32) }, - { "wbox_info", __spufs_wbox_info_read, NULL, 4 * sizeof(u32)}, - { "dma_info", __spufs_dma_info_read, NULL, sizeof(struct spu_dma_info)}, - { "proxydma_info", __spufs_proxydma_info_read, + { "mbox_info", spufs_mbox_info_dump, NULL, sizeof(u32) }, + { "ibox_info", spufs_ibox_info_dump, NULL, sizeof(u32) }, + { "wbox_info", spufs_wbox_info_dump, NULL, 4 * sizeof(u32)}, + { "dma_info", spufs_dma_info_dump, NULL, sizeof(struct spu_dma_info)}, + { "proxydma_info", spufs_proxydma_info_dump, NULL, sizeof(struct spu_proxydma_info)}, { "object-id", NULL, spufs_object_id_get, 19 }, { "npc", NULL, spufs_npc_get, 19 }, diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index 413c89afe112..1ba4d884febf 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -337,8 +337,7 @@ void spufs_dma_callback(struct spu *spu, int type); extern struct spu_coredump_calls spufs_coredump_calls; struct spufs_coredump_reader { char *name; - ssize_t (*read)(struct spu_context *ctx, - char __user *buffer, size_t size, loff_t *pos); + ssize_t (*dump)(struct spu_context *ctx, struct coredump_params *cprm); u64 (*get)(struct spu_context *ctx); size_t size; }; diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index 13e251699346..b2ba3e95bda7 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -167,7 +167,6 @@ static void pnv_smp_cpu_kill_self(void) /* Standard hot unplug procedure */ idle_task_exit(); - current->active_mm = NULL; /* for sanity */ cpu = smp_processor_id(); DBG("CPU%d offline\n", cpu); generic_set_cpu_dead(cpu); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 35b60035b6b0..d50706ea1c94 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -473,9 +473,9 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL +#define TASK_SIZE 0xffffffffUL #define VMALLOC_START 0 - -#define TASK_SIZE 0xffffffffUL +#define VMALLOC_END TASK_SIZE static inline void __kernel_map_pages(struct page *page, int numpages, int enable) {} diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 7eab76a93106..070505d79b06 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -204,7 +204,7 @@ static void note_prot_wx(struct pg_state *st, unsigned long addr) } static void note_page(struct ptdump_state *pt_st, unsigned long addr, - int level, unsigned long val) + int level, u64 val) { struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); u64 pa = PFN_PHYS(pte_pfn(__pte(val))); diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c index 7c15542d3685..698b1e6d3c14 100644 --- a/arch/s390/crypto/sha1_s390.c +++ b/arch/s390/crypto/sha1_s390.c @@ -27,7 +27,7 @@ #include "sha.h" -static int sha1_init(struct shash_desc *desc) +static int s390_sha1_init(struct shash_desc *desc) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); @@ -42,7 +42,7 @@ static int sha1_init(struct shash_desc *desc) return 0; } -static int sha1_export(struct shash_desc *desc, void *out) +static int s390_sha1_export(struct shash_desc *desc, void *out) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); struct sha1_state *octx = out; @@ -53,7 +53,7 @@ static int sha1_export(struct shash_desc *desc, void *out) return 0; } -static int sha1_import(struct shash_desc *desc, const void *in) +static int s390_sha1_import(struct shash_desc *desc, const void *in) { struct s390_sha_ctx *sctx = shash_desc_ctx(desc); const struct sha1_state *ictx = in; @@ -67,11 +67,11 @@ static int sha1_import(struct shash_desc *desc, const void *in) static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, - .init = sha1_init, + .init = s390_sha1_init, .update = s390_sha_update, .final = s390_sha_final, - .export = sha1_export, - .import = sha1_import, + .export = s390_sha1_export, + .import = s390_sha1_import, .descsize = sizeof(struct s390_sha_ctx), .statesize = sizeof(struct sha1_state), .base = { diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h index 91e376b0d28c..6d01c96aeb5c 100644 --- a/arch/s390/include/asm/checksum.h +++ b/arch/s390/include/asm/checksum.h @@ -39,25 +39,6 @@ csum_partial(const void *buff, int len, __wsum sum) return sum; } -/* - * the same as csum_partial_copy, but copies from user space. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - * - * Copy from userspace and compute checksum. - */ -static inline __wsum -csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, - int *err_ptr) -{ - if (unlikely(copy_from_user(dst, src, len))) - *err_ptr = -EFAULT; - return csum_partial(dst, len, sum); -} - - static inline __wsum csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum) { diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index d6bcd34f3ec3..3d554887794e 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -375,6 +375,8 @@ struct kvm_vcpu_stat { u64 halt_poll_invalid; u64 halt_no_poll_steal; u64 halt_wakeup; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; u64 instruction_lctl; u64 instruction_lctlg; u64 instruction_stctl; @@ -971,7 +973,7 @@ struct kvm_arch_async_pf { unsigned long pfault_token; }; -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); @@ -982,6 +984,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); +static inline void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) {} + void kvm_arch_crypto_clear_masks(struct kvm *kvm); void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm, unsigned long *aqm, unsigned long *adm); diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 36445dd40fdb..0f0b140b5558 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -305,12 +305,9 @@ void *restart_stack __section(.data); unsigned long stack_alloc(void) { #ifdef CONFIG_VMAP_STACK - return (unsigned long) - __vmalloc_node_range(THREAD_SIZE, THREAD_SIZE, - VMALLOC_START, VMALLOC_END, - THREADINFO_GFP, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + return (unsigned long)__vmalloc_node(THREAD_SIZE, THREAD_SIZE, + THREADINFO_GFP, NUMA_NO_NODE, + __builtin_return_address(0)); #else return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER); #endif diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index bd7bd3581a0f..bfdcb7633957 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ 435 common clone3 sys_clone3 sys_clone3 437 common openat2 sys_openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 sys_faccessat2 diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index bfb481134994..a4d4ca2769bd 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -3082,7 +3082,7 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer) __airqs_kick_single_vcpu(kvm, pending_mask); hrtimer_forward_now(timer, ns_to_ktime(gi->expires)); return HRTIMER_RESTART; - }; + } return HRTIMER_NORESTART; } diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index d05bb040fd42..06bde4bad205 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -57,110 +57,109 @@ #define VCPU_IRQS_MAX_BUF (sizeof(struct kvm_s390_irq) * \ (KVM_MAX_VCPUS + LOCAL_IRQS)) -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM - struct kvm_stats_debugfs_item debugfs_entries[] = { - { "userspace_handled", VCPU_STAT(exit_userspace) }, - { "exit_null", VCPU_STAT(exit_null) }, - { "exit_validity", VCPU_STAT(exit_validity) }, - { "exit_stop_request", VCPU_STAT(exit_stop_request) }, - { "exit_external_request", VCPU_STAT(exit_external_request) }, - { "exit_io_request", VCPU_STAT(exit_io_request) }, - { "exit_external_interrupt", VCPU_STAT(exit_external_interrupt) }, - { "exit_instruction", VCPU_STAT(exit_instruction) }, - { "exit_pei", VCPU_STAT(exit_pei) }, - { "exit_program_interruption", VCPU_STAT(exit_program_interruption) }, - { "exit_instr_and_program_int", VCPU_STAT(exit_instr_and_program) }, - { "exit_operation_exception", VCPU_STAT(exit_operation_exception) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_no_poll_steal", VCPU_STAT(halt_no_poll_steal) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "instruction_lctlg", VCPU_STAT(instruction_lctlg) }, - { "instruction_lctl", VCPU_STAT(instruction_lctl) }, - { "instruction_stctl", VCPU_STAT(instruction_stctl) }, - { "instruction_stctg", VCPU_STAT(instruction_stctg) }, - { "deliver_ckc", VCPU_STAT(deliver_ckc) }, - { "deliver_cputm", VCPU_STAT(deliver_cputm) }, - { "deliver_emergency_signal", VCPU_STAT(deliver_emergency_signal) }, - { "deliver_external_call", VCPU_STAT(deliver_external_call) }, - { "deliver_service_signal", VCPU_STAT(deliver_service_signal) }, - { "deliver_virtio", VCPU_STAT(deliver_virtio) }, - { "deliver_stop_signal", VCPU_STAT(deliver_stop_signal) }, - { "deliver_prefix_signal", VCPU_STAT(deliver_prefix_signal) }, - { "deliver_restart_signal", VCPU_STAT(deliver_restart_signal) }, - { "deliver_program", VCPU_STAT(deliver_program) }, - { "deliver_io", VCPU_STAT(deliver_io) }, - { "deliver_machine_check", VCPU_STAT(deliver_machine_check) }, - { "exit_wait_state", VCPU_STAT(exit_wait_state) }, - { "inject_ckc", VCPU_STAT(inject_ckc) }, - { "inject_cputm", VCPU_STAT(inject_cputm) }, - { "inject_external_call", VCPU_STAT(inject_external_call) }, - { "inject_float_mchk", VM_STAT(inject_float_mchk) }, - { "inject_emergency_signal", VCPU_STAT(inject_emergency_signal) }, - { "inject_io", VM_STAT(inject_io) }, - { "inject_mchk", VCPU_STAT(inject_mchk) }, - { "inject_pfault_done", VM_STAT(inject_pfault_done) }, - { "inject_program", VCPU_STAT(inject_program) }, - { "inject_restart", VCPU_STAT(inject_restart) }, - { "inject_service_signal", VM_STAT(inject_service_signal) }, - { "inject_set_prefix", VCPU_STAT(inject_set_prefix) }, - { "inject_stop_signal", VCPU_STAT(inject_stop_signal) }, - { "inject_pfault_init", VCPU_STAT(inject_pfault_init) }, - { "inject_virtio", VM_STAT(inject_virtio) }, - { "instruction_epsw", VCPU_STAT(instruction_epsw) }, - { "instruction_gs", VCPU_STAT(instruction_gs) }, - { "instruction_io_other", VCPU_STAT(instruction_io_other) }, - { "instruction_lpsw", VCPU_STAT(instruction_lpsw) }, - { "instruction_lpswe", VCPU_STAT(instruction_lpswe) }, - { "instruction_pfmf", VCPU_STAT(instruction_pfmf) }, - { "instruction_ptff", VCPU_STAT(instruction_ptff) }, - { "instruction_stidp", VCPU_STAT(instruction_stidp) }, - { "instruction_sck", VCPU_STAT(instruction_sck) }, - { "instruction_sckpf", VCPU_STAT(instruction_sckpf) }, - { "instruction_spx", VCPU_STAT(instruction_spx) }, - { "instruction_stpx", VCPU_STAT(instruction_stpx) }, - { "instruction_stap", VCPU_STAT(instruction_stap) }, - { "instruction_iske", VCPU_STAT(instruction_iske) }, - { "instruction_ri", VCPU_STAT(instruction_ri) }, - { "instruction_rrbe", VCPU_STAT(instruction_rrbe) }, - { "instruction_sske", VCPU_STAT(instruction_sske) }, - { "instruction_ipte_interlock", VCPU_STAT(instruction_ipte_interlock) }, - { "instruction_essa", VCPU_STAT(instruction_essa) }, - { "instruction_stsi", VCPU_STAT(instruction_stsi) }, - { "instruction_stfl", VCPU_STAT(instruction_stfl) }, - { "instruction_tb", VCPU_STAT(instruction_tb) }, - { "instruction_tpi", VCPU_STAT(instruction_tpi) }, - { "instruction_tprot", VCPU_STAT(instruction_tprot) }, - { "instruction_tsch", VCPU_STAT(instruction_tsch) }, - { "instruction_sthyi", VCPU_STAT(instruction_sthyi) }, - { "instruction_sie", VCPU_STAT(instruction_sie) }, - { "instruction_sigp_sense", VCPU_STAT(instruction_sigp_sense) }, - { "instruction_sigp_sense_running", VCPU_STAT(instruction_sigp_sense_running) }, - { "instruction_sigp_external_call", VCPU_STAT(instruction_sigp_external_call) }, - { "instruction_sigp_emergency", VCPU_STAT(instruction_sigp_emergency) }, - { "instruction_sigp_cond_emergency", VCPU_STAT(instruction_sigp_cond_emergency) }, - { "instruction_sigp_start", VCPU_STAT(instruction_sigp_start) }, - { "instruction_sigp_stop", VCPU_STAT(instruction_sigp_stop) }, - { "instruction_sigp_stop_store_status", VCPU_STAT(instruction_sigp_stop_store_status) }, - { "instruction_sigp_store_status", VCPU_STAT(instruction_sigp_store_status) }, - { "instruction_sigp_store_adtl_status", VCPU_STAT(instruction_sigp_store_adtl_status) }, - { "instruction_sigp_set_arch", VCPU_STAT(instruction_sigp_arch) }, - { "instruction_sigp_set_prefix", VCPU_STAT(instruction_sigp_prefix) }, - { "instruction_sigp_restart", VCPU_STAT(instruction_sigp_restart) }, - { "instruction_sigp_cpu_reset", VCPU_STAT(instruction_sigp_cpu_reset) }, - { "instruction_sigp_init_cpu_reset", VCPU_STAT(instruction_sigp_init_cpu_reset) }, - { "instruction_sigp_unknown", VCPU_STAT(instruction_sigp_unknown) }, - { "instruction_diag_10", VCPU_STAT(diagnose_10) }, - { "instruction_diag_44", VCPU_STAT(diagnose_44) }, - { "instruction_diag_9c", VCPU_STAT(diagnose_9c) }, - { "diag_9c_ignored", VCPU_STAT(diagnose_9c_ignored) }, - { "instruction_diag_258", VCPU_STAT(diagnose_258) }, - { "instruction_diag_308", VCPU_STAT(diagnose_308) }, - { "instruction_diag_500", VCPU_STAT(diagnose_500) }, - { "instruction_diag_other", VCPU_STAT(diagnose_other) }, + VCPU_STAT("userspace_handled", exit_userspace), + VCPU_STAT("exit_null", exit_null), + VCPU_STAT("exit_validity", exit_validity), + VCPU_STAT("exit_stop_request", exit_stop_request), + VCPU_STAT("exit_external_request", exit_external_request), + VCPU_STAT("exit_io_request", exit_io_request), + VCPU_STAT("exit_external_interrupt", exit_external_interrupt), + VCPU_STAT("exit_instruction", exit_instruction), + VCPU_STAT("exit_pei", exit_pei), + VCPU_STAT("exit_program_interruption", exit_program_interruption), + VCPU_STAT("exit_instr_and_program_int", exit_instr_and_program), + VCPU_STAT("exit_operation_exception", exit_operation_exception), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_no_poll_steal", halt_no_poll_steal), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VCPU_STAT("instruction_lctlg", instruction_lctlg), + VCPU_STAT("instruction_lctl", instruction_lctl), + VCPU_STAT("instruction_stctl", instruction_stctl), + VCPU_STAT("instruction_stctg", instruction_stctg), + VCPU_STAT("deliver_ckc", deliver_ckc), + VCPU_STAT("deliver_cputm", deliver_cputm), + VCPU_STAT("deliver_emergency_signal", deliver_emergency_signal), + VCPU_STAT("deliver_external_call", deliver_external_call), + VCPU_STAT("deliver_service_signal", deliver_service_signal), + VCPU_STAT("deliver_virtio", deliver_virtio), + VCPU_STAT("deliver_stop_signal", deliver_stop_signal), + VCPU_STAT("deliver_prefix_signal", deliver_prefix_signal), + VCPU_STAT("deliver_restart_signal", deliver_restart_signal), + VCPU_STAT("deliver_program", deliver_program), + VCPU_STAT("deliver_io", deliver_io), + VCPU_STAT("deliver_machine_check", deliver_machine_check), + VCPU_STAT("exit_wait_state", exit_wait_state), + VCPU_STAT("inject_ckc", inject_ckc), + VCPU_STAT("inject_cputm", inject_cputm), + VCPU_STAT("inject_external_call", inject_external_call), + VM_STAT("inject_float_mchk", inject_float_mchk), + VCPU_STAT("inject_emergency_signal", inject_emergency_signal), + VM_STAT("inject_io", inject_io), + VCPU_STAT("inject_mchk", inject_mchk), + VM_STAT("inject_pfault_done", inject_pfault_done), + VCPU_STAT("inject_program", inject_program), + VCPU_STAT("inject_restart", inject_restart), + VM_STAT("inject_service_signal", inject_service_signal), + VCPU_STAT("inject_set_prefix", inject_set_prefix), + VCPU_STAT("inject_stop_signal", inject_stop_signal), + VCPU_STAT("inject_pfault_init", inject_pfault_init), + VM_STAT("inject_virtio", inject_virtio), + VCPU_STAT("instruction_epsw", instruction_epsw), + VCPU_STAT("instruction_gs", instruction_gs), + VCPU_STAT("instruction_io_other", instruction_io_other), + VCPU_STAT("instruction_lpsw", instruction_lpsw), + VCPU_STAT("instruction_lpswe", instruction_lpswe), + VCPU_STAT("instruction_pfmf", instruction_pfmf), + VCPU_STAT("instruction_ptff", instruction_ptff), + VCPU_STAT("instruction_stidp", instruction_stidp), + VCPU_STAT("instruction_sck", instruction_sck), + VCPU_STAT("instruction_sckpf", instruction_sckpf), + VCPU_STAT("instruction_spx", instruction_spx), + VCPU_STAT("instruction_stpx", instruction_stpx), + VCPU_STAT("instruction_stap", instruction_stap), + VCPU_STAT("instruction_iske", instruction_iske), + VCPU_STAT("instruction_ri", instruction_ri), + VCPU_STAT("instruction_rrbe", instruction_rrbe), + VCPU_STAT("instruction_sske", instruction_sske), + VCPU_STAT("instruction_ipte_interlock", instruction_ipte_interlock), + VCPU_STAT("instruction_essa", instruction_essa), + VCPU_STAT("instruction_stsi", instruction_stsi), + VCPU_STAT("instruction_stfl", instruction_stfl), + VCPU_STAT("instruction_tb", instruction_tb), + VCPU_STAT("instruction_tpi", instruction_tpi), + VCPU_STAT("instruction_tprot", instruction_tprot), + VCPU_STAT("instruction_tsch", instruction_tsch), + VCPU_STAT("instruction_sthyi", instruction_sthyi), + VCPU_STAT("instruction_sie", instruction_sie), + VCPU_STAT("instruction_sigp_sense", instruction_sigp_sense), + VCPU_STAT("instruction_sigp_sense_running", instruction_sigp_sense_running), + VCPU_STAT("instruction_sigp_external_call", instruction_sigp_external_call), + VCPU_STAT("instruction_sigp_emergency", instruction_sigp_emergency), + VCPU_STAT("instruction_sigp_cond_emergency", instruction_sigp_cond_emergency), + VCPU_STAT("instruction_sigp_start", instruction_sigp_start), + VCPU_STAT("instruction_sigp_stop", instruction_sigp_stop), + VCPU_STAT("instruction_sigp_stop_store_status", instruction_sigp_stop_store_status), + VCPU_STAT("instruction_sigp_store_status", instruction_sigp_store_status), + VCPU_STAT("instruction_sigp_store_adtl_status", instruction_sigp_store_adtl_status), + VCPU_STAT("instruction_sigp_set_arch", instruction_sigp_arch), + VCPU_STAT("instruction_sigp_set_prefix", instruction_sigp_prefix), + VCPU_STAT("instruction_sigp_restart", instruction_sigp_restart), + VCPU_STAT("instruction_sigp_cpu_reset", instruction_sigp_cpu_reset), + VCPU_STAT("instruction_sigp_init_cpu_reset", instruction_sigp_init_cpu_reset), + VCPU_STAT("instruction_sigp_unknown", instruction_sigp_unknown), + VCPU_STAT("instruction_diag_10", diagnose_10), + VCPU_STAT("instruction_diag_44", diagnose_44), + VCPU_STAT("instruction_diag_9c", diagnose_9c), + VCPU_STAT("diag_9c_ignored", diagnose_9c_ignored), + VCPU_STAT("instruction_diag_258", diagnose_258), + VCPU_STAT("instruction_diag_308", diagnose_308), + VCPU_STAT("instruction_diag_500", diagnose_500), + VCPU_STAT("instruction_diag_other", diagnose_other), { NULL } }; @@ -3944,7 +3943,7 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, /* s390 will always inject the page directly */ } -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) { /* * s390 will always inject the page directly, @@ -4337,8 +4336,9 @@ static void store_regs(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) store_regs_fmt2(vcpu, kvm_run); } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; int rc; if (kvm_run->immediate_exit) diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 4f6c22d72072..ef05b4e167fb 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -1000,8 +1000,6 @@ static int do_vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) handle_last_fault(vcpu, vsie_page); - if (need_resched()) - schedule(); if (test_cpu_flag(CIF_MCCK_PENDING)) s390_handle_mcck(); @@ -1185,6 +1183,7 @@ static int vsie_run(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) kvm_s390_vcpu_has_irq(vcpu, 0) || kvm_s390_vcpu_sie_inhibited(vcpu)) break; + cond_resched(); } if (rc == -EFAULT) { diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index 1a95d8809cc3..4b6903fbba4a 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -788,19 +788,19 @@ static inline unsigned long *gmap_table_walk(struct gmap *gmap, unsigned long gaddr, int level) { const int asce_type = gmap->asce & _ASCE_TYPE_MASK; - unsigned long *table; + unsigned long *table = gmap->table; - if ((gmap->asce & _ASCE_TYPE_MASK) + 4 < (level * 4)) - return NULL; if (gmap_is_shadow(gmap) && gmap->removed) return NULL; + if (WARN_ON_ONCE(level > (asce_type >> 2) + 1)) + return NULL; + if (asce_type != _ASCE_TYPE_REGION1 && gaddr & (-1UL << (31 + (asce_type >> 2) * 11))) return NULL; - table = gmap->table; - switch (gmap->asce & _ASCE_TYPE_MASK) { + switch (asce_type) { case _ASCE_TYPE_REGION1: table += (gaddr & _REGION1_INDEX) >> _REGION1_SHIFT; if (level == 4) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index b4f0e37b83eb..97656d20b9ea 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -71,7 +71,6 @@ config SUPERH32 select HAVE_FUNCTION_TRACER select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE - select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE select ARCH_WANT_IPC_PARSE_VERSION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_ARCH_KGDB diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h index 36b84cfd3f67..91571a42e44e 100644 --- a/arch/sh/include/asm/checksum_32.h +++ b/arch/sh/include/asm/checksum_32.h @@ -48,12 +48,17 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst, return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); } +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER static inline -__wsum csum_partial_copy_from_user(const void __user *src, void *dst, +__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { - return csum_partial_copy_generic((__force const void *)src, dst, + if (access_ok(src, len)) + return csum_partial_copy_generic((__force const void *)src, dst, len, sum, err_ptr, NULL); + if (len) + *err_ptr = -EFAULT; + return sum; } /* diff --git a/arch/sh/kernel/cpu/sh4/sq.c b/arch/sh/kernel/cpu/sh4/sq.c index 934ff84844fa..d432164b23b7 100644 --- a/arch/sh/kernel/cpu/sh4/sq.c +++ b/arch/sh/kernel/cpu/sh4/sq.c @@ -103,7 +103,8 @@ static int __sq_remap(struct sq_mapping *map, pgprot_t prot) #if defined(CONFIG_MMU) struct vm_struct *vma; - vma = __get_vm_area(map->size, VM_ALLOC, map->sq_addr, SQ_ADDRMAX); + vma = __get_vm_area_caller(map->size, VM_ALLOC, map->sq_addr, + SQ_ADDRMAX, __builtin_return_address(0)); if (!vma) return -ENOMEM; diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index c7a30fcd135f..acc35daa1b79 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -440,3 +440,4 @@ # 435 reserved for clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 63cf17bc760d..2130381c9d57 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -170,11 +170,21 @@ BUILD_TRAP_HANDLER(bug) force_sig(SIGTRAP); } +#ifdef CONFIG_DYNAMIC_FTRACE +extern void arch_ftrace_nmi_enter(void); +extern void arch_ftrace_nmi_exit(void); +#else +static inline void arch_ftrace_nmi_enter(void) { } +static inline void arch_ftrace_nmi_exit(void) { } +#endif + BUILD_TRAP_HANDLER(nmi) { unsigned int cpu = smp_processor_id(); TRAP_HANDLER_DECL; + arch_ftrace_nmi_enter(); + nmi_enter(); nmi_count(cpu)++; @@ -190,4 +200,6 @@ BUILD_TRAP_HANDLER(nmi) } nmi_exit(); + + arch_ftrace_nmi_exit(); } diff --git a/arch/sparc/crypto/md5_glue.c b/arch/sparc/crypto/md5_glue.c index 14f6c15be6ae..111283fe837e 100644 --- a/arch/sparc/crypto/md5_glue.c +++ b/arch/sparc/crypto/md5_glue.c @@ -18,7 +18,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/md5.h> diff --git a/arch/sparc/crypto/sha1_glue.c b/arch/sparc/crypto/sha1_glue.c index 7c1666304441..dc017782be52 100644 --- a/arch/sparc/crypto/sha1_glue.c +++ b/arch/sparc/crypto/sha1_glue.c @@ -15,7 +15,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> diff --git a/arch/sparc/crypto/sha256_glue.c b/arch/sparc/crypto/sha256_glue.c index f403ce9ba6e4..286bc8ecf15b 100644 --- a/arch/sparc/crypto/sha256_glue.c +++ b/arch/sparc/crypto/sha256_glue.c @@ -15,7 +15,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> diff --git a/arch/sparc/crypto/sha512_glue.c b/arch/sparc/crypto/sha512_glue.c index a3b532e43c07..3b2ca732ff7a 100644 --- a/arch/sparc/crypto/sha512_glue.c +++ b/arch/sparc/crypto/sha512_glue.c @@ -14,7 +14,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> diff --git a/arch/sparc/include/asm/checksum.h b/arch/sparc/include/asm/checksum.h index c3be56e2e768..a6256cb6fc5c 100644 --- a/arch/sparc/include/asm/checksum.h +++ b/arch/sparc/include/asm/checksum.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef ___ASM_SPARC_CHECKSUM_H #define ___ASM_SPARC_CHECKSUM_H +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER #if defined(__sparc__) && defined(__arch64__) #include <asm/checksum_64.h> #else diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h index 5fc98d80b03b..479a0b812af5 100644 --- a/arch/sparc/include/asm/checksum_32.h +++ b/arch/sparc/include/asm/checksum_32.h @@ -60,7 +60,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) } static inline __wsum -csum_partial_copy_from_user(const void __user *src, void *dst, int len, +csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err) { register unsigned long ret asm("o0") = (unsigned long)src; @@ -68,6 +68,12 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, register int l asm("g1") = len; register __wsum s asm("g7") = sum; + if (unlikely(!access_ok(src, len))) { + if (len) + *err = -EFAULT; + return sum; + } + __asm__ __volatile__ ( ".section __ex_table,#alloc\n\t" ".align 4\n\t" @@ -83,8 +89,10 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, return (__force __wsum)ret; } +#define HAVE_CSUM_COPY_USER + static inline __wsum -csum_partial_copy_to_user(const void *src, void __user *dst, int len, +csum_and_copy_to_user(const void *src, void __user *dst, int len, __wsum sum, int *err) { if (!access_ok(dst, len)) { @@ -113,9 +121,6 @@ csum_partial_copy_to_user(const void *src, void __user *dst, int len, } } -#define HAVE_CSUM_COPY_USER -#define csum_and_copy_to_user csum_partial_copy_to_user - /* ihl is always 5 or greater, almost always is 5, and iph is word aligned * the majority of the time. */ diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h index e52450930e4e..0fa4433f5662 100644 --- a/arch/sparc/include/asm/checksum_64.h +++ b/arch/sparc/include/asm/checksum_64.h @@ -46,7 +46,7 @@ long __csum_partial_copy_from_user(const void __user *src, __wsum sum); static inline __wsum -csum_partial_copy_from_user(const void __user *src, +csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err) { diff --git a/arch/sparc/include/asm/floppy_32.h b/arch/sparc/include/asm/floppy_32.h index b519acf4383d..946dbcbf3a83 100644 --- a/arch/sparc/include/asm/floppy_32.h +++ b/arch/sparc/include/asm/floppy_32.h @@ -59,8 +59,8 @@ struct sun_floppy_ops { static struct sun_floppy_ops sun_fdops; -#define fd_inb(port) sun_fdops.fd_inb(port) -#define fd_outb(value,port) sun_fdops.fd_outb(value,port) +#define fd_inb(base, reg) sun_fdops.fd_inb(reg) +#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, reg) #define fd_enable_dma() sun_fd_enable_dma() #define fd_disable_dma() sun_fd_disable_dma() #define fd_request_dma() (0) /* nothing... */ @@ -114,15 +114,15 @@ static unsigned char sun_read_dir(void) static unsigned char sun_82072_fd_inb(int port) { udelay(5); - switch(port & 7) { + switch (port) { default: printk("floppy: Asked to read unknown port %d\n", port); panic("floppy: Port bolixed."); - case 4: /* FD_STATUS */ + case FD_STATUS: return sun_fdc->status_82072 & ~STATUS_DMA; - case 5: /* FD_DATA */ + case FD_DATA: return sun_fdc->data_82072; - case 7: /* FD_DIR */ + case FD_DIR: return sun_read_dir(); } panic("sun_82072_fd_inb: How did I get here?"); @@ -131,20 +131,20 @@ static unsigned char sun_82072_fd_inb(int port) static void sun_82072_fd_outb(unsigned char value, int port) { udelay(5); - switch(port & 7) { + switch (port) { default: printk("floppy: Asked to write to unknown port %d\n", port); panic("floppy: Port bolixed."); - case 2: /* FD_DOR */ + case FD_DOR: sun_set_dor(value, 0); break; - case 5: /* FD_DATA */ + case FD_DATA: sun_fdc->data_82072 = value; break; - case 7: /* FD_DCR */ + case FD_DCR: sun_fdc->dcr_82072 = value; break; - case 4: /* FD_STATUS */ + case FD_DSR: sun_fdc->status_82072 = value; break; } @@ -154,23 +154,23 @@ static void sun_82072_fd_outb(unsigned char value, int port) static unsigned char sun_82077_fd_inb(int port) { udelay(5); - switch(port & 7) { + switch (port) { default: printk("floppy: Asked to read unknown port %d\n", port); panic("floppy: Port bolixed."); - case 0: /* FD_STATUS_0 */ + case FD_SRA: return sun_fdc->status1_82077; - case 1: /* FD_STATUS_1 */ + case FD_SRB: return sun_fdc->status2_82077; - case 2: /* FD_DOR */ + case FD_DOR: return sun_fdc->dor_82077; - case 3: /* FD_TDR */ + case FD_TDR: return sun_fdc->tapectl_82077; - case 4: /* FD_STATUS */ + case FD_STATUS: return sun_fdc->status_82077 & ~STATUS_DMA; - case 5: /* FD_DATA */ + case FD_DATA: return sun_fdc->data_82077; - case 7: /* FD_DIR */ + case FD_DIR: return sun_read_dir(); } panic("sun_82077_fd_inb: How did I get here?"); @@ -179,23 +179,23 @@ static unsigned char sun_82077_fd_inb(int port) static void sun_82077_fd_outb(unsigned char value, int port) { udelay(5); - switch(port & 7) { + switch (port) { default: printk("floppy: Asked to write to unknown port %d\n", port); panic("floppy: Port bolixed."); - case 2: /* FD_DOR */ + case FD_DOR: sun_set_dor(value, 1); break; - case 5: /* FD_DATA */ + case FD_DATA: sun_fdc->data_82077 = value; break; - case 7: /* FD_DCR */ + case FD_DCR: sun_fdc->dcr_82077 = value; break; - case 4: /* FD_STATUS */ + case FD_DSR: sun_fdc->status_82077 = value; break; - case 3: /* FD_TDR */ + case FD_TDR: sun_fdc->tapectl_82077 = value; break; } diff --git a/arch/sparc/include/asm/floppy_64.h b/arch/sparc/include/asm/floppy_64.h index 3729fc35ba83..070c8c1f5c8f 100644 --- a/arch/sparc/include/asm/floppy_64.h +++ b/arch/sparc/include/asm/floppy_64.h @@ -47,8 +47,9 @@ unsigned long fdc_status; static struct platform_device *floppy_op = NULL; struct sun_floppy_ops { - unsigned char (*fd_inb) (unsigned long port); - void (*fd_outb) (unsigned char value, unsigned long port); + unsigned char (*fd_inb) (unsigned long port, unsigned int reg); + void (*fd_outb) (unsigned char value, unsigned long base, + unsigned int reg); void (*fd_enable_dma) (void); void (*fd_disable_dma) (void); void (*fd_set_dma_mode) (int); @@ -62,8 +63,8 @@ struct sun_floppy_ops { static struct sun_floppy_ops sun_fdops; -#define fd_inb(port) sun_fdops.fd_inb(port) -#define fd_outb(value,port) sun_fdops.fd_outb(value,port) +#define fd_inb(base, reg) sun_fdops.fd_inb(base, reg) +#define fd_outb(value, base, reg) sun_fdops.fd_outb(value, base, reg) #define fd_enable_dma() sun_fdops.fd_enable_dma() #define fd_disable_dma() sun_fdops.fd_disable_dma() #define fd_request_dma() (0) /* nothing... */ @@ -97,42 +98,43 @@ static int sun_floppy_types[2] = { 0, 0 }; /* No 64k boundary crossing problems on the Sparc. */ #define CROSS_64KB(a,s) (0) -static unsigned char sun_82077_fd_inb(unsigned long port) +static unsigned char sun_82077_fd_inb(unsigned long base, unsigned int reg) { udelay(5); - switch(port & 7) { + switch (reg) { default: - printk("floppy: Asked to read unknown port %lx\n", port); + printk("floppy: Asked to read unknown port %x\n", reg); panic("floppy: Port bolixed."); - case 4: /* FD_STATUS */ + case FD_STATUS: return sbus_readb(&sun_fdc->status_82077) & ~STATUS_DMA; - case 5: /* FD_DATA */ + case FD_DATA: return sbus_readb(&sun_fdc->data_82077); - case 7: /* FD_DIR */ + case FD_DIR: /* XXX: Is DCL on 0x80 in sun4m? */ return sbus_readb(&sun_fdc->dir_82077); } panic("sun_82072_fd_inb: How did I get here?"); } -static void sun_82077_fd_outb(unsigned char value, unsigned long port) +static void sun_82077_fd_outb(unsigned char value, unsigned long base, + unsigned int reg) { udelay(5); - switch(port & 7) { + switch (reg) { default: - printk("floppy: Asked to write to unknown port %lx\n", port); + printk("floppy: Asked to write to unknown port %x\n", reg); panic("floppy: Port bolixed."); - case 2: /* FD_DOR */ + case FD_DOR: /* Happily, the 82077 has a real DOR register. */ sbus_writeb(value, &sun_fdc->dor_82077); break; - case 5: /* FD_DATA */ + case FD_DATA: sbus_writeb(value, &sun_fdc->data_82077); break; - case 7: /* FD_DCR */ + case FD_DCR: sbus_writeb(value, &sun_fdc->dcr_82077); break; - case 4: /* FD_STATUS */ + case FD_DSR: sbus_writeb(value, &sun_fdc->status_82077); break; } @@ -298,19 +300,21 @@ static struct sun_pci_dma_op sun_pci_dma_pending = { -1U, 0, 0, NULL}; irqreturn_t floppy_interrupt(int irq, void *dev_id); -static unsigned char sun_pci_fd_inb(unsigned long port) +static unsigned char sun_pci_fd_inb(unsigned long base, unsigned int reg) { udelay(5); - return inb(port); + return inb(base + reg); } -static void sun_pci_fd_outb(unsigned char val, unsigned long port) +static void sun_pci_fd_outb(unsigned char val, unsigned long base, + unsigned int reg) { udelay(5); - outb(val, port); + outb(val, base + reg); } -static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port) +static void sun_pci_fd_broken_outb(unsigned char val, unsigned long base, + unsigned int reg) { udelay(5); /* @@ -320,16 +324,17 @@ static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port) * this does not hurt correct hardware like the AXmp. * (Eddie, Sep 12 1998). */ - if (port == ((unsigned long)sun_fdc) + 2) { + if (reg == FD_DOR) { if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x20)) { val |= 0x10; } } - outb(val, port); + outb(val, base + reg); } #ifdef PCI_FDC_SWAP_DRIVES -static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port) +static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long base, + unsigned int reg) { udelay(5); /* @@ -339,13 +344,13 @@ static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port) * this does not hurt correct hardware like the AXmp. * (Eddie, Sep 12 1998). */ - if (port == ((unsigned long)sun_fdc) + 2) { + if (reg == FD_DOR) { if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x10)) { val &= ~(0x03); val |= 0x21; } } - outb(val, port); + outb(val, base + reg); } #endif /* PCI_FDC_SWAP_DRIVES */ diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index f13615ecdecc..8004a276cb74 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -483,3 +483,4 @@ # 435 reserved for clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 diff --git a/arch/unicore32/kernel/ksyms.c b/arch/unicore32/kernel/ksyms.c index f4b84872d640..731445008932 100644 --- a/arch/unicore32/kernel/ksyms.c +++ b/arch/unicore32/kernel/ksyms.c @@ -9,7 +9,6 @@ #include <linux/module.h> #include <linux/sched.h> #include <linux/string.h> -#include <linux/cryptohash.h> #include <linux/delay.h> #include <linux/in6.h> #include <linux/syscalls.h> diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2d3f963fd6f1..70c668d976cc 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -60,6 +60,7 @@ config X86 select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_EARLY_DEBUG if KGDB select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FILTER_PGPROT @@ -92,6 +93,7 @@ config X86 select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_QUEUED_RWLOCKS select ARCH_USE_QUEUED_SPINLOCKS + select ARCH_USE_SYM_ANNOTATIONS select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH select ARCH_WANT_DEFAULT_BPF_JIT if X86_64 select ARCH_WANTS_DYNAMIC_TASK_STRUCT @@ -596,7 +598,7 @@ config X86_INTEL_MID select I2C select DW_APB_TIMER select APB_TIMER - select INTEL_SCU_IPC + select INTEL_SCU_PCI select MFD_INTEL_MSIC ---help--- Select to build a kernel capable of supporting Intel MID (Mobile @@ -1611,19 +1613,10 @@ config NODES_SHIFT Specify the maximum number of NUMA Nodes available on the target system. Increases memory reserved to accommodate various tables. -config ARCH_HAVE_MEMORY_PRESENT - def_bool y - depends on X86_32 && DISCONTIGMEM - config ARCH_FLATMEM_ENABLE def_bool y depends on X86_32 && !NUMA -config ARCH_DISCONTIGMEM_ENABLE - def_bool n - depends on NUMA && X86_32 - depends on BROKEN - config ARCH_SPARSEMEM_ENABLE def_bool y depends on X86_64 || NUMA || X86_32 || X86_32_NON_STANDARD @@ -1888,10 +1881,10 @@ config X86_UMIP results are dummy. config X86_INTEL_MEMORY_PROTECTION_KEYS - prompt "Intel Memory Protection Keys" + prompt "Memory Protection Keys" def_bool y # Note: only available in 64-bit mode - depends on CPU_SUP_INTEL && X86_64 + depends on X86_64 && (CPU_SUP_INTEL || CPU_SUP_AMD) select ARCH_USES_HIGH_VMA_FLAGS select ARCH_HAS_PKEYS ---help--- diff --git a/arch/x86/Kconfig.assembler b/arch/x86/Kconfig.assembler index 13de0db38d4e..26b8c08e2fc4 100644 --- a/arch/x86/Kconfig.assembler +++ b/arch/x86/Kconfig.assembler @@ -15,3 +15,7 @@ config AS_SHA256_NI def_bool $(as-instr,sha256msg1 %xmm0$(comma)%xmm1) help Supported by binutils >= 2.24 and LLVM integrated assembler +config AS_TPAUSE + def_bool $(as-instr,tpause %ecx) + help + Supported by binutils >= 2.31.1 and LLVM integrated assembler >= V7 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2e74690b028a..f909d3ce36e6 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -99,15 +99,6 @@ config DEBUG_WX If in doubt, say "Y". -config DOUBLEFAULT - default y - bool "Enable doublefault exception handler" if EXPERT && X86_32 - ---help--- - This option allows trapping of rare doublefault exceptions that - would otherwise cause a system to silently reboot. Disabling this - option saves about 4k and might cause you much additional grey - hair. - config DEBUG_TLBFLUSH bool "Set upper limit of TLB entries to flush one-by-one" depends on DEBUG_KERNEL diff --git a/arch/x86/Makefile b/arch/x86/Makefile index b65ec63c7db7..00e378de8bc0 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -246,7 +246,7 @@ drivers-$(CONFIG_FB) += arch/x86/video/ boot := arch/x86/boot -BOOT_TARGETS = bzlilo bzdisk fdimage fdimage144 fdimage288 isoimage +BOOT_TARGETS = bzdisk fdimage fdimage144 fdimage288 isoimage PHONY += bzImage $(BOOT_TARGETS) @@ -267,8 +267,8 @@ endif $(BOOT_TARGETS): vmlinux $(Q)$(MAKE) $(build)=$(boot) $@ -PHONY += install -install: +PHONY += install bzlilo +install bzlilo: $(Q)$(MAKE) $(build)=$(boot) $@ PHONY += vdso_install diff --git a/arch/x86/boot/Makefile b/arch/x86/boot/Makefile index e17be90ab312..4c5355684321 100644 --- a/arch/x86/boot/Makefile +++ b/arch/x86/boot/Makefile @@ -57,11 +57,10 @@ $(obj)/cpu.o: $(obj)/cpustr.h quiet_cmd_cpustr = CPUSTR $@ cmd_cpustr = $(obj)/mkcpustr > $@ -targets += cpustr.h $(obj)/cpustr.h: $(obj)/mkcpustr FORCE $(call if_changed,cpustr) endif -clean-files += cpustr.h +targets += cpustr.h # --------------------------------------------------------------------------- @@ -129,6 +128,8 @@ quiet_cmd_genimage = GENIMAGE $3 cmd_genimage = sh $(srctree)/$(src)/genimage.sh $2 $3 $(obj)/bzImage \ $(obj)/mtools.conf '$(image_cmdline)' $(FDINITRD) +PHONY += bzdisk fdimage fdimage144 fdimage288 isoimage bzlilo install + # This requires write access to /dev/fd0 bzdisk: $(obj)/bzImage $(obj)/mtools.conf $(call cmd,genimage,bzdisk,/dev/fd0) @@ -146,7 +147,7 @@ isoimage: $(obj)/bzImage $(call cmd,genimage,isoimage,$(obj)/image.iso) @$(kecho) 'Kernel: $(obj)/image.iso is ready' -bzlilo: $(obj)/bzImage +bzlilo: if [ -f $(INSTALL_PATH)/vmlinuz ]; then mv $(INSTALL_PATH)/vmlinuz $(INSTALL_PATH)/vmlinuz.old; fi if [ -f $(INSTALL_PATH)/System.map ]; then mv $(INSTALL_PATH)/System.map $(INSTALL_PATH)/System.old; fi cat $(obj)/bzImage > $(INSTALL_PATH)/vmlinuz diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index ef2ad7253cd5..8bcbcee54aa1 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -280,9 +280,9 @@ acpi_physical_address get_rsdp_addr(void) */ #define MAX_ADDR_LEN 19 -static acpi_physical_address get_cmdline_acpi_rsdp(void) +static unsigned long get_cmdline_acpi_rsdp(void) { - acpi_physical_address addr = 0; + unsigned long addr = 0; #ifdef CONFIG_KEXEC char val[MAX_ADDR_LEN] = { }; @@ -292,7 +292,7 @@ static acpi_physical_address get_cmdline_acpi_rsdp(void) if (ret < 0) return 0; - if (kstrtoull(val, 16, &addr)) + if (boot_kstrtoul(val, 16, &addr)) return 0; #endif return addr; @@ -314,7 +314,6 @@ static unsigned long get_acpi_srat_table(void) * different ideas about whether to trust a command-line parameter. */ rsdp = (struct acpi_table_rsdp *)get_cmdline_acpi_rsdp(); - if (!rsdp) rsdp = (struct acpi_table_rsdp *)(long) boot_params->acpi_rsdp_addr; diff --git a/arch/x86/boot/compressed/efi_thunk_64.S b/arch/x86/boot/compressed/efi_thunk_64.S index 2b2049259619..c4bb0f9363f5 100644 --- a/arch/x86/boot/compressed/efi_thunk_64.S +++ b/arch/x86/boot/compressed/efi_thunk_64.S @@ -28,8 +28,6 @@ SYM_FUNC_START(__efi64_thunk) push %rbx leaq 1f(%rip), %rbp - leaq efi_gdt64(%rip), %rbx - movl %ebx, 2(%rbx) /* Fixup the gdt base address */ movl %ds, %eax push %rax @@ -48,7 +46,8 @@ SYM_FUNC_START(__efi64_thunk) movl %r8d, 0xc(%rsp) movl %r9d, 0x10(%rsp) - sgdt 0x14(%rsp) + leaq 0x14(%rsp), %rbx + sgdt (%rbx) /* * Switch to gdt with 32-bit segments. This is the firmware GDT @@ -68,8 +67,7 @@ SYM_FUNC_START(__efi64_thunk) pushq %rax lretq -1: lgdt 0x14(%rsp) - addq $32, %rsp +1: addq $32, %rsp movq %rdi, %rax pop %rbx @@ -175,14 +173,3 @@ SYM_DATA_END(efi32_boot_cs) SYM_DATA_START(efi32_boot_ds) .word 0 SYM_DATA_END(efi32_boot_ds) - -SYM_DATA_START(efi_gdt64) - .word efi_gdt64_end - efi_gdt64 - .long 0 /* Filled out by user */ - .word 0 - .quad 0x0000000000000000 /* NULL descriptor */ - .quad 0x00af9a000000ffff /* __KERNEL_CS */ - .quad 0x00cf92000000ffff /* __KERNEL_DS */ - .quad 0x0080890000000000 /* TS descriptor */ - .quad 0x0000000000000000 /* TS continued */ -SYM_DATA_END_LABEL(efi_gdt64, SYM_L_LOCAL, efi_gdt64_end) diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index ab3307036ba4..03557f2174bf 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -49,16 +49,17 @@ * Position Independent Executable (PIE) so that linker won't optimize * R_386_GOT32X relocation to its fixed symbol address. Older * linkers generate R_386_32 relocations against locally defined symbols, - * _bss, _ebss, _got and _egot, in PIE. It isn't wrong, just less + * _bss, _ebss, _got, _egot and _end, in PIE. It isn't wrong, just less * optimal than R_386_RELATIVE. But the x86 kernel fails to properly handle * R_386_32 relocations when relocating the kernel. To generate - * R_386_RELATIVE relocations, we mark _bss, _ebss, _got and _egot as + * R_386_RELATIVE relocations, we mark _bss, _ebss, _got, _egot and _end as * hidden: */ .hidden _bss .hidden _ebss .hidden _got .hidden _egot + .hidden _end __HEAD SYM_FUNC_START(startup_32) diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4f7e6b84be07..e821a7d7d5c4 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -42,6 +42,7 @@ .hidden _ebss .hidden _got .hidden _egot + .hidden _end __HEAD .code32 @@ -393,6 +394,14 @@ SYM_CODE_START(startup_64) addq %rax, 2(%rax) lgdt (%rax) + /* Reload CS so IRET returns to a CS actually in the GDT */ + pushq $__KERNEL_CS + leaq .Lon_kernel_cs(%rip), %rax + pushq %rax + lretq + +.Lon_kernel_cs: + /* * paging_prepare() sets up the trampoline and checks if we need to * enable 5-level paging. diff --git a/arch/x86/boot/compressed/vmlinux.lds.S b/arch/x86/boot/compressed/vmlinux.lds.S index 508cfa6828c5..8f1025d1f681 100644 --- a/arch/x86/boot/compressed/vmlinux.lds.S +++ b/arch/x86/boot/compressed/vmlinux.lds.S @@ -52,6 +52,7 @@ SECTIONS _data = . ; *(.data) *(.data.*) + *(.bss.efistub) _edata = . ; } . = ALIGN(L1_CACHE_BYTES); @@ -73,4 +74,6 @@ SECTIONS #endif . = ALIGN(PAGE_SIZE); /* keep ZO size page aligned */ _end = .; + + DISCARDS } diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 8272a4492844..8a3fff9128bb 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -117,7 +117,6 @@ static unsigned int simple_guess_base(const char *cp) * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use */ - unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { unsigned long long result = 0; @@ -335,3 +334,45 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) s++; return _kstrtoull(s, base, res); } + +static int _kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + unsigned long long tmp; + int rv; + + rv = kstrtoull(s, base, &tmp); + if (rv < 0) + return rv; + if (tmp != (unsigned long)tmp) + return -ERANGE; + *res = tmp; + return 0; +} + +/** + * kstrtoul - convert a string to an unsigned long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the simple_strtoull. + */ +int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + /* + * We want to shortcut function call, but + * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. + */ + if (sizeof(unsigned long) == sizeof(unsigned long long) && + __alignof__(unsigned long) == __alignof__(unsigned long long)) + return kstrtoull(s, base, (unsigned long long *)res); + else + return _kstrtoul(s, base, res); +} diff --git a/arch/x86/boot/string.h b/arch/x86/boot/string.h index 38d8f2f5e47e..995f7b7ad512 100644 --- a/arch/x86/boot/string.h +++ b/arch/x86/boot/string.h @@ -30,4 +30,5 @@ extern unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); int kstrtoull(const char *s, unsigned int base, unsigned long long *res); +int boot_kstrtoul(const char *s, unsigned int base, unsigned long *res); #endif /* BOOT_STRING_H */ diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index cad6e1bfa7d5..54e7d15dbd0d 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -2758,7 +2758,7 @@ SYM_FUNC_START(aesni_xts_crypt8) pxor INC, STATE4 movdqu IV, 0x30(OUTP) - CALL_NOSPEC %r11 + CALL_NOSPEC r11 movdqu 0x00(OUTP), INC pxor INC, STATE1 @@ -2803,7 +2803,7 @@ SYM_FUNC_START(aesni_xts_crypt8) _aesni_gf128mul_x_ble() movups IV, (IVP) - CALL_NOSPEC %r11 + CALL_NOSPEC r11 movdqu 0x40(OUTP), INC pxor INC, STATE1 diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index d01ddd73de65..ecc0a9a905c4 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -1228,7 +1228,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_16way) vpxor 14 * 16(%rax), %xmm15, %xmm14; vpxor 15 * 16(%rax), %xmm15, %xmm15; - CALL_NOSPEC %r9; + CALL_NOSPEC r9; addq $(16 * 16), %rsp; diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 563ef6e83cdd..0907243c501c 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -1339,7 +1339,7 @@ SYM_FUNC_START_LOCAL(camellia_xts_crypt_32way) vpxor 14 * 32(%rax), %ymm15, %ymm14; vpxor 15 * 32(%rax), %ymm15, %ymm15; - CALL_NOSPEC %r9; + CALL_NOSPEC r9; addq $(16 * 32), %rsp; diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 0e6690e3618c..8501ec4532f4 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -75,7 +75,7 @@ .text SYM_FUNC_START(crc_pcl) -#define bufp %rdi +#define bufp rdi #define bufp_dw %edi #define bufp_w %di #define bufp_b %dil @@ -105,9 +105,9 @@ SYM_FUNC_START(crc_pcl) ## 1) ALIGN: ################################################################ - mov bufp, bufptmp # rdi = *buf - neg bufp - and $7, bufp # calculate the unalignment amount of + mov %bufp, bufptmp # rdi = *buf + neg %bufp + and $7, %bufp # calculate the unalignment amount of # the address je proc_block # Skip if aligned @@ -123,13 +123,13 @@ SYM_FUNC_START(crc_pcl) do_align: #### Calculate CRC of unaligned bytes of the buffer (if any) movq (bufptmp), tmp # load a quadward from the buffer - add bufp, bufptmp # align buffer pointer for quadword + add %bufp, bufptmp # align buffer pointer for quadword # processing - sub bufp, len # update buffer length + sub %bufp, len # update buffer length align_loop: crc32b %bl, crc_init_dw # compute crc32 of 1-byte shr $8, tmp # get next byte - dec bufp + dec %bufp jne align_loop proc_block: @@ -169,10 +169,10 @@ continue_block: xor crc2, crc2 ## branch into array - lea jump_table(%rip), bufp - movzxw (bufp, %rax, 2), len - lea crc_array(%rip), bufp - lea (bufp, len, 1), bufp + lea jump_table(%rip), %bufp + movzxw (%bufp, %rax, 2), len + lea crc_array(%rip), %bufp + lea (%bufp, len, 1), %bufp JMP_NOSPEC bufp ################################################################ @@ -218,9 +218,9 @@ LABEL crc_ %i ## 4) Combine three results: ################################################################ - lea (K_table-8)(%rip), bufp # first entry is for idx 1 + lea (K_table-8)(%rip), %bufp # first entry is for idx 1 shlq $3, %rax # rax *= 8 - pmovzxdq (bufp,%rax), %xmm0 # 2 consts: K1:K2 + pmovzxdq (%bufp,%rax), %xmm0 # 2 consts: K1:K2 leal (%eax,%eax,2), %eax # rax *= 3 (total *24) subq %rax, tmp # tmp -= rax*24 diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index a801ffc10cbb..18200135603f 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -21,7 +21,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha1_base.h> diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 6394b5fe8db6..dd06249229e1 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -34,7 +34,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/types.h> #include <crypto/sha.h> #include <crypto/sha256_base.h> diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 82cc1b3ced1d..b0b05c93409e 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -32,7 +32,6 @@ #include <linux/init.h> #include <linux/module.h> #include <linux/mm.h> -#include <linux/cryptohash.h> #include <linux/string.h> #include <linux/types.h> #include <crypto/sha.h> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index b67bae7091d7..a5eed844e948 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -816,7 +816,7 @@ SYM_CODE_START(ret_from_fork) /* kernel thread */ 1: movl %edi, %eax - CALL_NOSPEC %ebx + CALL_NOSPEC ebx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -1501,7 +1501,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception_read_cr2) TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - CALL_NOSPEC %edi + CALL_NOSPEC edi jmp ret_from_exception SYM_CODE_END(common_exception_read_cr2) @@ -1522,7 +1522,7 @@ SYM_CODE_START_LOCAL_NOALIGN(common_exception) TRACE_IRQS_OFF movl %esp, %eax # pt_regs pointer - CALL_NOSPEC %edi + CALL_NOSPEC edi jmp ret_from_exception SYM_CODE_END(common_exception) @@ -1536,7 +1536,6 @@ SYM_CODE_START(debug) jmp common_exception SYM_CODE_END(debug) -#ifdef CONFIG_DOUBLEFAULT SYM_CODE_START(double_fault) 1: /* @@ -1576,7 +1575,6 @@ SYM_CODE_START(double_fault) hlt jmp 1b SYM_CODE_END(double_fault) -#endif /* * NMI is doubly nasty. It can happen on the first instruction of @@ -1693,14 +1691,6 @@ SYM_CODE_START(general_protection) jmp common_exception SYM_CODE_END(general_protection) -#ifdef CONFIG_KVM_GUEST -SYM_CODE_START(async_page_fault) - ASM_CLAC - pushl $do_async_page_fault - jmp common_exception_read_cr2 -SYM_CODE_END(async_page_fault) -#endif - SYM_CODE_START(rewind_stack_do_exit) /* Prevent any naive code from trying to unwind to our caller. */ xorl %ebp, %ebp diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 3063aa9090f9..eead1e2bebd5 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -348,7 +348,7 @@ SYM_CODE_START(ret_from_fork) /* kernel thread */ UNWIND_HINT_EMPTY movq %r12, %rdi - CALL_NOSPEC %rbx + CALL_NOSPEC rbx /* * A kernel thread is allowed to return here after successfully * calling do_execve(). Exit to userspace to complete the execve() @@ -1202,10 +1202,6 @@ idtentry xendebug do_debug has_error_code=0 idtentry general_protection do_general_protection has_error_code=1 idtentry page_fault do_page_fault has_error_code=1 read_cr2=1 -#ifdef CONFIG_KVM_GUEST -idtentry async_page_fault do_async_page_fault has_error_code=1 read_cr2=1 -#endif - #ifdef CONFIG_X86_MCE idtentry machine_check do_mce has_error_code=0 paranoid=1 #endif diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 54581ac671b4..d8f8a1a69ed1 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -442,3 +442,4 @@ 435 i386 clone3 sys_clone3 437 i386 openat2 sys_openat2 438 i386 pidfd_getfd sys_pidfd_getfd +439 i386 faccessat2 sys_faccessat2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 37b844f839bc..78847b32e137 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -359,6 +359,7 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 433a1259f61d..54e03ab26ff3 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -24,6 +24,8 @@ VDSO32-$(CONFIG_IA32_EMULATION) := y # files to link into the vdso vobjs-y := vdso-note.o vclock_gettime.o vgetcpu.o +vobjs32-y := vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o +vobjs32-y += vdso32/vclock_gettime.o # files to link into kernel obj-y += vma.o @@ -37,10 +39,12 @@ vdso_img-$(VDSO32-y) += 32 obj-$(VDSO32-y) += vdso32-setup.o vobjs := $(foreach F,$(vobjs-y),$(obj)/$F) +vobjs32 := $(foreach F,$(vobjs32-y),$(obj)/$F) $(obj)/vdso.o: $(obj)/vdso.so targets += vdso.lds $(vobjs-y) +targets += vdso32/vdso32.lds $(vobjs32-y) # Build the vDSO image C files and link them in. vdso_img_objs := $(vdso_img-y:%=vdso-image-%.o) @@ -130,10 +134,6 @@ $(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE CPPFLAGS_vdso32/vdso32.lds = $(CPPFLAGS_vdso.lds) VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -soname linux-gate.so.1 -targets += vdso32/vdso32.lds -targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o -targets += vdso32/vclock_gettime.o - KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO $(obj)/vdso32.so.dbg: KBUILD_AFLAGS = $(KBUILD_AFLAGS_32) $(obj)/vdso32.so.dbg: asflags-$(CONFIG_X86_64) += -m32 @@ -158,12 +158,7 @@ endif $(obj)/vdso32.so.dbg: KBUILD_CFLAGS = $(KBUILD_CFLAGS_32) -$(obj)/vdso32.so.dbg: FORCE \ - $(obj)/vdso32/vdso32.lds \ - $(obj)/vdso32/vclock_gettime.o \ - $(obj)/vdso32/note.o \ - $(obj)/vdso32/system_call.o \ - $(obj)/vdso32/sigreturn.o +$(obj)/vdso32.so.dbg: $(obj)/vdso32/vdso32.lds $(vobjs32) FORCE $(call if_changed,vdso_and_check) # diff --git a/arch/x86/entry/vdso/vdso2c.c b/arch/x86/entry/vdso/vdso2c.c index 3842873b3ae3..7380908045c7 100644 --- a/arch/x86/entry/vdso/vdso2c.c +++ b/arch/x86/entry/vdso/vdso2c.c @@ -187,7 +187,7 @@ static void map_input(const char *name, void **addr, size_t *len, int prot) int fd = open(name, O_RDONLY); if (fd == -1) - err(1, "%s", name); + err(1, "open(%s)", name); tmp_len = lseek(fd, 0, SEEK_END); if (tmp_len == (off_t)-1) @@ -240,7 +240,7 @@ int main(int argc, char **argv) outfilename = argv[3]; outfile = fopen(outfilename, "w"); if (!outfile) - err(1, "%s", argv[2]); + err(1, "fopen(%s)", outfilename); go(raw_addr, raw_len, stripped_addr, stripped_len, outfile, name); diff --git a/arch/x86/entry/vdso/vdso2c.h b/arch/x86/entry/vdso/vdso2c.h index a20b134de2a8..6f46e11ce539 100644 --- a/arch/x86/entry/vdso/vdso2c.h +++ b/arch/x86/entry/vdso/vdso2c.h @@ -13,8 +13,7 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, unsigned long load_size = -1; /* Work around bogus warning */ unsigned long mapping_size; ELF(Ehdr) *hdr = (ELF(Ehdr) *)raw_addr; - int i; - unsigned long j; + unsigned long i, syms_nr; ELF(Shdr) *symtab_hdr = NULL, *strtab_hdr, *secstrings_hdr, *alt_sec = NULL; ELF(Dyn) *dyn = 0, *dyn_end = 0; @@ -86,11 +85,10 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, strtab_hdr = raw_addr + GET_LE(&hdr->e_shoff) + GET_LE(&hdr->e_shentsize) * GET_LE(&symtab_hdr->sh_link); + syms_nr = GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize); /* Walk the symbol table */ - for (i = 0; - i < GET_LE(&symtab_hdr->sh_size) / GET_LE(&symtab_hdr->sh_entsize); - i++) { - int k; + for (i = 0; i < syms_nr; i++) { + unsigned int k; ELF(Sym) *sym = raw_addr + GET_LE(&symtab_hdr->sh_offset) + GET_LE(&symtab_hdr->sh_entsize) * i; const char *sym_name = raw_addr + @@ -150,11 +148,11 @@ static void BITSFUNC(go)(void *raw_addr, size_t raw_len, fprintf(outfile, "static unsigned char raw_data[%lu] __ro_after_init __aligned(PAGE_SIZE) = {", mapping_size); - for (j = 0; j < stripped_len; j++) { - if (j % 10 == 0) + for (i = 0; i < stripped_len; i++) { + if (i % 10 == 0) fprintf(outfile, "\n\t"); fprintf(outfile, "0x%02X, ", - (int)((unsigned char *)stripped_addr)[j]); + (int)((unsigned char *)stripped_addr)[i]); } fprintf(outfile, "\n};\n\n"); diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig index 9a7a1446cb3a..4a809c6cbd2f 100644 --- a/arch/x86/events/Kconfig +++ b/arch/x86/events/Kconfig @@ -10,11 +10,11 @@ config PERF_EVENTS_INTEL_UNCORE available on NehalemEX and more modern processors. config PERF_EVENTS_INTEL_RAPL - tristate "Intel rapl performance events" - depends on PERF_EVENTS && CPU_SUP_INTEL && PCI + tristate "Intel/AMD rapl performance events" + depends on PERF_EVENTS && (CPU_SUP_INTEL || CPU_SUP_AMD) && PCI default y ---help--- - Include support for Intel rapl performance events for power + Include support for Intel and AMD rapl performance events for power monitoring on modern processors. config PERF_EVENTS_INTEL_CSTATE diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 9e07f554333f..12c42eba77ec 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,5 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-only obj-y += core.o probe.o +obj-$(PERF_EVENTS_INTEL_RAPL) += rapl.o obj-y += amd/ obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ +obj-$(CONFIG_CPU_SUP_CENTAUR) += zhaoxin/ +obj-$(CONFIG_CPU_SUP_ZHAOXIN) += zhaoxin/ diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a619763e96e1..9e63ee50b19a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1839,6 +1839,10 @@ static int __init init_hw_perf_events(void) err = amd_pmu_init(); x86_pmu.name = "HYGON"; break; + case X86_VENDOR_ZHAOXIN: + case X86_VENDOR_CENTAUR: + err = zhaoxin_pmu_init(); + break; default: err = -ENOTSUPP; } diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile index 3468b0c1dc7c..e67a5886336c 100644 --- a/arch/x86/events/intel/Makefile +++ b/arch/x86/events/intel/Makefile @@ -2,8 +2,6 @@ obj-$(CONFIG_CPU_SUP_INTEL) += core.o bts.o obj-$(CONFIG_CPU_SUP_INTEL) += ds.o knc.o obj-$(CONFIG_CPU_SUP_INTEL) += lbr.o p4.o p6.o pt.o -obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL) += intel-rapl-perf.o -intel-rapl-perf-objs := rapl.o obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE) += intel-uncore.o intel-uncore-objs := uncore.o uncore_nhmex.o uncore_snb.o uncore_snbep.o obj-$(CONFIG_PERF_EVENTS_INTEL_CSTATE) += intel-cstate.o diff --git a/arch/x86/events/intel/bts.c b/arch/x86/events/intel/bts.c index 6a3b599ee0fe..731dd8d0dbb1 100644 --- a/arch/x86/events/intel/bts.c +++ b/arch/x86/events/intel/bts.c @@ -58,7 +58,7 @@ struct bts_buffer { local_t head; unsigned long end; void **data_pages; - struct bts_phys buf[0]; + struct bts_phys buf[]; }; static struct pmu bts_pmu; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 332954cccece..ca35c8b5ee10 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -1892,8 +1892,8 @@ static __initconst const u64 tnt_hw_cache_extra_regs static struct extra_reg intel_tnt_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ - INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0xffffff9fffull, RSP_0), - INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xffffff9fffull, RSP_1), + INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff0ffffff9fffull, RSP_0), + INTEL_UEVENT_EXTRA_REG(0x02b7, MSR_OFFCORE_RSP_1, 0xff0ffffff9fffull, RSP_1), EVENT_EXTRA_END }; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 1db7a51d9792..e94af4a54d0d 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -226,8 +226,6 @@ static int __init pt_pmu_hw_init(void) pt_pmu.vmx = true; } - attrs = NULL; - for (i = 0; i < PT_CPUID_LEAVES; i++) { cpuid_count(20, i, &pt_pmu.caps[CPUID_EAX + i*PT_CPUID_REGS_NUM], diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 0da4a4605536..b469ddd45515 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -130,7 +130,7 @@ struct intel_uncore_box { struct list_head list; struct list_head active_list; void __iomem *io_addr; - struct intel_uncore_extra_reg shared_regs[0]; + struct intel_uncore_extra_reg shared_regs[]; }; /* CFL uncore 8th cbox MSRs */ diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index f1cd1ca1a77b..e17a3d8a47ed 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -618,6 +618,7 @@ struct x86_pmu { /* PMI handler bits */ unsigned int late_ack :1, + enabled_ack :1, counter_freezing :1; /* * sysfs attrs @@ -1133,3 +1134,12 @@ static inline int is_ht_workaround_enabled(void) return 0; } #endif /* CONFIG_CPU_SUP_INTEL */ + +#if ((defined CONFIG_CPU_SUP_CENTAUR) || (defined CONFIG_CPU_SUP_ZHAOXIN)) +int zhaoxin_pmu_init(void); +#else +static inline int zhaoxin_pmu_init(void) +{ + return 0; +} +#endif /*CONFIG_CPU_SUP_CENTAUR or CONFIG_CPU_SUP_ZHAOXIN*/ diff --git a/arch/x86/events/probe.c b/arch/x86/events/probe.c index c2ede2f3b277..136a1e847254 100644 --- a/arch/x86/events/probe.c +++ b/arch/x86/events/probe.c @@ -10,6 +10,11 @@ not_visible(struct kobject *kobj, struct attribute *attr, int i) return 0; } +/* + * Accepts msr[] array with non populated entries as long as either + * msr[i].msr is 0 or msr[i].grp is NULL. Note that the default sysfs + * visibility is visible when group->is_visible callback is set. + */ unsigned long perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) { @@ -24,8 +29,16 @@ perf_msr_probe(struct perf_msr *msr, int cnt, bool zero, void *data) if (!msr[bit].no_check) { struct attribute_group *grp = msr[bit].grp; + /* skip entry with no group */ + if (!grp) + continue; + grp->is_visible = not_visible; + /* skip unpopulated entry */ + if (!msr[bit].msr) + continue; + if (msr[bit].test && !msr[bit].test(bit, data)) continue; /* Virt sucks; you cannot tell if a R/O MSR is present :/ */ diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/rapl.c index a5dbd25852cb..0f2bf59f4354 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/rapl.c @@ -1,11 +1,14 @@ // SPDX-License-Identifier: GPL-2.0-only /* - * Support Intel RAPL energy consumption counters + * Support Intel/AMD RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b * section 14.7.1 (September 2013) * + * AMD RAPL interface for Fam17h is described in the public PPR: + * https://bugzilla.kernel.org/show_bug.cgi?id=206537 + * * RAPL provides more controls than just reporting energy consumption * however here we only expose the 3 energy consumption free running * counters (pp0, pkg, dram). @@ -58,8 +61,8 @@ #include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> -#include "../perf_event.h" -#include "../probe.h" +#include "perf_event.h" +#include "probe.h" MODULE_LICENSE("GPL"); @@ -128,7 +131,9 @@ struct rapl_pmus { }; struct rapl_model { + struct perf_msr *rapl_msrs; unsigned long events; + unsigned int msr_power_unit; bool apply_quirk; }; @@ -138,7 +143,7 @@ static struct rapl_pmus *rapl_pmus; static cpumask_t rapl_cpu_mask; static unsigned int rapl_cntr_mask; static u64 rapl_timer_ms; -static struct perf_msr rapl_msrs[]; +static struct perf_msr *rapl_msrs; static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { @@ -455,9 +460,16 @@ static struct attribute *rapl_events_cores[] = { NULL, }; +static umode_t +rapl_not_visible(struct kobject *kobj, struct attribute *attr, int i) +{ + return 0; +} + static struct attribute_group rapl_events_cores_group = { .name = "events", .attrs = rapl_events_cores, + .is_visible = rapl_not_visible, }; static struct attribute *rapl_events_pkg[] = { @@ -470,6 +482,7 @@ static struct attribute *rapl_events_pkg[] = { static struct attribute_group rapl_events_pkg_group = { .name = "events", .attrs = rapl_events_pkg, + .is_visible = rapl_not_visible, }; static struct attribute *rapl_events_ram[] = { @@ -482,6 +495,7 @@ static struct attribute *rapl_events_ram[] = { static struct attribute_group rapl_events_ram_group = { .name = "events", .attrs = rapl_events_ram, + .is_visible = rapl_not_visible, }; static struct attribute *rapl_events_gpu[] = { @@ -494,6 +508,7 @@ static struct attribute *rapl_events_gpu[] = { static struct attribute_group rapl_events_gpu_group = { .name = "events", .attrs = rapl_events_gpu, + .is_visible = rapl_not_visible, }; static struct attribute *rapl_events_psys[] = { @@ -506,6 +521,7 @@ static struct attribute *rapl_events_psys[] = { static struct attribute_group rapl_events_psys_group = { .name = "events", .attrs = rapl_events_psys, + .is_visible = rapl_not_visible, }; static bool test_msr(int idx, void *data) @@ -513,7 +529,7 @@ static bool test_msr(int idx, void *data) return test_bit(idx, (unsigned long *) data); } -static struct perf_msr rapl_msrs[] = { +static struct perf_msr intel_rapl_msrs[] = { [PERF_RAPL_PP0] = { MSR_PP0_ENERGY_STATUS, &rapl_events_cores_group, test_msr }, [PERF_RAPL_PKG] = { MSR_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, [PERF_RAPL_RAM] = { MSR_DRAM_ENERGY_STATUS, &rapl_events_ram_group, test_msr }, @@ -521,6 +537,16 @@ static struct perf_msr rapl_msrs[] = { [PERF_RAPL_PSYS] = { MSR_PLATFORM_ENERGY_STATUS, &rapl_events_psys_group, test_msr }, }; +/* + * Force to PERF_RAPL_MAX size due to: + * - perf_msr_probe(PERF_RAPL_MAX) + * - want to use same event codes across both architectures + */ +static struct perf_msr amd_rapl_msrs[PERF_RAPL_MAX] = { + [PERF_RAPL_PKG] = { MSR_AMD_PKG_ENERGY_STATUS, &rapl_events_pkg_group, test_msr }, +}; + + static int rapl_cpu_offline(unsigned int cpu) { struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); @@ -575,13 +601,13 @@ static int rapl_cpu_online(unsigned int cpu) return 0; } -static int rapl_check_hw_unit(bool apply_quirk) +static int rapl_check_hw_unit(struct rapl_model *rm) { u64 msr_rapl_power_unit_bits; int i; /* protect rdmsrl() to handle virtualization */ - if (rdmsrl_safe(MSR_RAPL_POWER_UNIT, &msr_rapl_power_unit_bits)) + if (rdmsrl_safe(rm->msr_power_unit, &msr_rapl_power_unit_bits)) return -1; for (i = 0; i < NR_RAPL_DOMAINS; i++) rapl_hw_unit[i] = (msr_rapl_power_unit_bits >> 8) & 0x1FULL; @@ -592,7 +618,7 @@ static int rapl_check_hw_unit(bool apply_quirk) * "Intel Xeon Processor E5-1600 and E5-2600 v3 Product Families, V2 * of 2. Datasheet, September 2014, Reference Number: 330784-001 " */ - if (apply_quirk) + if (rm->apply_quirk) rapl_hw_unit[PERF_RAPL_RAM] = 16; /* @@ -673,6 +699,8 @@ static struct rapl_model model_snb = { BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_PP1), .apply_quirk = false, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, }; static struct rapl_model model_snbep = { @@ -680,6 +708,8 @@ static struct rapl_model model_snbep = { BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .apply_quirk = false, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, }; static struct rapl_model model_hsw = { @@ -688,6 +718,8 @@ static struct rapl_model model_hsw = { BIT(PERF_RAPL_RAM) | BIT(PERF_RAPL_PP1), .apply_quirk = false, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, }; static struct rapl_model model_hsx = { @@ -695,12 +727,16 @@ static struct rapl_model model_hsx = { BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .apply_quirk = true, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, }; static struct rapl_model model_knl = { .events = BIT(PERF_RAPL_PKG) | BIT(PERF_RAPL_RAM), .apply_quirk = true, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, }; static struct rapl_model model_skl = { @@ -710,6 +746,15 @@ static struct rapl_model model_skl = { BIT(PERF_RAPL_PP1) | BIT(PERF_RAPL_PSYS), .apply_quirk = false, + .msr_power_unit = MSR_RAPL_POWER_UNIT, + .rapl_msrs = intel_rapl_msrs, +}; + +static struct rapl_model model_amd_fam17h = { + .events = BIT(PERF_RAPL_PKG), + .apply_quirk = false, + .msr_power_unit = MSR_AMD_RAPL_POWER_UNIT, + .rapl_msrs = amd_rapl_msrs, }; static const struct x86_cpu_id rapl_model_match[] __initconst = { @@ -738,8 +783,11 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_GOLDMONT_PLUS, &model_hsw), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &model_hsx), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &model_hsx), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE_L, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, &model_skl), + X86_MATCH_VENDOR_FAM(AMD, 0x17, &model_amd_fam17h), {}, }; MODULE_DEVICE_TABLE(x86cpu, rapl_model_match); @@ -755,10 +803,13 @@ static int __init rapl_pmu_init(void) return -ENODEV; rm = (struct rapl_model *) id->driver_data; + + rapl_msrs = rm->rapl_msrs; + rapl_cntr_mask = perf_msr_probe(rapl_msrs, PERF_RAPL_MAX, false, (void *) &rm->events); - ret = rapl_check_hw_unit(rm->apply_quirk); + ret = rapl_check_hw_unit(rm); if (ret) return ret; diff --git a/arch/x86/events/zhaoxin/Makefile b/arch/x86/events/zhaoxin/Makefile new file mode 100644 index 000000000000..642c1174d662 --- /dev/null +++ b/arch/x86/events/zhaoxin/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +obj-y += core.o diff --git a/arch/x86/events/zhaoxin/core.c b/arch/x86/events/zhaoxin/core.c new file mode 100644 index 000000000000..898fa1ae9ceb --- /dev/null +++ b/arch/x86/events/zhaoxin/core.c @@ -0,0 +1,613 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Zhoaxin PMU; like Intel Architectural PerfMon-v2 + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/stddef.h> +#include <linux/types.h> +#include <linux/init.h> +#include <linux/slab.h> +#include <linux/export.h> +#include <linux/nmi.h> + +#include <asm/cpufeature.h> +#include <asm/hardirq.h> +#include <asm/apic.h> + +#include "../perf_event.h" + +/* + * Zhaoxin PerfMon, used on zxc and later. + */ +static u64 zx_pmon_event_map[PERF_COUNT_HW_MAX] __read_mostly = { + + [PERF_COUNT_HW_CPU_CYCLES] = 0x0082, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0515, + [PERF_COUNT_HW_CACHE_MISSES] = 0x051a, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0083, +}; + +static struct event_constraint zxc_event_constraints[] __read_mostly = { + + FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint zxd_event_constraints[] __read_mostly = { + + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* retired instructions */ + FIXED_EVENT_CONSTRAINT(0x0082, 1), /* unhalted core clock cycles */ + FIXED_EVENT_CONSTRAINT(0x0083, 2), /* unhalted bus clock cycles */ + EVENT_CONSTRAINT_END +}; + +static __initconst const u64 zxd_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0042, + [C(RESULT_MISS)] = 0x0538, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0043, + [C(RESULT_MISS)] = 0x0562, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0300, + [C(RESULT_MISS)] = 0x0301, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x030a, + [C(RESULT_MISS)] = 0x030b, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0042, + [C(RESULT_MISS)] = 0x052c, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0043, + [C(RESULT_MISS)] = 0x0530, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0564, + [C(RESULT_MISS)] = 0x0565, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, + [C(RESULT_MISS)] = 0x0534, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0700, + [C(RESULT_MISS)] = 0x0709, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +}; + +static __initconst const u64 zxe_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { +[C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0568, + [C(RESULT_MISS)] = 0x054b, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0669, + [C(RESULT_MISS)] = 0x0562, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0300, + [C(RESULT_MISS)] = 0x0301, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x030a, + [C(RESULT_MISS)] = 0x030b, + }, +}, +[C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0, + [C(RESULT_MISS)] = 0x0, + }, +}, +[C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0568, + [C(RESULT_MISS)] = 0x052c, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = 0x0669, + [C(RESULT_MISS)] = 0x0530, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = 0x0564, + [C(RESULT_MISS)] = 0x0565, + }, +}, +[C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x00c0, + [C(RESULT_MISS)] = 0x0534, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = 0x0028, + [C(RESULT_MISS)] = 0x0029, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +[C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = -1, + [C(RESULT_MISS)] = -1, + }, +}, +}; + +static void zhaoxin_pmu_disable_all(void) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); +} + +static void zhaoxin_pmu_enable_all(int added) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); +} + +static inline u64 zhaoxin_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void zhaoxin_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); +} + +static inline void zxc_pmu_ack_status(u64 ack) +{ + /* + * ZXC needs global control enabled in order to clear status bits. + */ + zhaoxin_pmu_enable_all(0); + zhaoxin_pmu_ack_status(ack); + zhaoxin_pmu_disable_all(); +} + +static void zhaoxin_pmu_disable_fixed(struct hw_perf_event *hwc) +{ + int idx = hwc->idx - INTEL_PMC_IDX_FIXED; + u64 ctrl_val, mask; + + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + wrmsrl(hwc->config_base, ctrl_val); +} + +static void zhaoxin_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + zhaoxin_pmu_disable_fixed(hwc); + return; + } + + x86_pmu_disable_event(event); +} + +static void zhaoxin_pmu_enable_fixed(struct hw_perf_event *hwc) +{ + int idx = hwc->idx - INTEL_PMC_IDX_FIXED; + u64 ctrl_val, bits, mask; + + /* + * Enable IRQ generation (0x8), + * and enable ring-3 counting (0x2) and ring-0 counting (0x1) + * if requested: + */ + bits = 0x8ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= 0x2; + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + bits |= 0x1; + + bits <<= (idx * 4); + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + ctrl_val |= bits; + wrmsrl(hwc->config_base, ctrl_val); +} + +static void zhaoxin_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + zhaoxin_pmu_enable_fixed(hwc); + return; + } + + __x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE); +} + +/* + * This handler is triggered by the local APIC, so the APIC IRQ handling + * rules apply: + */ +static int zhaoxin_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int handled = 0; + u64 status; + int bit; + + cpuc = this_cpu_ptr(&cpu_hw_events); + apic_write(APIC_LVTPC, APIC_DM_NMI); + zhaoxin_pmu_disable_all(); + status = zhaoxin_pmu_get_status(); + if (!status) + goto done; + +again: + if (x86_pmu.enabled_ack) + zxc_pmu_ack_status(status); + else + zhaoxin_pmu_ack_status(status); + + inc_irq_stat(apic_perf_irqs); + + /* + * CondChgd bit 63 doesn't mean any overflow status. Ignore + * and clear the bit. + */ + if (__test_and_clear_bit(63, (unsigned long *)&status)) { + if (!status) + goto done; + } + + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + handled++; + + if (!test_bit(bit, cpuc->active_mask)) + continue; + + x86_perf_event_update(event); + perf_sample_data_init(&data, 0, event->hw.last_period); + + if (!x86_perf_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + x86_pmu_stop(event, 0); + } + + /* + * Repeat if there is more work to be done: + */ + status = zhaoxin_pmu_get_status(); + if (status) + goto again; + +done: + zhaoxin_pmu_enable_all(0); + return handled; +} + +static u64 zhaoxin_pmu_event_map(int hw_event) +{ + return zx_pmon_event_map[hw_event]; +} + +static struct event_constraint * +zhaoxin_get_event_constraints(struct cpu_hw_events *cpuc, int idx, + struct perf_event *event) +{ + struct event_constraint *c; + + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + if ((event->hw.config & c->cmask) == c->code) + return c; + } + } + + return &unconstrained; +} + +PMU_FORMAT_ATTR(event, "config:0-7"); +PMU_FORMAT_ATTR(umask, "config:8-15"); +PMU_FORMAT_ATTR(edge, "config:18"); +PMU_FORMAT_ATTR(inv, "config:23"); +PMU_FORMAT_ATTR(cmask, "config:24-31"); + +static struct attribute *zx_arch_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + &format_attr_edge.attr, + &format_attr_inv.attr, + &format_attr_cmask.attr, + NULL, +}; + +static ssize_t zhaoxin_event_sysfs_show(char *page, u64 config) +{ + u64 event = (config & ARCH_PERFMON_EVENTSEL_EVENT); + + return x86_event_sysfs_show(page, config, event); +} + +static const struct x86_pmu zhaoxin_pmu __initconst = { + .name = "zhaoxin", + .handle_irq = zhaoxin_pmu_handle_irq, + .disable_all = zhaoxin_pmu_disable_all, + .enable_all = zhaoxin_pmu_enable_all, + .enable = zhaoxin_pmu_enable_event, + .disable = zhaoxin_pmu_disable_event, + .hw_config = x86_pmu_hw_config, + .schedule_events = x86_schedule_events, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = zhaoxin_pmu_event_map, + .max_events = ARRAY_SIZE(zx_pmon_event_map), + .apic = 1, + /* + * For zxd/zxe, read/write operation for PMCx MSR is 48 bits. + */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = zhaoxin_get_event_constraints, + + .format_attrs = zx_arch_formats_attr, + .events_sysfs_show = zhaoxin_event_sysfs_show, +}; + +static const struct { int id; char *name; } zx_arch_events_map[] __initconst = { + { PERF_COUNT_HW_CPU_CYCLES, "cpu cycles" }, + { PERF_COUNT_HW_INSTRUCTIONS, "instructions" }, + { PERF_COUNT_HW_BUS_CYCLES, "bus cycles" }, + { PERF_COUNT_HW_CACHE_REFERENCES, "cache references" }, + { PERF_COUNT_HW_CACHE_MISSES, "cache misses" }, + { PERF_COUNT_HW_BRANCH_INSTRUCTIONS, "branch instructions" }, + { PERF_COUNT_HW_BRANCH_MISSES, "branch misses" }, +}; + +static __init void zhaoxin_arch_events_quirk(void) +{ + int bit; + + /* disable event that reported as not presend by cpuid */ + for_each_set_bit(bit, x86_pmu.events_mask, ARRAY_SIZE(zx_arch_events_map)) { + zx_pmon_event_map[zx_arch_events_map[bit].id] = 0; + pr_warn("CPUID marked event: \'%s\' unavailable\n", + zx_arch_events_map[bit].name); + } +} + +__init int zhaoxin_pmu_init(void) +{ + union cpuid10_edx edx; + union cpuid10_eax eax; + union cpuid10_ebx ebx; + struct event_constraint *c; + unsigned int unused; + int version; + + pr_info("Welcome to zhaoxin pmu!\n"); + + /* + * Check whether the Architectural PerfMon supports + * hw_event or not. + */ + cpuid(10, &eax.full, &ebx.full, &unused, &edx.full); + + if (eax.split.mask_length < ARCH_PERFMON_EVENTS_COUNT - 1) + return -ENODEV; + + version = eax.split.version_id; + if (version != 2) + return -ENODEV; + + x86_pmu = zhaoxin_pmu; + pr_info("Version check pass!\n"); + + x86_pmu.version = version; + x86_pmu.num_counters = eax.split.num_counters; + x86_pmu.cntval_bits = eax.split.bit_width; + x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; + x86_pmu.events_maskl = ebx.full; + x86_pmu.events_mask_len = eax.split.mask_length; + + x86_pmu.num_counters_fixed = edx.split.num_counters_fixed; + x86_add_quirk(zhaoxin_arch_events_quirk); + + switch (boot_cpu_data.x86) { + case 0x06: + if (boot_cpu_data.x86_model == 0x0f || boot_cpu_data.x86_model == 0x19) { + + x86_pmu.max_period = x86_pmu.cntval_mask >> 1; + + /* Clearing status works only if the global control is enable on zxc. */ + x86_pmu.enabled_ack = 1; + + x86_pmu.event_constraints = zxc_event_constraints; + zx_pmon_event_map[PERF_COUNT_HW_INSTRUCTIONS] = 0; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_REFERENCES] = 0; + zx_pmon_event_map[PERF_COUNT_HW_CACHE_MISSES] = 0; + zx_pmon_event_map[PERF_COUNT_HW_BUS_CYCLES] = 0; + + pr_cont("ZXC events, "); + break; + } + return -ENODEV; + + case 0x07: + zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = + X86_CONFIG(.event = 0x01, .umask = 0x01, .inv = 0x01, .cmask = 0x01); + + zx_pmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = + X86_CONFIG(.event = 0x0f, .umask = 0x04, .inv = 0, .cmask = 0); + + switch (boot_cpu_data.x86_model) { + case 0x1b: + memcpy(hw_cache_event_ids, zxd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = zxd_event_constraints; + + zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0700; + zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0709; + + pr_cont("ZXD events, "); + break; + case 0x3b: + memcpy(hw_cache_event_ids, zxe_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = zxd_event_constraints; + + zx_pmon_event_map[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0028; + zx_pmon_event_map[PERF_COUNT_HW_BRANCH_MISSES] = 0x0029; + + pr_cont("ZXE events, "); + break; + default: + return -ENODEV; + } + break; + + default: + return -ENODEV; + } + + x86_pmu.intel_ctrl = (1 << (x86_pmu.num_counters)) - 1; + x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_counters_fixed)-1) << INTEL_PMC_IDX_FIXED; + + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; + c->weight += x86_pmu.num_counters; + } + } + + return 0; +} + diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index acf76b466db6..e2137070386a 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -97,8 +97,7 @@ static int hv_cpu_init(unsigned int cpu) * not be stopped in the case of CPU offlining and the VM will hang. */ if (!*hvp) { - *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO, - PAGE_KERNEL); + *hvp = __vmalloc(PAGE_SIZE, GFP_KERNEL | __GFP_ZERO); } if (*hvp) { @@ -379,7 +378,7 @@ void __init hyperv_init(void) guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); + hv_hypercall_pg = vmalloc_exec(PAGE_SIZE); if (hv_hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); goto remove_cpuhp_state; diff --git a/arch/x86/ia32/audit.c b/arch/x86/ia32/audit.c index 3d21eab7aaed..6efe6cb3768a 100644 --- a/arch/x86/ia32/audit.c +++ b/arch/x86/ia32/audit.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <asm/unistd_32.h> +#include <asm/audit.h> unsigned ia32_dir_class[] = { #include <asm-generic/audit_dir_write.h> diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index f9d8804144d0..81cf22398cd1 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -350,7 +350,7 @@ int ia32_setup_rt_frame(int sig, struct ksignal *ksig, unsafe_put_user(*(__u64 *)set, (__u64 *)&frame->uc.uc_sigmask, Efault); user_access_end(); - if (__copy_siginfo_to_user32(&frame->info, &ksig->info, false)) + if (__copy_siginfo_to_user32(&frame->info, &ksig->info)) return -EFAULT; /* Set up registers for signal handler */ diff --git a/arch/x86/include/asm/GEN-for-each-reg.h b/arch/x86/include/asm/GEN-for-each-reg.h new file mode 100644 index 000000000000..1b07fb102c4e --- /dev/null +++ b/arch/x86/include/asm/GEN-for-each-reg.h @@ -0,0 +1,25 @@ +#ifdef CONFIG_64BIT +GEN(rax) +GEN(rbx) +GEN(rcx) +GEN(rdx) +GEN(rsi) +GEN(rdi) +GEN(rbp) +GEN(r8) +GEN(r9) +GEN(r10) +GEN(r11) +GEN(r12) +GEN(r13) +GEN(r14) +GEN(r15) +#else +GEN(eax) +GEN(ebx) +GEN(ecx) +GEN(edx) +GEN(esi) +GEN(edi) +GEN(ebp) +#endif diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h index 99bb207fc04c..87ce8e963215 100644 --- a/arch/x86/include/asm/apb_timer.h +++ b/arch/x86/include/asm/apb_timer.h @@ -25,11 +25,7 @@ #define APBT_MIN_FREQ 1000000 #define APBT_MMAP_SIZE 1024 -#define APBT_DEV_USED 1 - extern void apbt_time_init(void); -extern unsigned long apbt_quick_calibrate(void); -extern int arch_setup_apbt_irqs(int irq, int trigger, int mask, int cpu); extern void apbt_setup_secondary_clock(void); extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint); @@ -38,7 +34,6 @@ extern int sfi_mtimer_num; #else /* CONFIG_APB_TIMER */ -static inline unsigned long apbt_quick_calibrate(void) {return 0; } static inline void apbt_time_init(void) { } #endif diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 7a4bb1bd4bdb..ebc248e49549 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -15,16 +15,6 @@ #define RDRAND_RETRY_LOOPS 10 -#define RDRAND_INT ".byte 0x0f,0xc7,0xf0" -#define RDSEED_INT ".byte 0x0f,0xc7,0xf8" -#ifdef CONFIG_X86_64 -# define RDRAND_LONG ".byte 0x48,0x0f,0xc7,0xf0" -# define RDSEED_LONG ".byte 0x48,0x0f,0xc7,0xf8" -#else -# define RDRAND_LONG RDRAND_INT -# define RDSEED_LONG RDSEED_INT -#endif - /* Unconditional execution of RDRAND and RDSEED */ static inline bool __must_check rdrand_long(unsigned long *v) @@ -32,9 +22,9 @@ static inline bool __must_check rdrand_long(unsigned long *v) bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { - asm volatile(RDRAND_LONG + asm volatile("rdrand %[out]" CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + : CC_OUT(c) (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); @@ -46,9 +36,9 @@ static inline bool __must_check rdrand_int(unsigned int *v) bool ok; unsigned int retry = RDRAND_RETRY_LOOPS; do { - asm volatile(RDRAND_INT + asm volatile("rdrand %[out]" CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + : CC_OUT(c) (ok), [out] "=r" (*v)); if (ok) return true; } while (--retry); @@ -58,18 +48,18 @@ static inline bool __must_check rdrand_int(unsigned int *v) static inline bool __must_check rdseed_long(unsigned long *v) { bool ok; - asm volatile(RDSEED_LONG + asm volatile("rdseed %[out]" CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + : CC_OUT(c) (ok), [out] "=r" (*v)); return ok; } static inline bool __must_check rdseed_int(unsigned int *v) { bool ok; - asm volatile(RDSEED_INT + asm volatile("rdseed %[out]" CC_SET(c) - : CC_OUT(c) (ok), "=a" (*v)); + : CC_OUT(c) (ok), [out] "=r" (*v)); return ok; } diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h index ce92c4acc913..9bf2620ce817 100644 --- a/arch/x86/include/asm/asm-prototypes.h +++ b/arch/x86/include/asm/asm-prototypes.h @@ -17,24 +17,19 @@ extern void cmpxchg8b_emu(void); #endif #ifdef CONFIG_RETPOLINE -#ifdef CONFIG_X86_32 -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_e ## reg(void); -#else -#define INDIRECT_THUNK(reg) extern asmlinkage void __x86_indirect_thunk_r ## reg(void); -INDIRECT_THUNK(8) -INDIRECT_THUNK(9) -INDIRECT_THUNK(10) -INDIRECT_THUNK(11) -INDIRECT_THUNK(12) -INDIRECT_THUNK(13) -INDIRECT_THUNK(14) -INDIRECT_THUNK(15) -#endif -INDIRECT_THUNK(ax) -INDIRECT_THUNK(bx) -INDIRECT_THUNK(cx) -INDIRECT_THUNK(dx) -INDIRECT_THUNK(si) -INDIRECT_THUNK(di) -INDIRECT_THUNK(bp) + +#define DECL_INDIRECT_THUNK(reg) \ + extern asmlinkage void __x86_indirect_thunk_ ## reg (void); + +#define DECL_RETPOLINE(reg) \ + extern asmlinkage void __x86_retpoline_ ## reg (void); + +#undef GEN +#define GEN(reg) DECL_INDIRECT_THUNK(reg) +#include <asm/GEN-for-each-reg.h> + +#undef GEN +#define GEN(reg) DECL_RETPOLINE(reg) +#include <asm/GEN-for-each-reg.h> + #endif /* CONFIG_RETPOLINE */ diff --git a/arch/x86/include/asm/audit.h b/arch/x86/include/asm/audit.h new file mode 100644 index 000000000000..36aec57ea7a3 --- /dev/null +++ b/arch/x86/include/asm/audit.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AUDIT_H +#define _ASM_X86_AUDIT_H + +int ia32_classify_syscall(unsigned int syscall); + +#endif /* _ASM_X86_AUDIT_H */ diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h index d79d1e622dcf..0ada98d5d09f 100644 --- a/arch/x86/include/asm/checksum.h +++ b/arch/x86/include/asm/checksum.h @@ -1,4 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 +#define HAVE_CSUM_COPY_USER #ifdef CONFIG_X86_32 # include <asm/checksum_32.h> #else diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h index f57b94e02c57..11624c8a9d8d 100644 --- a/arch/x86/include/asm/checksum_32.h +++ b/arch/x86/include/asm/checksum_32.h @@ -44,18 +44,21 @@ static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); } -static inline __wsum csum_partial_copy_from_user(const void __user *src, - void *dst, - int len, __wsum sum, - int *err_ptr) +static inline __wsum csum_and_copy_from_user(const void __user *src, + void *dst, int len, + __wsum sum, int *err_ptr) { __wsum ret; might_sleep(); - stac(); + if (!user_access_begin(src, len)) { + if (len) + *err_ptr = -EFAULT; + return sum; + } ret = csum_partial_copy_generic((__force void *)src, dst, len, sum, err_ptr, NULL); - clac(); + user_access_end(); return ret; } @@ -173,7 +176,6 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, /* * Copy and checksum to user */ -#define HAVE_CSUM_COPY_USER static inline __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, __wsum sum, @@ -182,11 +184,10 @@ static inline __wsum csum_and_copy_to_user(const void *src, __wsum ret; might_sleep(); - if (access_ok(dst, len)) { - stac(); + if (user_access_begin(dst, len)) { ret = csum_partial_copy_generic(src, (__force void *)dst, len, sum, NULL, err_ptr); - clac(); + user_access_end(); return ret; } diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h index 3ec6d3267cf9..0a289b87e872 100644 --- a/arch/x86/include/asm/checksum_64.h +++ b/arch/x86/include/asm/checksum_64.h @@ -129,27 +129,19 @@ static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, */ extern __wsum csum_partial(const void *buff, int len, __wsum sum); -#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1 -#define HAVE_CSUM_COPY_USER 1 - - /* Do not call this directly. Use the wrappers below */ extern __visible __wsum csum_partial_copy_generic(const void *src, const void *dst, int len, __wsum sum, int *src_err_ptr, int *dst_err_ptr); -extern __wsum csum_partial_copy_from_user(const void __user *src, void *dst, +extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum isum, int *errp); -extern __wsum csum_partial_copy_to_user(const void *src, void __user *dst, +extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len, __wsum isum, int *errp); extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum); -/* Old names. To be removed. */ -#define csum_and_copy_to_user csum_partial_copy_to_user -#define csum_and_copy_from_user csum_partial_copy_from_user - /** * ip_compute_csum - Compute an 16bit IP checksum. * @buff: buffer address. diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 52e9f3480f69..d4edf281fff4 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -214,7 +214,11 @@ static inline bool in_compat_syscall(void) #endif struct compat_siginfo; -int __copy_siginfo_to_user32(struct compat_siginfo __user *to, - const kernel_siginfo_t *from, bool x32_ABI); + +#ifdef CONFIG_X86_X32_ABI +int copy_siginfo_to_user32(struct compat_siginfo __user *to, + const kernel_siginfo_t *from); +#define copy_siginfo_to_user32 copy_siginfo_to_user32 +#endif /* CONFIG_X86_X32_ABI */ #endif /* _ASM_X86_COMPAT_H */ diff --git a/arch/x86/include/asm/cpu_device_id.h b/arch/x86/include/asm/cpu_device_id.h index cf3d621c6892..eb8fcede9e3b 100644 --- a/arch/x86/include/asm/cpu_device_id.h +++ b/arch/x86/include/asm/cpu_device_id.h @@ -20,12 +20,14 @@ #define X86_CENTAUR_FAM6_C7_D 0xd #define X86_CENTAUR_FAM6_NANO 0xf +#define X86_STEPPINGS(mins, maxs) GENMASK(maxs, mins) /** - * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Base macro for CPU matching + * X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE - Base macro for CPU matching * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY * The name is expanded to X86_VENDOR_@_vendor * @_family: The family number or X86_FAMILY_ANY * @_model: The model number, model constant or X86_MODEL_ANY + * @_steppings: Bitmask for steppings, stepping constant or X86_STEPPING_ANY * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY * @_data: Driver specific data or NULL. The internal storage * format is unsigned long. The supplied value, pointer @@ -37,16 +39,35 @@ * into another macro at the usage site for good reasons, then please * start this local macro with X86_MATCH to allow easy grepping. */ -#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(_vendor, _family, _model, \ - _feature, _data) { \ +#define X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(_vendor, _family, _model, \ + _steppings, _feature, _data) { \ .vendor = X86_VENDOR_##_vendor, \ .family = _family, \ .model = _model, \ + .steppings = _steppings, \ .feature = _feature, \ .driver_data = (unsigned long) _data \ } /** + * X86_MATCH_VENDOR_FAM_MODEL_FEATURE - Macro for CPU matching + * @_vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY + * The name is expanded to X86_VENDOR_@_vendor + * @_family: The family number or X86_FAMILY_ANY + * @_model: The model number, model constant or X86_MODEL_ANY + * @_feature: A X86_FEATURE bit or X86_FEATURE_ANY + * @_data: Driver specific data or NULL. The internal storage + * format is unsigned long. The supplied value, pointer + * etc. is casted to unsigned long internally. + * + * The steppings arguments of X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE() is + * set to wildcards. + */ +#define X86_MATCH_VENDOR_FAM_MODEL_FEATURE(vendor, family, model, feature, data) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(vendor, family, model, \ + X86_STEPPING_ANY, feature, data) + +/** * X86_MATCH_VENDOR_FAM_FEATURE - Macro for matching vendor, family and CPU feature * @vendor: The vendor name, e.g. INTEL, AMD, HYGON, ..., ANY * The name is expanded to X86_VENDOR_@vendor @@ -139,6 +160,10 @@ #define X86_MATCH_INTEL_FAM6_MODEL(model, data) \ X86_MATCH_VENDOR_FAM_MODEL(INTEL, 6, INTEL_FAM6_##model, data) +#define X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(model, steppings, data) \ + X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, INTEL_FAM6_##model, \ + steppings, X86_FEATURE_ANY, data) + /* * Match specific microcode revisions. * diff --git a/arch/x86/include/asm/delay.h b/arch/x86/include/asm/delay.h index de9e7841f953..630891d25819 100644 --- a/arch/x86/include/asm/delay.h +++ b/arch/x86/include/asm/delay.h @@ -3,8 +3,10 @@ #define _ASM_X86_DELAY_H #include <asm-generic/delay.h> +#include <linux/init.h> -void use_tsc_delay(void); +void __init use_tsc_delay(void); +void __init use_tpause_delay(void); void use_mwaitx_delay(void); #endif /* _ASM_X86_DELAY_H */ diff --git a/arch/x86/include/asm/doublefault.h b/arch/x86/include/asm/doublefault.h index af9a14ac8962..54a6e4a2e132 100644 --- a/arch/x86/include/asm/doublefault.h +++ b/arch/x86/include/asm/doublefault.h @@ -2,7 +2,7 @@ #ifndef _ASM_X86_DOUBLEFAULT_H #define _ASM_X86_DOUBLEFAULT_H -#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT) +#ifdef CONFIG_X86_32 extern void doublefault_init_cpu_tss(void); #else static inline void doublefault_init_cpu_tss(void) diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8391c115c0ec..89dcc7aa7e2c 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -9,6 +9,7 @@ #include <asm/nospec-branch.h> #include <asm/mmu_context.h> #include <linux/build_bug.h> +#include <linux/kernel.h> extern unsigned long efi_fw_vendor, efi_config_table; @@ -225,14 +226,21 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, /* arch specific definitions used by the stub code */ -__attribute_const__ bool efi_is_64bit(void); +#ifdef CONFIG_EFI_MIXED + +#define ARCH_HAS_EFISTUB_WRAPPERS + +static inline bool efi_is_64bit(void) +{ + extern const bool efi_is64; + + return efi_is64; +} static inline bool efi_is_native(void) { if (!IS_ENABLED(CONFIG_X86_64)) return true; - if (!IS_ENABLED(CONFIG_EFI_MIXED)) - return true; return efi_is_64bit(); } @@ -286,6 +294,15 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_allocate_pool(type, size, buffer) \ ((type), (size), efi64_zero_upper(buffer)) +#define __efi64_argmap_create_event(type, tpl, f, c, event) \ + ((type), (tpl), (f), (c), efi64_zero_upper(event)) + +#define __efi64_argmap_set_timer(event, type, time) \ + ((event), (type), lower_32_bits(time), upper_32_bits(time)) + +#define __efi64_argmap_wait_for_event(num, event, index) \ + ((num), (event), efi64_zero_upper(index)) + #define __efi64_argmap_handle_protocol(handle, protocol, interface) \ ((handle), (protocol), efi64_zero_upper(interface)) @@ -307,6 +324,10 @@ static inline u32 efi64_convert_status(efi_status_t status) #define __efi64_argmap_load_file(protocol, path, policy, bufsize, buf) \ ((protocol), (path), (policy), efi64_zero_upper(bufsize), (buf)) +/* Graphics Output Protocol */ +#define __efi64_argmap_query_mode(gop, mode, size, info) \ + ((gop), (mode), efi64_zero_upper(size), efi64_zero_upper(info)) + /* * The macros below handle the plumbing for the argument mapping. To add a * mapping for a specific EFI method, simply define a macro @@ -335,15 +356,26 @@ static inline u32 efi64_convert_status(efi_status_t status) #define efi_bs_call(func, ...) \ (efi_is_native() \ - ? efi_system_table()->boottime->func(__VA_ARGS__) \ - : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ - boottime), func, __VA_ARGS__)) + ? efi_system_table->boottime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table, \ + boottime), \ + func, __VA_ARGS__)) #define efi_rt_call(func, ...) \ (efi_is_native() \ - ? efi_system_table()->runtime->func(__VA_ARGS__) \ - : __efi64_thunk_map(efi_table_attr(efi_system_table(), \ - runtime), func, __VA_ARGS__)) + ? efi_system_table->runtime->func(__VA_ARGS__) \ + : __efi64_thunk_map(efi_table_attr(efi_system_table, \ + runtime), \ + func, __VA_ARGS__)) + +#else /* CONFIG_EFI_MIXED */ + +static inline bool efi_is_64bit(void) +{ + return IS_ENABLED(CONFIG_X86_64); +} + +#endif /* CONFIG_EFI_MIXED */ extern bool efi_reboot_required(void); extern bool efi_is_table_address(unsigned long phys_addr); diff --git a/arch/x86/include/asm/floppy.h b/arch/x86/include/asm/floppy.h index 7ec59edde154..d43717b423cb 100644 --- a/arch/x86/include/asm/floppy.h +++ b/arch/x86/include/asm/floppy.h @@ -31,8 +31,8 @@ #define CSW fd_routine[can_use_virtual_dma & 1] -#define fd_inb(port) inb_p(port) -#define fd_outb(value, port) outb_p(value, port) +#define fd_inb(base, reg) inb_p((base) + (reg)) +#define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) #define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy") #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) @@ -77,25 +77,26 @@ static irqreturn_t floppy_hardint(int irq, void *dev_id) st = 1; for (lcount = virtual_dma_count, lptr = virtual_dma_addr; lcount; lcount--, lptr++) { - st = inb(virtual_dma_port + 4) & 0xa0; - if (st != 0xa0) + st = inb(virtual_dma_port + FD_STATUS); + st &= STATUS_DMA | STATUS_READY; + if (st != (STATUS_DMA | STATUS_READY)) break; if (virtual_dma_mode) - outb_p(*lptr, virtual_dma_port + 5); + outb_p(*lptr, virtual_dma_port + FD_DATA); else - *lptr = inb_p(virtual_dma_port + 5); + *lptr = inb_p(virtual_dma_port + FD_DATA); } virtual_dma_count = lcount; virtual_dma_addr = lptr; - st = inb(virtual_dma_port + 4); + st = inb(virtual_dma_port + FD_STATUS); } #ifdef TRACE_FLPY_INT calls++; #endif - if (st == 0x20) + if (st == STATUS_DMA) return IRQ_HANDLED; - if (!(st & 0x20)) { + if (!(st & STATUS_DMA)) { virtual_dma_residue += virtual_dma_count; virtual_dma_count = 0; #ifdef TRACE_FLPY_INT diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 44c48e34d799..42159f45bf9c 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -31,7 +31,8 @@ extern void fpu__save(struct fpu *fpu); extern int fpu__restore_sig(void __user *buf, int ia32_frame); extern void fpu__drop(struct fpu *fpu); extern int fpu__copy(struct task_struct *dst, struct task_struct *src); -extern void fpu__clear(struct fpu *fpu); +extern void fpu__clear_user_states(struct fpu *fpu); +extern void fpu__clear_all(struct fpu *fpu); extern int fpu__exception_code(struct fpu *fpu, int trap_nr); extern int dump_fpu(struct pt_regs *ptregs, struct user_i387_struct *fpstate); @@ -92,7 +93,7 @@ static inline void fpstate_init_xstate(struct xregs_state *xsave) * XRSTORS requires these bits set in xcomp_bv, or it will * trigger #GP: */ - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask; + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask_all; } static inline void fpstate_init_fxstate(struct fxregs_state *fx) @@ -399,7 +400,10 @@ static inline int copy_kernel_to_xregs_err(struct xregs_state *xstate, u64 mask) u32 hmask = mask >> 32; int err; - XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); + if (static_cpu_has(X86_FEATURE_XSAVES)) + XSTATE_OP(XRSTORS, xstate, lmask, hmask, err); + else + XSTATE_OP(XRSTOR, xstate, lmask, hmask, err); return err; } diff --git a/arch/x86/include/asm/fpu/xstate.h b/arch/x86/include/asm/fpu/xstate.h index c6136d79f8c0..422d8369012a 100644 --- a/arch/x86/include/asm/fpu/xstate.h +++ b/arch/x86/include/asm/fpu/xstate.h @@ -21,19 +21,29 @@ #define XSAVE_YMM_SIZE 256 #define XSAVE_YMM_OFFSET (XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET) -/* Supervisor features */ -#define XFEATURE_MASK_SUPERVISOR (XFEATURE_MASK_PT) - -/* All currently supported features */ -#define XCNTXT_MASK (XFEATURE_MASK_FP | \ - XFEATURE_MASK_SSE | \ - XFEATURE_MASK_YMM | \ - XFEATURE_MASK_OPMASK | \ - XFEATURE_MASK_ZMM_Hi256 | \ - XFEATURE_MASK_Hi16_ZMM | \ - XFEATURE_MASK_PKRU | \ - XFEATURE_MASK_BNDREGS | \ - XFEATURE_MASK_BNDCSR) +/* All currently supported user features */ +#define XFEATURE_MASK_USER_SUPPORTED (XFEATURE_MASK_FP | \ + XFEATURE_MASK_SSE | \ + XFEATURE_MASK_YMM | \ + XFEATURE_MASK_OPMASK | \ + XFEATURE_MASK_ZMM_Hi256 | \ + XFEATURE_MASK_Hi16_ZMM | \ + XFEATURE_MASK_PKRU | \ + XFEATURE_MASK_BNDREGS | \ + XFEATURE_MASK_BNDCSR) + +/* All currently supported supervisor features */ +#define XFEATURE_MASK_SUPERVISOR_SUPPORTED (0) + +/* + * Unsupported supervisor features. When a supervisor feature in this mask is + * supported in the future, move it to the supported supervisor feature mask. + */ +#define XFEATURE_MASK_SUPERVISOR_UNSUPPORTED (XFEATURE_MASK_PT) + +/* All supervisor states including supported and unsupported states. */ +#define XFEATURE_MASK_SUPERVISOR_ALL (XFEATURE_MASK_SUPERVISOR_SUPPORTED | \ + XFEATURE_MASK_SUPERVISOR_UNSUPPORTED) #ifdef CONFIG_X86_64 #define REX_PREFIX "0x48, " @@ -41,7 +51,18 @@ #define REX_PREFIX #endif -extern u64 xfeatures_mask; +extern u64 xfeatures_mask_all; + +static inline u64 xfeatures_mask_supervisor(void) +{ + return xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_SUPPORTED; +} + +static inline u64 xfeatures_mask_user(void) +{ + return xfeatures_mask_all & XFEATURE_MASK_USER_SUPPORTED; +} + extern u64 xstate_fx_sw_bytes[USER_XSTATE_FX_SW_WORDS]; extern void __init update_regset_xstate_info(unsigned int size, @@ -54,8 +75,9 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned int offset, unsigned int size); int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf); int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf); +void copy_supervisor_to_kernel(struct xregs_state *xsave); /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -extern int validate_xstate_header(const struct xstate_header *hdr); +int validate_user_xstate_header(const struct xstate_header *hdr); #endif diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 29336574d0bc..7a4d2062385c 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -11,17 +11,6 @@ #include <linux/types.h> #include <asm/page.h> - -/* - * While not explicitly listed in the TLFS, Hyper-V always runs with a page size - * of 4096. These definitions are used when communicating with Hyper-V using - * guest physical pages and guest physical page addresses, since the guest page - * size may not be 4096 on all architectures. - */ -#define HV_HYP_PAGE_SHIFT 12 -#define HV_HYP_PAGE_SIZE BIT(HV_HYP_PAGE_SHIFT) -#define HV_HYP_PAGE_MASK (~(HV_HYP_PAGE_SIZE - 1)) - /* * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent * is set by CPUID(HvCpuIdFunctionVersionAndFeatures). @@ -39,78 +28,41 @@ #define HYPERV_CPUID_MAX 0x4000ffff /* - * Feature identification. EAX indicates which features are available - * to the partition based upon the current partition privileges. - * These are HYPERV_CPUID_FEATURES.EAX bits. + * Aliases for Group A features that have X64 in the name. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EAX bits. */ -/* VP Runtime (HV_X64_MSR_VP_RUNTIME) available */ -#define HV_X64_MSR_VP_RUNTIME_AVAILABLE BIT(0) -/* Partition Reference Counter (HV_X64_MSR_TIME_REF_COUNT) available*/ -#define HV_MSR_TIME_REF_COUNT_AVAILABLE BIT(1) -/* - * Basic SynIC MSRs (HV_X64_MSR_SCONTROL through HV_X64_MSR_EOM - * and HV_X64_MSR_SINT0 through HV_X64_MSR_SINT15) available - */ -#define HV_X64_MSR_SYNIC_AVAILABLE BIT(2) -/* - * Synthetic Timer MSRs (HV_X64_MSR_STIMER0_CONFIG through - * HV_X64_MSR_STIMER3_COUNT) available - */ -#define HV_MSR_SYNTIMER_AVAILABLE BIT(3) -/* - * APIC access MSRs (HV_X64_MSR_EOI, HV_X64_MSR_ICR and HV_X64_MSR_TPR) - * are available - */ -#define HV_X64_MSR_APIC_ACCESS_AVAILABLE BIT(4) -/* Hypercall MSRs (HV_X64_MSR_GUEST_OS_ID and HV_X64_MSR_HYPERCALL) available*/ -#define HV_X64_MSR_HYPERCALL_AVAILABLE BIT(5) -/* Access virtual processor index MSR (HV_X64_MSR_VP_INDEX) available*/ -#define HV_X64_MSR_VP_INDEX_AVAILABLE BIT(6) -/* Virtual system reset MSR (HV_X64_MSR_RESET) is available*/ -#define HV_X64_MSR_RESET_AVAILABLE BIT(7) -/* - * Access statistics pages MSRs (HV_X64_MSR_STATS_PARTITION_RETAIL_PAGE, - * HV_X64_MSR_STATS_PARTITION_INTERNAL_PAGE, HV_X64_MSR_STATS_VP_RETAIL_PAGE, - * HV_X64_MSR_STATS_VP_INTERNAL_PAGE) available - */ -#define HV_X64_MSR_STAT_PAGES_AVAILABLE BIT(8) -/* Partition reference TSC MSR is available */ -#define HV_MSR_REFERENCE_TSC_AVAILABLE BIT(9) -/* Partition Guest IDLE MSR is available */ -#define HV_X64_MSR_GUEST_IDLE_AVAILABLE BIT(10) -/* - * There is a single feature flag that signifies if the partition has access - * to MSRs with local APIC and TSC frequencies. - */ -#define HV_X64_ACCESS_FREQUENCY_MSRS BIT(11) -/* AccessReenlightenmentControls privilege */ -#define HV_X64_ACCESS_REENLIGHTENMENT BIT(13) -/* AccessTscInvariantControls privilege */ -#define HV_X64_ACCESS_TSC_INVARIANT BIT(15) +#define HV_X64_MSR_VP_RUNTIME_AVAILABLE \ + HV_MSR_VP_RUNTIME_AVAILABLE +#define HV_X64_MSR_SYNIC_AVAILABLE \ + HV_MSR_SYNIC_AVAILABLE +#define HV_X64_MSR_APIC_ACCESS_AVAILABLE \ + HV_MSR_APIC_ACCESS_AVAILABLE +#define HV_X64_MSR_HYPERCALL_AVAILABLE \ + HV_MSR_HYPERCALL_AVAILABLE +#define HV_X64_MSR_VP_INDEX_AVAILABLE \ + HV_MSR_VP_INDEX_AVAILABLE +#define HV_X64_MSR_RESET_AVAILABLE \ + HV_MSR_RESET_AVAILABLE +#define HV_X64_MSR_GUEST_IDLE_AVAILABLE \ + HV_MSR_GUEST_IDLE_AVAILABLE +#define HV_X64_ACCESS_FREQUENCY_MSRS \ + HV_ACCESS_FREQUENCY_MSRS +#define HV_X64_ACCESS_REENLIGHTENMENT \ + HV_ACCESS_REENLIGHTENMENT +#define HV_X64_ACCESS_TSC_INVARIANT \ + HV_ACCESS_TSC_INVARIANT /* - * Feature identification: indicates which flags were specified at partition - * creation. The format is the same as the partition creation flag structure - * defined in section Partition Creation Flags. - * These are HYPERV_CPUID_FEATURES.EBX bits. + * Aliases for Group B features that have X64 in the name. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EBX bits. */ -#define HV_X64_CREATE_PARTITIONS BIT(0) -#define HV_X64_ACCESS_PARTITION_ID BIT(1) -#define HV_X64_ACCESS_MEMORY_POOL BIT(2) -#define HV_X64_ADJUST_MESSAGE_BUFFERS BIT(3) -#define HV_X64_POST_MESSAGES BIT(4) -#define HV_X64_SIGNAL_EVENTS BIT(5) -#define HV_X64_CREATE_PORT BIT(6) -#define HV_X64_CONNECT_PORT BIT(7) -#define HV_X64_ACCESS_STATS BIT(8) -#define HV_X64_DEBUGGING BIT(11) -#define HV_X64_CPU_POWER_MANAGEMENT BIT(12) +#define HV_X64_POST_MESSAGES HV_POST_MESSAGES +#define HV_X64_SIGNAL_EVENTS HV_SIGNAL_EVENTS /* - * Feature identification. EDX indicates which miscellaneous features - * are available to the partition. - * These are HYPERV_CPUID_FEATURES.EDX bits. + * Group D Features. The bit assignments are custom to each architecture. + * On x86/x64 these are HYPERV_CPUID_FEATURES.EDX bits. */ /* The MWAIT instruction is available (per section MONITOR / MWAIT) */ #define HV_X64_MWAIT_AVAILABLE BIT(0) @@ -131,6 +83,8 @@ #define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE BIT(8) /* Crash MSR available */ #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE BIT(10) +/* Support for debug MSRs available */ +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE BIT(11) /* stimer Direct Mode is available */ #define HV_STIMER_DIRECT_MODE_AVAILABLE BIT(19) @@ -187,7 +141,7 @@ * processor, except for virtual processors that are reported as sibling SMT * threads. */ -#define HV_X64_NO_NONARCH_CORESHARING BIT(18) +#define HV_X64_NO_NONARCH_CORESHARING BIT(18) /* Nested features. These are HYPERV_CPUID_NESTED_FEATURES.EAX bits. */ #define HV_X64_NESTED_DIRECT_FLUSH BIT(17) @@ -295,43 +249,6 @@ union hv_x64_msr_hypercall_contents { } __packed; }; -/* - * TSC page layout. - */ -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -} __packed; - -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 - struct hv_reenlightenment_control { __u64 vector:8; __u64 reserved1:8; @@ -355,31 +272,12 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) -/* - * Crash notification (HV_X64_MSR_CRASH_CTL) flags. - */ -#define HV_CRASH_CTL_CRASH_NOTIFY_MSG BIT_ULL(62) -#define HV_CRASH_CTL_CRASH_NOTIFY BIT_ULL(63) #define HV_X64_MSR_CRASH_PARAMS \ (1 + (HV_X64_MSR_CRASH_P4 - HV_X64_MSR_CRASH_P0)) #define HV_IPI_LOW_VECTOR 0x10 #define HV_IPI_HIGH_VECTOR 0xff -/* Declare the various hypercall operations. */ -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 -#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 -#define HVCALL_SEND_IPI 0x000b -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 -#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 -#define HVCALL_SEND_IPI_EX 0x0015 -#define HVCALL_POST_MESSAGE 0x005c -#define HVCALL_SIGNAL_EVENT 0x005d -#define HVCALL_RETARGET_INTERRUPT 0x007e -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af -#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0 - #define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12 #define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \ @@ -391,75 +289,6 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_TSC_REFERENCE_ENABLE 0x00000001 #define HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT 12 -#define HV_PROCESSOR_POWER_STATE_C0 0 -#define HV_PROCESSOR_POWER_STATE_C1 1 -#define HV_PROCESSOR_POWER_STATE_C2 2 -#define HV_PROCESSOR_POWER_STATE_C3 3 - -#define HV_FLUSH_ALL_PROCESSORS BIT(0) -#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1) -#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2) -#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) - -enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARSE_4K, - HV_GENERIC_SET_ALL, -}; - -#define HV_PARTITION_ID_SELF ((u64)-1) - -#define HV_HYPERCALL_RESULT_MASK GENMASK_ULL(15, 0) -#define HV_HYPERCALL_FAST_BIT BIT(16) -#define HV_HYPERCALL_VARHEAD_OFFSET 17 -#define HV_HYPERCALL_REP_COMP_OFFSET 32 -#define HV_HYPERCALL_REP_COMP_MASK GENMASK_ULL(43, 32) -#define HV_HYPERCALL_REP_START_OFFSET 48 -#define HV_HYPERCALL_REP_START_MASK GENMASK_ULL(59, 48) - -/* hypercall status code */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_PORT_ID 17 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* - * The Hyper-V TimeRefCount register and the TSC - * page provide a guest VM clock with 100ns tick rate - */ -#define HV_CLOCK_HZ (NSEC_PER_SEC/100) - -typedef struct _HV_REFERENCE_TSC_PAGE { - __u32 tsc_sequence; - __u32 res1; - __u64 tsc_scale; - __s64 tsc_offset; -} __packed HV_REFERENCE_TSC_PAGE, *PHV_REFERENCE_TSC_PAGE; - -/* Define the number of synthetic interrupt sources. */ -#define HV_SYNIC_SINT_COUNT (16) -/* Define the expected SynIC version. */ -#define HV_SYNIC_VERSION_1 (0x1) -/* Valid SynIC vectors are 16-255. */ -#define HV_SYNIC_FIRST_VALID_VECTOR (16) - -#define HV_SYNIC_CONTROL_ENABLE (1ULL << 0) -#define HV_SYNIC_SIMP_ENABLE (1ULL << 0) -#define HV_SYNIC_SIEFP_ENABLE (1ULL << 0) -#define HV_SYNIC_SINT_MASKED (1ULL << 16) -#define HV_SYNIC_SINT_AUTO_EOI (1ULL << 17) -#define HV_SYNIC_SINT_VECTOR_MASK (0xFF) - -#define HV_SYNIC_STIMER_COUNT (4) - -/* Define synthetic interrupt controller message constants. */ -#define HV_MESSAGE_SIZE (256) -#define HV_MESSAGE_PAYLOAD_BYTE_COUNT (240) -#define HV_MESSAGE_PAYLOAD_QWORD_COUNT (30) /* Define hypervisor message types. */ enum hv_message_type { @@ -470,76 +299,25 @@ enum hv_message_type { HVMSG_GPA_INTERCEPT = 0x80000001, /* Timer notification messages. */ - HVMSG_TIMER_EXPIRED = 0x80000010, + HVMSG_TIMER_EXPIRED = 0x80000010, /* Error messages. */ HVMSG_INVALID_VP_REGISTER_VALUE = 0x80000020, HVMSG_UNRECOVERABLE_EXCEPTION = 0x80000021, - HVMSG_UNSUPPORTED_FEATURE = 0x80000022, + HVMSG_UNSUPPORTED_FEATURE = 0x80000022, /* Trace buffer complete messages. */ HVMSG_EVENTLOG_BUFFERCOMPLETE = 0x80000040, /* Platform-specific processor intercept messages. */ - HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, + HVMSG_X64_IOPORT_INTERCEPT = 0x80010000, HVMSG_X64_MSR_INTERCEPT = 0x80010001, - HVMSG_X64_CPUID_INTERCEPT = 0x80010002, + HVMSG_X64_CPUID_INTERCEPT = 0x80010002, HVMSG_X64_EXCEPTION_INTERCEPT = 0x80010003, - HVMSG_X64_APIC_EOI = 0x80010004, - HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 -}; - -/* Define synthetic interrupt controller message flags. */ -union hv_message_flags { - __u8 asu8; - struct { - __u8 msg_pending:1; - __u8 reserved:7; - } __packed; -}; - -/* Define port identifier type. */ -union hv_port_id { - __u32 asu32; - struct { - __u32 id:24; - __u32 reserved:8; - } __packed u; + HVMSG_X64_APIC_EOI = 0x80010004, + HVMSG_X64_LEGACY_FP_ERROR = 0x80010005 }; -/* Define synthetic interrupt controller message header. */ -struct hv_message_header { - __u32 message_type; - __u8 payload_size; - union hv_message_flags message_flags; - __u8 reserved[2]; - union { - __u64 sender; - union hv_port_id port; - }; -} __packed; - -/* Define synthetic interrupt controller message format. */ -struct hv_message { - struct hv_message_header header; - union { - __u64 payload[HV_MESSAGE_PAYLOAD_QWORD_COUNT]; - } u; -} __packed; - -/* Define the synthetic interrupt message page layout. */ -struct hv_message_page { - struct hv_message sint_message[HV_SYNIC_SINT_COUNT]; -} __packed; - -/* Define timer message payload structure. */ -struct hv_timer_message_payload { - __u32 timer_index; - __u32 reserved; - __u64 expiration_time; /* When the timer expired */ - __u64 delivery_time; /* When the message was delivered */ -} __packed; - struct hv_nested_enlightenments_control { struct { __u32 directhypercall:1; @@ -767,187 +545,11 @@ struct hv_enlightened_vmcs { #define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF -/* Define synthetic interrupt controller flag constants. */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) -#define HV_EVENT_FLAGS_LONG_COUNT (256 / sizeof(unsigned long)) - -/* - * Synthetic timer configuration. - */ -union hv_stimer_config { - u64 as_uint64; - struct { - u64 enable:1; - u64 periodic:1; - u64 lazy:1; - u64 auto_enable:1; - u64 apic_vector:8; - u64 direct_mode:1; - u64 reserved_z0:3; - u64 sintx:4; - u64 reserved_z1:44; - } __packed; -}; - - -/* Define the synthetic interrupt controller event flags format. */ -union hv_synic_event_flags { - unsigned long flags[HV_EVENT_FLAGS_LONG_COUNT]; -}; - -/* Define SynIC control register. */ -union hv_synic_scontrol { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:63; - } __packed; -}; - -/* Define synthetic interrupt source. */ -union hv_synic_sint { - u64 as_uint64; - struct { - u64 vector:8; - u64 reserved1:8; - u64 masked:1; - u64 auto_eoi:1; - u64 polling:1; - u64 reserved2:45; - } __packed; -}; - -/* Define the format of the SIMP register */ -union hv_synic_simp { - u64 as_uint64; - struct { - u64 simp_enabled:1; - u64 preserved:11; - u64 base_simp_gpa:52; - } __packed; -}; - -/* Define the format of the SIEFP register */ -union hv_synic_siefp { - u64 as_uint64; - struct { - u64 siefp_enabled:1; - u64 preserved:11; - u64 base_siefp_gpa:52; - } __packed; -}; - -struct hv_vpset { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; -} __packed; - -/* HvCallSendSyntheticClusterIpi hypercall */ -struct hv_send_ipi { - u32 vector; - u32 reserved; - u64 cpu_mask; -} __packed; - -/* HvCallSendSyntheticClusterIpiEx hypercall */ -struct hv_send_ipi_ex { - u32 vector; - u32 reserved; - struct hv_vpset vp_set; -} __packed; - -/* HvFlushGuestPhysicalAddressSpace hypercalls */ -struct hv_guest_mapping_flush { - u64 address_space; - u64 flags; -} __packed; - -/* - * HV_MAX_FLUSH_PAGES = "additional_pages" + 1. It's limited - * by the bitwidth of "additional_pages" in union hv_gpa_page_range. - */ -#define HV_MAX_FLUSH_PAGES (2048) - -/* HvFlushGuestPhysicalAddressList hypercall */ -union hv_gpa_page_range { - u64 address_space; - struct { - u64 additional_pages:11; - u64 largepage:1; - u64 basepfn:52; - } page; -}; - -/* - * All input flush parameters should be in single page. The max flush - * count is equal with how many entries of union hv_gpa_page_range can - * be populated into the input parameter page. - */ -#define HV_MAX_FLUSH_REP_COUNT ((HV_HYP_PAGE_SIZE - 2 * sizeof(u64)) / \ - sizeof(union hv_gpa_page_range)) - -struct hv_guest_mapping_flush_list { - u64 address_space; - u64 flags; - union hv_gpa_page_range gpa_list[HV_MAX_FLUSH_REP_COUNT]; -}; - -/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */ -struct hv_tlb_flush { - u64 address_space; - u64 flags; - u64 processor_mask; - u64 gva_list[]; -} __packed; - -/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */ -struct hv_tlb_flush_ex { - u64 address_space; - u64 flags; - struct hv_vpset hv_vp_set; - u64 gva_list[]; -} __packed; - struct hv_partition_assist_pg { u32 tlb_lock_count; }; -union hv_msi_entry { - u64 as_uint64; - struct { - u32 address; - u32 data; - } __packed; -}; - -struct hv_interrupt_entry { - u32 source; /* 1 for MSI(-X) */ - u32 reserved1; - union hv_msi_entry msi_entry; -} __packed; -/* - * flags for hv_device_interrupt_target.flags - */ -#define HV_DEVICE_INTERRUPT_TARGET_MULTICAST 1 -#define HV_DEVICE_INTERRUPT_TARGET_PROCESSOR_SET 2 - -struct hv_device_interrupt_target { - u32 vector; - u32 flags; - union { - u64 vp_mask; - struct hv_vpset vp_set; - }; -} __packed; +#include <asm-generic/hyperv-tlfs.h> -/* HvRetargetDeviceInterrupt hypercall */ -struct hv_retarget_device_interrupt { - u64 partition_id; /* use "self" */ - u64 device_id; - struct hv_interrupt_entry int_entry; - u64 reserved2; - struct hv_device_interrupt_target int_target; -} __packed __aligned(8); #endif diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 8e5af119dc2d..de58391bdee0 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -88,11 +88,17 @@ static inline bool intel_mid_has_msic(void) return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL); } +extern void intel_scu_devices_create(void); +extern void intel_scu_devices_destroy(void); + #else /* !CONFIG_X86_INTEL_MID */ #define intel_mid_identify_cpu() 0 #define intel_mid_has_msic() 0 +static inline void intel_scu_devices_create(void) { } +static inline void intel_scu_devices_destroy(void) { } + #endif /* !CONFIG_X86_INTEL_MID */ enum intel_mid_timer_options { @@ -115,9 +121,6 @@ extern enum intel_mid_timer_options intel_mid_timer_options; #define SFI_MTMR_MAX_NUM 8 #define SFI_MRTC_MAX 8 -extern void intel_scu_devices_create(void); -extern void intel_scu_devices_destroy(void); - /* VRTC timer */ #define MRST_VRTC_MAP_SZ 1024 /* #define MRST_VRTC_PGOFFSET 0xc00 */ diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h deleted file mode 100644 index e6da1ce26256..000000000000 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ /dev/null @@ -1,59 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_INTEL_PMC_IPC_H_ -#define _ASM_X86_INTEL_PMC_IPC_H_ - -/* Commands */ -#define PMC_IPC_PMIC_ACCESS 0xFF -#define PMC_IPC_PMIC_ACCESS_READ 0x0 -#define PMC_IPC_PMIC_ACCESS_WRITE 0x1 -#define PMC_IPC_USB_PWR_CTRL 0xF0 -#define PMC_IPC_PMIC_BLACKLIST_SEL 0xEF -#define PMC_IPC_PHY_CONFIG 0xEE -#define PMC_IPC_NORTHPEAK_CTRL 0xED -#define PMC_IPC_PM_DEBUG 0xEC -#define PMC_IPC_PMC_TELEMTRY 0xEB -#define PMC_IPC_PMC_FW_MSG_CTRL 0xEA - -/* IPC return code */ -#define IPC_ERR_NONE 0 -#define IPC_ERR_CMD_NOT_SUPPORTED 1 -#define IPC_ERR_CMD_NOT_SERVICED 2 -#define IPC_ERR_UNABLE_TO_SERVICE 3 -#define IPC_ERR_CMD_INVALID 4 -#define IPC_ERR_CMD_FAILED 5 -#define IPC_ERR_EMSECURITY 6 -#define IPC_ERR_UNSIGNEDKERNEL 7 - -/* GCR reg offsets from gcr base*/ -#define PMC_GCR_PMC_CFG_REG 0x08 -#define PMC_GCR_TELEM_DEEP_S0IX_REG 0x78 -#define PMC_GCR_TELEM_SHLW_S0IX_REG 0x80 - -#if IS_ENABLED(CONFIG_INTEL_PMC_IPC) - -int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen); -int intel_pmc_s0ix_counter_read(u64 *data); -int intel_pmc_gcr_read64(u32 offset, u64 *data); - -#else - -static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, - u32 *out, u32 outlen) -{ - return -EINVAL; -} - -static inline int intel_pmc_s0ix_counter_read(u64 *data) -{ - return -EINVAL; -} - -static inline int intel_pmc_gcr_read64(u32 offset, u64 *data) -{ - return -EINVAL; -} - -#endif /*CONFIG_INTEL_PMC_IPC*/ - -#endif diff --git a/arch/x86/include/asm/intel_scu_ipc.h b/arch/x86/include/asm/intel_scu_ipc.h index 2a1442ba6e78..11d457af68c5 100644 --- a/arch/x86/include/asm/intel_scu_ipc.h +++ b/arch/x86/include/asm/intel_scu_ipc.h @@ -2,61 +2,69 @@ #ifndef _ASM_X86_INTEL_SCU_IPC_H_ #define _ASM_X86_INTEL_SCU_IPC_H_ -#include <linux/notifier.h> - -#define IPCMSG_INDIRECT_READ 0x02 -#define IPCMSG_INDIRECT_WRITE 0x05 - -#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */ - -#define IPCMSG_WARM_RESET 0xF0 -#define IPCMSG_COLD_RESET 0xF1 -#define IPCMSG_SOFT_RESET 0xF2 -#define IPCMSG_COLD_BOOT 0xF3 - -#define IPCMSG_VRTC 0xFA /* Set vRTC device */ - /* Command id associated with message IPCMSG_VRTC */ - #define IPC_CMD_VRTC_SETTIME 1 /* Set time */ - #define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ - -/* Read single register */ -int intel_scu_ipc_ioread8(u16 addr, u8 *data); - -/* Read a vector */ -int intel_scu_ipc_readv(u16 *addr, u8 *data, int len); - -/* Write single register */ -int intel_scu_ipc_iowrite8(u16 addr, u8 data); - -/* Write a vector */ -int intel_scu_ipc_writev(u16 *addr, u8 *data, int len); - -/* Update single register based on the mask */ -int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask); - -/* Issue commands to the SCU with or without data */ -int intel_scu_ipc_simple_command(int cmd, int sub); -int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, - u32 *out, int outlen); - -extern struct blocking_notifier_head intel_scu_notifier; - -static inline void intel_scu_notifier_add(struct notifier_block *nb) -{ - blocking_notifier_chain_register(&intel_scu_notifier, nb); -} - -static inline void intel_scu_notifier_remove(struct notifier_block *nb) -{ - blocking_notifier_chain_unregister(&intel_scu_notifier, nb); -} - -static inline int intel_scu_notifier_post(unsigned long v, void *p) +#include <linux/ioport.h> + +struct device; +struct intel_scu_ipc_dev; + +/** + * struct intel_scu_ipc_data - Data used to configure SCU IPC + * @mem: Base address of SCU IPC MMIO registers + * @irq: The IRQ number used for SCU (optional) + */ +struct intel_scu_ipc_data { + struct resource mem; + int irq; +}; + +struct intel_scu_ipc_dev * +__intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define intel_scu_ipc_register(parent, scu_data) \ + __intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +void intel_scu_ipc_unregister(struct intel_scu_ipc_dev *scu); + +struct intel_scu_ipc_dev * +__devm_intel_scu_ipc_register(struct device *parent, + const struct intel_scu_ipc_data *scu_data, + struct module *owner); + +#define devm_intel_scu_ipc_register(parent, scu_data) \ + __devm_intel_scu_ipc_register(parent, scu_data, THIS_MODULE) + +struct intel_scu_ipc_dev *intel_scu_ipc_dev_get(void); +void intel_scu_ipc_dev_put(struct intel_scu_ipc_dev *scu); +struct intel_scu_ipc_dev *devm_intel_scu_ipc_dev_get(struct device *dev); + +int intel_scu_ipc_dev_ioread8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 *data); +int intel_scu_ipc_dev_iowrite8(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data); +int intel_scu_ipc_dev_readv(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); +int intel_scu_ipc_dev_writev(struct intel_scu_ipc_dev *scu, u16 *addr, + u8 *data, size_t len); + +int intel_scu_ipc_dev_update(struct intel_scu_ipc_dev *scu, u16 addr, + u8 data, u8 mask); + +int intel_scu_ipc_dev_simple_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub); +int intel_scu_ipc_dev_command_with_size(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + size_t size, void *out, size_t outlen); + +static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int cmd, + int sub, const void *in, size_t inlen, + void *out, size_t outlen) { - return blocking_notifier_call_chain(&intel_scu_notifier, v, p); + return intel_scu_ipc_dev_command_with_size(scu, cmd, sub, in, inlen, + inlen, out, outlen); } -#define SCU_AVAILABLE 1 -#define SCU_DOWN 2 +#include <asm/intel_scu_ipc_legacy.h> #endif diff --git a/arch/x86/include/asm/intel_scu_ipc_legacy.h b/arch/x86/include/asm/intel_scu_ipc_legacy.h new file mode 100644 index 000000000000..4cf13fecb673 --- /dev/null +++ b/arch/x86/include/asm/intel_scu_ipc_legacy.h @@ -0,0 +1,91 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_INTEL_SCU_IPC_LEGACY_H_ +#define _ASM_X86_INTEL_SCU_IPC_LEGACY_H_ + +#include <linux/notifier.h> + +#define IPCMSG_INDIRECT_READ 0x02 +#define IPCMSG_INDIRECT_WRITE 0x05 + +#define IPCMSG_COLD_OFF 0x80 /* Only for Tangier */ + +#define IPCMSG_WARM_RESET 0xF0 +#define IPCMSG_COLD_RESET 0xF1 +#define IPCMSG_SOFT_RESET 0xF2 +#define IPCMSG_COLD_BOOT 0xF3 + +#define IPCMSG_VRTC 0xFA /* Set vRTC device */ +/* Command id associated with message IPCMSG_VRTC */ +#define IPC_CMD_VRTC_SETTIME 1 /* Set time */ +#define IPC_CMD_VRTC_SETALARM 2 /* Set alarm */ + +/* Don't call these in new code - they will be removed eventually */ + +/* Read single register */ +static inline int intel_scu_ipc_ioread8(u16 addr, u8 *data) +{ + return intel_scu_ipc_dev_ioread8(NULL, addr, data); +} + +/* Read a vector */ +static inline int intel_scu_ipc_readv(u16 *addr, u8 *data, int len) +{ + return intel_scu_ipc_dev_readv(NULL, addr, data, len); +} + +/* Write single register */ +static inline int intel_scu_ipc_iowrite8(u16 addr, u8 data) +{ + return intel_scu_ipc_dev_iowrite8(NULL, addr, data); +} + +/* Write a vector */ +static inline int intel_scu_ipc_writev(u16 *addr, u8 *data, int len) +{ + return intel_scu_ipc_dev_writev(NULL, addr, data, len); +} + +/* Update single register based on the mask */ +static inline int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask) +{ + return intel_scu_ipc_dev_update(NULL, addr, data, mask); +} + +/* Issue commands to the SCU with or without data */ +static inline int intel_scu_ipc_simple_command(int cmd, int sub) +{ + return intel_scu_ipc_dev_simple_command(NULL, cmd, sub); +} + +static inline int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen, + u32 *out, int outlen) +{ + /* New API takes both inlen and outlen as bytes so convert here */ + size_t inbytes = inlen * sizeof(u32); + size_t outbytes = outlen * sizeof(u32); + + return intel_scu_ipc_dev_command_with_size(NULL, cmd, sub, in, inbytes, + inlen, out, outbytes); +} + +extern struct blocking_notifier_head intel_scu_notifier; + +static inline void intel_scu_notifier_add(struct notifier_block *nb) +{ + blocking_notifier_chain_register(&intel_scu_notifier, nb); +} + +static inline void intel_scu_notifier_remove(struct notifier_block *nb) +{ + blocking_notifier_chain_unregister(&intel_scu_notifier, nb); +} + +static inline int intel_scu_notifier_post(unsigned long v, void *p) +{ + return blocking_notifier_call_chain(&intel_scu_notifier, v, p); +} + +#define SCU_AVAILABLE 1 +#define SCU_DOWN 2 + +#endif diff --git a/arch/x86/include/asm/intel_telemetry.h b/arch/x86/include/asm/intel_telemetry.h index 2f77e31a1283..8046e70dfd7c 100644 --- a/arch/x86/include/asm/intel_telemetry.h +++ b/arch/x86/include/asm/intel_telemetry.h @@ -10,6 +10,8 @@ #define TELEM_MAX_EVENTS_SRAM 28 #define TELEM_MAX_OS_ALLOCATED_EVENTS 20 +#include <asm/intel_scu_ipc.h> + enum telemetry_unit { TELEM_PSS = 0, TELEM_IOSS, @@ -51,6 +53,8 @@ struct telemetry_plt_config { struct telemetry_unit_config ioss_config; struct mutex telem_trace_lock; struct mutex telem_lock; + struct intel_pmc_dev *pmc; + struct intel_scu_ipc_dev *scu; bool telem_in_use; }; @@ -92,7 +96,7 @@ int telemetry_set_pltdata(const struct telemetry_core_ops *ops, int telemetry_clear_pltdata(void); -int telemetry_pltconfig_valid(void); +struct telemetry_plt_config *telemetry_get_pltdata(void); int telemetry_get_evtname(enum telemetry_unit telem_unit, const char **name, int len); diff --git a/arch/x86/include/asm/invpcid.h b/arch/x86/include/asm/invpcid.h index 989cfa86de85..734482afbf81 100644 --- a/arch/x86/include/asm/invpcid.h +++ b/arch/x86/include/asm/invpcid.h @@ -12,12 +12,9 @@ static inline void __invpcid(unsigned long pcid, unsigned long addr, * stale TLB entries and, especially if we're flushing global * mappings, we don't want the compiler to reorder any subsequent * memory accesses before the TLB flush. - * - * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and - * invpcid (%rcx), %rax in long mode. */ - asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01" - : : "m" (desc), "a" (type), "c" (&desc) : "memory"); + asm volatile("invpcid %[desc], %[type]" + :: [desc] "m" (desc), [type] "r" (type) : "memory"); } #define INVPCID_TYPE_INDIV_ADDR 0 diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0a6b35353fc7..1da5858501ca 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -83,6 +83,10 @@ #define KVM_REQ_GET_VMCS12_PAGES KVM_ARCH_REQ(24) #define KVM_REQ_APICV_UPDATE \ KVM_ARCH_REQ_FLAGS(25, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_TLB_FLUSH_CURRENT KVM_ARCH_REQ(26) +#define KVM_REQ_HV_TLB_FLUSH \ + KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_APF_READY KVM_ARCH_REQ(28) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ @@ -107,15 +111,8 @@ #define UNMAPPED_GVA (~(gpa_t)0) /* KVM Hugepage definitions for x86 */ -enum { - PT_PAGE_TABLE_LEVEL = 1, - PT_DIRECTORY_LEVEL = 2, - PT_PDPE_LEVEL = 3, - /* set max level to the biggest one */ - PT_MAX_HUGEPAGE_LEVEL = PT_PDPE_LEVEL, -}; -#define KVM_NR_PAGE_SIZES (PT_MAX_HUGEPAGE_LEVEL - \ - PT_PAGE_TABLE_LEVEL + 1) +#define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G +#define KVM_NR_PAGE_SIZES (KVM_MAX_HUGEPAGE_LEVEL - PG_LEVEL_4K + 1) #define KVM_HPAGE_GFN_SHIFT(x) (((x) - 1) * 9) #define KVM_HPAGE_SHIFT(x) (PAGE_SHIFT + KVM_HPAGE_GFN_SHIFT(x)) #define KVM_HPAGE_SIZE(x) (1UL << KVM_HPAGE_SHIFT(x)) @@ -124,7 +121,7 @@ enum { static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) { - /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + /* KVM_HPAGE_GFN_SHIFT(PG_LEVEL_4K) must be 0. */ return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); } @@ -164,9 +161,13 @@ enum kvm_reg { NR_VCPU_REGS, VCPU_EXREG_PDPTR = NR_VCPU_REGS, + VCPU_EXREG_CR0, VCPU_EXREG_CR3, + VCPU_EXREG_CR4, VCPU_EXREG_RFLAGS, VCPU_EXREG_SEGMENTS, + VCPU_EXREG_EXIT_INFO_1, + VCPU_EXREG_EXIT_INFO_2, }; enum { @@ -182,8 +183,10 @@ enum { enum exit_fastpath_completion { EXIT_FASTPATH_NONE, - EXIT_FASTPATH_SKIP_EMUL_INS, + EXIT_FASTPATH_REENTER_GUEST, + EXIT_FASTPATH_EXIT_HANDLED, }; +typedef enum exit_fastpath_completion fastpath_t; struct x86_emulate_ctxt; struct x86_exception; @@ -372,12 +375,12 @@ struct rsvd_bits_validate { }; struct kvm_mmu_root_info { - gpa_t cr3; + gpa_t pgd; hpa_t hpa; }; #define KVM_MMU_ROOT_INFO_INVALID \ - ((struct kvm_mmu_root_info) { .cr3 = INVALID_PAGE, .hpa = INVALID_PAGE }) + ((struct kvm_mmu_root_info) { .pgd = INVALID_PAGE, .hpa = INVALID_PAGE }) #define KVM_MMU_NUM_PREV_ROOTS 3 @@ -403,7 +406,7 @@ struct kvm_mmu { void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte); hpa_t root_hpa; - gpa_t root_cr3; + gpa_t root_pgd; union kvm_mmu_role mmu_role; u8 root_level; u8 shadow_root_level; @@ -598,6 +601,7 @@ struct kvm_vcpu_arch { u64 ia32_xss; u64 microcode_version; u64 arch_capabilities; + u64 perf_capabilities; /* * Paging state of the vcpu @@ -650,7 +654,6 @@ struct kvm_vcpu_arch { u64 xcr0; u64 guest_supported_xcr0; - u32 guest_xstate_size; struct kvm_pio_request pio; void *pio_data; @@ -680,6 +683,7 @@ struct kvm_vcpu_arch { struct kvm_cpuid_entry2 cpuid_entries[KVM_MAX_CPUID_ENTRIES]; int maxphyaddr; + int tdp_level; /* emulate context */ @@ -703,6 +707,7 @@ struct kvm_vcpu_arch { struct gfn_to_pfn_cache cache; } st; + u64 l1_tsc_offset; u64 tsc_offset; u64 last_guest_tsc; u64 last_host_tsc; @@ -762,14 +767,17 @@ struct kvm_vcpu_arch { struct { bool halted; - gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)]; + gfn_t gfns[ASYNC_PF_PER_VCPU]; struct gfn_to_hva_cache data; - u64 msr_val; + u64 msr_en_val; /* MSR_KVM_ASYNC_PF_EN */ + u64 msr_int_val; /* MSR_KVM_ASYNC_PF_INT */ + u16 vec; u32 id; bool send_user_only; - u32 host_apf_reason; + u32 host_apf_flags; unsigned long nested_apf_token; bool delivery_as_pf_vmexit; + bool pageready_pending; } apf; /* OSVW MSRs (AMD only) */ @@ -855,6 +863,18 @@ struct kvm_apic_map { struct kvm_lapic *phys_map[]; }; +/* Hyper-V synthetic debugger (SynDbg)*/ +struct kvm_hv_syndbg { + struct { + u64 control; + u64 status; + u64 send_page; + u64 recv_page; + u64 pending_page; + } control; + u64 options; +}; + /* Hyper-V emulation context */ struct kvm_hv { struct mutex hv_lock; @@ -866,7 +886,7 @@ struct kvm_hv { u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS]; u64 hv_crash_ctl; - HV_REFERENCE_TSC_PAGE tsc_ref; + struct ms_hyperv_tsc_page tsc_ref; struct idr conn_to_evt; @@ -878,6 +898,7 @@ struct kvm_hv { atomic_t num_mismatched_vp_indexes; struct hv_partition_assist_pg *hv_pa_pg; + struct kvm_hv_syndbg hv_syndbg; }; enum kvm_irqchip_mode { @@ -1028,6 +1049,8 @@ struct kvm_vcpu_stat { u64 irq_injections; u64 nmi_injections; u64 req_event; + u64 halt_poll_success_ns; + u64 halt_poll_fail_ns; }; struct x86_instruction_info; @@ -1059,7 +1082,7 @@ struct kvm_x86_ops { void (*hardware_disable)(void); void (*hardware_unsetup)(void); bool (*cpu_has_accelerated_tpr)(void); - bool (*has_emulated_msr)(int index); + bool (*has_emulated_msr)(u32 index); void (*cpuid_update)(struct kvm_vcpu *vcpu); unsigned int vm_size; @@ -1085,8 +1108,6 @@ struct kvm_x86_ops { void (*set_segment)(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); void (*get_cs_db_l_bits)(struct kvm_vcpu *vcpu, int *db, int *l); - void (*decache_cr0_guest_bits)(struct kvm_vcpu *vcpu); - void (*decache_cr4_guest_bits)(struct kvm_vcpu *vcpu); void (*set_cr0)(struct kvm_vcpu *vcpu, unsigned long cr0); int (*set_cr4)(struct kvm_vcpu *vcpu, unsigned long cr4); void (*set_efer)(struct kvm_vcpu *vcpu, u64 efer); @@ -1100,7 +1121,8 @@ struct kvm_x86_ops { unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); - void (*tlb_flush)(struct kvm_vcpu *vcpu, bool invalidate_gpa); + void (*tlb_flush_all)(struct kvm_vcpu *vcpu); + void (*tlb_flush_current)(struct kvm_vcpu *vcpu); int (*tlb_remote_flush)(struct kvm *kvm); int (*tlb_remote_flush_with_range)(struct kvm *kvm, struct kvm_tlb_range *range); @@ -1113,7 +1135,13 @@ struct kvm_x86_ops { */ void (*tlb_flush_gva)(struct kvm_vcpu *vcpu, gva_t addr); - void (*run)(struct kvm_vcpu *vcpu); + /* + * Flush any TLB entries created by the guest. Like tlb_flush_gva(), + * does not need to flush GPA->HPA mappings. + */ + void (*tlb_flush_guest)(struct kvm_vcpu *vcpu); + + enum exit_fastpath_completion (*run)(struct kvm_vcpu *vcpu); int (*handle_exit)(struct kvm_vcpu *vcpu, enum exit_fastpath_completion exit_fastpath); int (*skip_emulated_instruction)(struct kvm_vcpu *vcpu); @@ -1126,8 +1154,8 @@ struct kvm_x86_ops { void (*set_nmi)(struct kvm_vcpu *vcpu); void (*queue_exception)(struct kvm_vcpu *vcpu); void (*cancel_injection)(struct kvm_vcpu *vcpu); - int (*interrupt_allowed)(struct kvm_vcpu *vcpu); - int (*nmi_allowed)(struct kvm_vcpu *vcpu); + int (*interrupt_allowed)(struct kvm_vcpu *vcpu, bool for_injection); + int (*nmi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); bool (*get_nmi_mask)(struct kvm_vcpu *vcpu); void (*set_nmi_mask)(struct kvm_vcpu *vcpu, bool masked); void (*enable_nmi_window)(struct kvm_vcpu *vcpu); @@ -1141,7 +1169,7 @@ struct kvm_x86_ops { bool (*guest_apic_has_interrupt)(struct kvm_vcpu *vcpu); void (*load_eoi_exitmap)(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap); void (*set_virtual_apic_mode)(struct kvm_vcpu *vcpu); - void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); + void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu); int (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); @@ -1153,7 +1181,6 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); - u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); /* Returns actual tsc_offset set in active VMCS */ u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); @@ -1163,10 +1190,8 @@ struct kvm_x86_ops { struct x86_instruction_info *info, enum x86_intercept_stage stage, struct x86_exception *exception); - void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath); + void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu); - int (*check_nested_events)(struct kvm_vcpu *vcpu); void (*request_immediate_exit)(struct kvm_vcpu *vcpu); void (*sched_in)(struct kvm_vcpu *kvm, int cpu); @@ -1199,6 +1224,7 @@ struct kvm_x86_ops { /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; + const struct kvm_x86_nested_ops *nested_ops; /* * Architecture specific hooks for vCPU blocking due to @@ -1226,18 +1252,10 @@ struct kvm_x86_ops { void (*setup_mce)(struct kvm_vcpu *vcpu); - int (*get_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - unsigned user_data_size); - int (*set_nested_state)(struct kvm_vcpu *vcpu, - struct kvm_nested_state __user *user_kvm_nested_state, - struct kvm_nested_state *kvm_state); - bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); - - int (*smi_allowed)(struct kvm_vcpu *vcpu); + int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); int (*pre_enter_smm)(struct kvm_vcpu *vcpu, char *smstate); int (*pre_leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); - int (*enable_smi_window)(struct kvm_vcpu *vcpu); + void (*enable_smi_window)(struct kvm_vcpu *vcpu); int (*mem_enc_op)(struct kvm *kvm, void __user *argp); int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp); @@ -1245,14 +1263,28 @@ struct kvm_x86_ops { int (*get_msr_feature)(struct kvm_msr_entry *entry); - int (*nested_enable_evmcs)(struct kvm_vcpu *vcpu, - uint16_t *vmcs_version); - uint16_t (*nested_get_evmcs_version)(struct kvm_vcpu *vcpu); - bool (*need_emulation_on_page_fault)(struct kvm_vcpu *vcpu); bool (*apic_init_signal_blocked)(struct kvm_vcpu *vcpu); int (*enable_direct_tlbflush)(struct kvm_vcpu *vcpu); + + void (*migrate_timers)(struct kvm_vcpu *vcpu); +}; + +struct kvm_x86_nested_ops { + int (*check_events)(struct kvm_vcpu *vcpu); + bool (*hv_timer_pending)(struct kvm_vcpu *vcpu); + int (*get_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + unsigned user_data_size); + int (*set_state)(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state); + bool (*get_vmcs12_pages)(struct kvm_vcpu *vcpu); + + int (*enable_evmcs)(struct kvm_vcpu *vcpu, + uint16_t *vmcs_version); + uint16_t (*get_evmcs_version)(struct kvm_vcpu *vcpu); }; struct kvm_x86_init_ops { @@ -1279,8 +1311,7 @@ extern struct kmem_cache *x86_fpu_cache; #define __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { - return __vmalloc(kvm_x86_ops.vm_size, - GFP_KERNEL_ACCOUNT | __GFP_ZERO, PAGE_KERNEL); + return __vmalloc(kvm_x86_ops.vm_size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); } void kvm_arch_free_vm(struct kvm *kvm); @@ -1452,6 +1483,8 @@ void kvm_queue_exception_p(struct kvm_vcpu *vcpu, unsigned nr, unsigned long pay void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault); +bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); @@ -1479,6 +1512,8 @@ void kvm_pic_clear_all(struct kvm_pic *pic, int irq_source_id); void kvm_inject_nmi(struct kvm_vcpu *vcpu); +void kvm_update_dr7(struct kvm_vcpu *vcpu); + int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn); int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva); void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu); @@ -1509,8 +1544,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu); int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code, void *insn, int insn_len); void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva); +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t gva, hpa_t root_hpa); void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid); -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush); +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush, + bool skip_mmu_sync); void kvm_configure_mmu(bool enable_tdp, int tdp_page_level); @@ -1574,8 +1612,6 @@ enum { }; #define HF_GIF_MASK (1 << 0) -#define HF_HIF_MASK (1 << 1) -#define HF_VINTR_MASK (1 << 2) #define HF_NMI_MASK (1 << 3) #define HF_IRET_MASK (1 << 4) #define HF_GUEST_MASK (1 << 5) /* VCPU is in guest-mode */ @@ -1641,7 +1677,8 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work); -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu); +void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu); +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu); extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn); int kvm_skip_emulated_instruction(struct kvm_vcpu *vcpu); diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 9b4df6eaa11a..57fd1966c4ea 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -88,11 +88,21 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1, bool kvm_para_available(void); unsigned int kvm_arch_para_features(void); unsigned int kvm_arch_para_hints(void); -void kvm_async_pf_task_wait(u32 token, int interrupt_kernel); +void kvm_async_pf_task_wait_schedule(u32 token); void kvm_async_pf_task_wake(u32 token); -u32 kvm_read_and_reset_pf_reason(void); -extern void kvm_disable_steal_time(void); -void do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address); +u32 kvm_read_and_reset_apf_flags(void); +void kvm_disable_steal_time(void); +bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token); + +DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled); + +static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + if (static_branch_unlikely(&kvm_async_pf_enabled)) + return __kvm_handle_async_pf(regs, token); + else + return false; +} #ifdef CONFIG_PARAVIRT_SPINLOCKS void __init kvm_spinlock_init(void); @@ -103,7 +113,7 @@ static inline void kvm_spinlock_init(void) #endif /* CONFIG_PARAVIRT_SPINLOCKS */ #else /* CONFIG_KVM_GUEST */ -#define kvm_async_pf_task_wait(T, I) do {} while(0) +#define kvm_async_pf_task_wait_schedule(T) do {} while(0) #define kvm_async_pf_task_wake(T) do {} while(0) static inline bool kvm_para_available(void) @@ -121,7 +131,7 @@ static inline unsigned int kvm_arch_para_hints(void) return 0; } -static inline u32 kvm_read_and_reset_pf_reason(void) +static inline u32 kvm_read_and_reset_apf_flags(void) { return 0; } @@ -130,6 +140,11 @@ static inline void kvm_disable_steal_time(void) { return; } + +static inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token) +{ + return false; +} #endif #endif /* _ASM_X86_KVM_PARA_H */ diff --git a/arch/x86/include/asm/mmzone_32.h b/arch/x86/include/asm/mmzone_32.h index 73d8dd14dda2..2d4515e8b7df 100644 --- a/arch/x86/include/asm/mmzone_32.h +++ b/arch/x86/include/asm/mmzone_32.h @@ -14,43 +14,4 @@ extern struct pglist_data *node_data[]; #define NODE_DATA(nid) (node_data[nid]) #endif /* CONFIG_NUMA */ -#ifdef CONFIG_DISCONTIGMEM - -/* - * generic node memory support, the following assumptions apply: - * - * 1) memory comes in 64Mb contiguous chunks which are either present or not - * 2) we will not have more than 64Gb in total - * - * for now assume that 64Gb is max amount of RAM for whole system - * 64Gb / 4096bytes/page = 16777216 pages - */ -#define MAX_NR_PAGES 16777216 -#define MAX_SECTIONS 1024 -#define PAGES_PER_SECTION (MAX_NR_PAGES/MAX_SECTIONS) - -extern s8 physnode_map[]; - -static inline int pfn_to_nid(unsigned long pfn) -{ -#ifdef CONFIG_NUMA - return((int) physnode_map[(pfn) / PAGES_PER_SECTION]); -#else - return 0; -#endif -} - -static inline int pfn_valid(int pfn) -{ - int nid = pfn_to_nid(pfn); - - if (nid >= 0) - return (pfn < node_end_pfn(nid)); - return 0; -} - -#define early_pfn_valid(pfn) pfn_valid((pfn)) - -#endif /* CONFIG_DISCONTIGMEM */ - #endif /* _ASM_X86_MMZONE_32_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 12c9684d59ba..ef452b817f44 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -301,6 +301,9 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 +#define MSR_AMD_PKG_ENERGY_STATUS 0xc001029b +#define MSR_AMD_RAPL_POWER_UNIT 0xc0010299 + /* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 #define MSR_CONFIG_TDP_LEVEL_1 0x00000649 diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index b809f117f3f4..73d997aa2966 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -20,8 +20,10 @@ #define MWAIT_ECX_INTERRUPT_BREAK 0x1 #define MWAITX_ECX_TIMER_ENABLE BIT(1) -#define MWAITX_MAX_LOOPS ((u32)-1) +#define MWAITX_MAX_WAIT_CYCLES UINT_MAX #define MWAITX_DISABLE_CSTATES 0xf0 +#define TPAUSE_C01_STATE 1 +#define TPAUSE_C02_STATE 0 u32 get_umwait_control_msr(void); @@ -122,4 +124,24 @@ static inline void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) current_clr_polling(); } +/* + * Caller can specify whether to enter C0.1 (low latency, less + * power saving) or C0.2 state (saves more power, but longer wakeup + * latency). This may be overridden by the IA32_UMWAIT_CONTROL MSR + * which can force requests for C0.2 to be downgraded to C0.1. + */ +static inline void __tpause(u32 ecx, u32 edx, u32 eax) +{ + /* "tpause %ecx, %edx, %eax;" */ + #ifdef CONFIG_AS_TPAUSE + asm volatile("tpause %%ecx\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #else + asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n" + : + : "c"(ecx), "d"(edx), "a"(eax)); + #endif +} + #endif /* _ASM_X86_MWAIT_H */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7e9a281e2660..d52d1aacdd97 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -4,20 +4,13 @@ #define _ASM_X86_NOSPEC_BRANCH_H_ #include <linux/static_key.h> +#include <linux/frame.h> #include <asm/alternative.h> #include <asm/alternative-asm.h> #include <asm/cpufeatures.h> #include <asm/msr-index.h> - -/* - * This should be used immediately before a retpoline alternative. It tells - * objtool where the retpolines are so that it can make sense of the control - * flow by just reading the original instruction(s) and ignoring the - * alternatives. - */ -#define ANNOTATE_NOSPEC_ALTERNATIVE \ - ANNOTATE_IGNORE_ALTERNATIVE +#include <asm/unwind_hints.h> /* * Fill the CPU return stack buffer. @@ -46,21 +39,25 @@ #define __FILL_RETURN_BUFFER(reg, nr, sp) \ mov $(nr/2), reg; \ 771: \ + ANNOTATE_INTRA_FUNCTION_CALL; \ call 772f; \ 773: /* speculation trap */ \ + UNWIND_HINT_EMPTY; \ pause; \ lfence; \ jmp 773b; \ 772: \ + ANNOTATE_INTRA_FUNCTION_CALL; \ call 774f; \ 775: /* speculation trap */ \ + UNWIND_HINT_EMPTY; \ pause; \ lfence; \ jmp 775b; \ 774: \ + add $(BITS_PER_LONG/8) * 2, sp; \ dec reg; \ - jnz 771b; \ - add $(BITS_PER_LONG/8) * nr, sp; + jnz 771b; #ifdef __ASSEMBLY__ @@ -77,57 +74,27 @@ .endm /* - * These are the bare retpoline primitives for indirect jmp and call. - * Do not use these directly; they only exist to make the ALTERNATIVE - * invocation below less ugly. - */ -.macro RETPOLINE_JMP reg:req - call .Ldo_rop_\@ -.Lspec_trap_\@: - pause - lfence - jmp .Lspec_trap_\@ -.Ldo_rop_\@: - mov \reg, (%_ASM_SP) - ret -.endm - -/* - * This is a wrapper around RETPOLINE_JMP so the called function in reg - * returns to the instruction after the macro. - */ -.macro RETPOLINE_CALL reg:req - jmp .Ldo_call_\@ -.Ldo_retpoline_jmp_\@: - RETPOLINE_JMP \reg -.Ldo_call_\@: - call .Ldo_retpoline_jmp_\@ -.endm - -/* * JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple * indirect jmp/call which may be susceptible to the Spectre variant 2 * attack. */ .macro JMP_NOSPEC reg:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *\reg), \ - __stringify(RETPOLINE_JMP \reg), X86_FEATURE_RETPOLINE, \ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *\reg), X86_FEATURE_RETPOLINE_AMD + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \ + __stringify(jmp __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), X86_FEATURE_RETPOLINE_AMD #else - jmp *\reg + jmp *%\reg #endif .endm .macro CALL_NOSPEC reg:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *\reg), \ - __stringify(RETPOLINE_CALL \reg), X86_FEATURE_RETPOLINE,\ - __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *\reg), X86_FEATURE_RETPOLINE_AMD + ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; call *%\reg), \ + __stringify(call __x86_retpoline_\reg), X86_FEATURE_RETPOLINE, \ + __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; call *%\reg), X86_FEATURE_RETPOLINE_AMD #else - call *\reg + call *%\reg #endif .endm @@ -137,10 +104,8 @@ */ .macro FILL_RETURN_BUFFER reg:req nr:req ftr:req #ifdef CONFIG_RETPOLINE - ANNOTATE_NOSPEC_ALTERNATIVE - ALTERNATIVE "jmp .Lskip_rsb_\@", \ - __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)) \ - \ftr + ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr + __FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP) .Lskip_rsb_\@: #endif .endm @@ -161,16 +126,16 @@ * which is ensured when CONFIG_RETPOLINE is defined. */ # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ + "call __x86_retpoline_%V[thunk_target]\n", \ X86_FEATURE_RETPOLINE, \ "lfence;\n" \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ X86_FEATURE_RETPOLINE_AMD) + # define THUNK_TARGET(addr) [thunk_target] "r" (addr) #else /* CONFIG_X86_32 */ @@ -180,7 +145,6 @@ * here, anyway. */ # define CALL_NOSPEC \ - ANNOTATE_NOSPEC_ALTERNATIVE \ ALTERNATIVE_2( \ ANNOTATE_RETPOLINE_SAFE \ "call *%[thunk_target]\n", \ diff --git a/arch/x86/include/asm/orc_types.h b/arch/x86/include/asm/orc_types.h index 6e060907c163..d25534940bde 100644 --- a/arch/x86/include/asm/orc_types.h +++ b/arch/x86/include/asm/orc_types.h @@ -58,8 +58,7 @@ #define ORC_TYPE_CALL 0 #define ORC_TYPE_REGS 1 #define ORC_TYPE_REGS_IRET 2 -#define UNWIND_HINT_TYPE_SAVE 3 -#define UNWIND_HINT_TYPE_RESTORE 4 +#define UNWIND_HINT_TYPE_RET_OFFSET 3 #ifndef __ASSEMBLY__ /* diff --git a/arch/x86/include/asm/pgtable-2level_types.h b/arch/x86/include/asm/pgtable-2level_types.h index 6deb6cd236e3..7f6ccff0ba72 100644 --- a/arch/x86/include/asm/pgtable-2level_types.h +++ b/arch/x86/include/asm/pgtable-2level_types.h @@ -20,6 +20,8 @@ typedef union { #define SHARED_KERNEL_PMD 0 +#define ARCH_PAGE_TABLE_SYNC_MASK PGTBL_PMD_MODIFIED + /* * traditional i386 two-level paging structure: */ diff --git a/arch/x86/include/asm/pgtable-3level_types.h b/arch/x86/include/asm/pgtable-3level_types.h index 33845d36897c..80fbb4a9ed87 100644 --- a/arch/x86/include/asm/pgtable-3level_types.h +++ b/arch/x86/include/asm/pgtable-3level_types.h @@ -27,6 +27,8 @@ typedef union { #define SHARED_KERNEL_PMD (!static_cpu_has(X86_FEATURE_PTI)) #endif +#define ARCH_PAGE_TABLE_SYNC_MASK (SHARED_KERNEL_PMD ? 0 : PGTBL_PMD_MODIFIED) + /* * PGDIR_SHIFT determines what a top-level page table entry can map */ diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index 0dca7f7aeff2..be7b19646897 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -66,8 +66,7 @@ do { \ #endif /* !__ASSEMBLY__ */ /* - * kern_addr_valid() is (1) for FLATMEM and (0) for - * SPARSEMEM and DISCONTIGMEM + * kern_addr_valid() is (1) for FLATMEM and (0) for SPARSEMEM */ #ifdef CONFIG_FLATMEM #define kern_addr_valid(addr) (1) diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index 52e5f5f2240d..8f63efb2a2cc 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -159,4 +159,6 @@ extern unsigned int ptrs_per_p4d; #define PGD_KERNEL_START ((PAGE_SIZE / 2) / sizeof(pgd_t)) +#define ARCH_PAGE_TABLE_SYNC_MASK (pgtable_l5_enabled() ? PGTBL_PGD_MODIFIED : PGTBL_P4D_MODIFIED) + #endif /* _ASM_X86_PGTABLE_64_DEFS_H */ diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index b6606fe6cfdf..2e7c442cc618 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -194,7 +194,6 @@ enum page_cache_mode { #define _PAGE_TABLE_NOENC (__PP|__RW|_USR|___A| 0|___D| 0| 0) #define _PAGE_TABLE (__PP|__RW|_USR|___A| 0|___D| 0| 0| _ENC) #define __PAGE_KERNEL_RO (__PP| 0| 0|___A|__NX|___D| 0|___G) -#define __PAGE_KERNEL_RX (__PP| 0| 0|___A| 0|___D| 0|___G) #define __PAGE_KERNEL_NOCACHE (__PP|__RW| 0|___A|__NX|___D| 0|___G| __NC) #define __PAGE_KERNEL_VVAR (__PP| 0|_USR|___A|__NX|___D| 0|___G) #define __PAGE_KERNEL_LARGE (__PP|__RW| 0|___A|__NX|___D|_PSE|___G) @@ -220,7 +219,6 @@ enum page_cache_mode { #define PAGE_KERNEL_RO __pgprot_mask(__PAGE_KERNEL_RO | _ENC) #define PAGE_KERNEL_EXEC __pgprot_mask(__PAGE_KERNEL_EXEC | _ENC) #define PAGE_KERNEL_EXEC_NOENC __pgprot_mask(__PAGE_KERNEL_EXEC | 0) -#define PAGE_KERNEL_RX __pgprot_mask(__PAGE_KERNEL_RX | _ENC) #define PAGE_KERNEL_NOCACHE __pgprot_mask(__PAGE_KERNEL_NOCACHE | _ENC) #define PAGE_KERNEL_LARGE __pgprot_mask(__PAGE_KERNEL_LARGE | _ENC) #define PAGE_KERNEL_LARGE_EXEC __pgprot_mask(__PAGE_KERNEL_LARGE_EXEC | _ENC) @@ -284,6 +282,12 @@ typedef struct pgprot { pgprotval_t pgprot; } pgprot_t; typedef struct { pgdval_t pgd; } pgd_t; +static inline pgprot_t pgprot_nx(pgprot_t prot) +{ + return __pgprot(pgprot_val(prot) | _PAGE_NX); +} +#define pgprot_nx pgprot_nx + #ifdef CONFIG_X86_PAE /* diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 3bcf27caf6c9..29ee0c088009 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -113,9 +113,10 @@ struct cpuinfo_x86 { /* in KB - valid for CPUS which support this call: */ unsigned int x86_cache_size; int x86_cache_alignment; /* In bytes */ - /* Cache QoS architectural values: */ + /* Cache QoS architectural values, valid only on the BSP: */ int x86_cache_max_rmid; /* max index */ int x86_cache_occ_scale; /* scale to bytes */ + int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; /* cpuid returned max cores value: */ @@ -727,7 +728,6 @@ static inline void sync_core(void) unsigned int tmp; asm volatile ( - UNWIND_HINT_SAVE "mov %%ss, %0\n\t" "pushq %q0\n\t" "pushq %%rsp\n\t" @@ -737,7 +737,6 @@ static inline void sync_core(void) "pushq %q0\n\t" "pushq $1f\n\t" "iretq\n\t" - UNWIND_HINT_RESTORE "1:" : "=&r" (tmp), ASM_CALL_CONSTRAINT : : "cc", "memory"); #endif diff --git a/arch/x86/include/asm/resctrl_sched.h b/arch/x86/include/asm/resctrl.h index f6b7fe2833cc..07603064df8f 100644 --- a/arch/x86/include/asm/resctrl_sched.h +++ b/arch/x86/include/asm/resctrl.h @@ -1,6 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_RESCTRL_SCHED_H -#define _ASM_X86_RESCTRL_SCHED_H +#ifndef _ASM_X86_RESCTRL_H +#define _ASM_X86_RESCTRL_H #ifdef CONFIG_X86_CPU_RESCTRL @@ -84,10 +84,13 @@ static inline void resctrl_sched_in(void) __resctrl_sched_in(); } +void resctrl_cpu_detect(struct cpuinfo_x86 *c); + #else static inline void resctrl_sched_in(void) {} +static inline void resctrl_cpu_detect(struct cpuinfo_x86 *c) {} #endif /* CONFIG_X86_CPU_RESCTRL */ -#endif /* _ASM_X86_RESCTRL_SCHED_H */ +#endif /* _ASM_X86_RESCTRL_H */ diff --git a/arch/x86/include/asm/smap.h b/arch/x86/include/asm/smap.h index 27c47d183f4b..8b58d6975d5d 100644 --- a/arch/x86/include/asm/smap.h +++ b/arch/x86/include/asm/smap.h @@ -57,8 +57,10 @@ static __always_inline unsigned long smap_save(void) { unsigned long flags; - asm volatile (ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC, - X86_FEATURE_SMAP) + asm volatile ("# smap_save\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "pushf; pop %0; " __ASM_CLAC "\n\t" + "1:" : "=rm" (flags) : : "memory", "cc"); return flags; @@ -66,7 +68,10 @@ static __always_inline unsigned long smap_save(void) static __always_inline void smap_restore(unsigned long flags) { - asm volatile (ALTERNATIVE("", "push %0; popf", X86_FEATURE_SMAP) + asm volatile ("# smap_restore\n\t" + ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP) + "push %0; popf\n\t" + "1:" : : "g" (flags) : "memory", "cc"); } diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index bf3e34b25afc..323db6c5852a 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -3,29 +3,7 @@ #define _ASM_X86_SPINLOCK_TYPES_H #include <linux/types.h> - -#ifdef CONFIG_PARAVIRT_SPINLOCKS -#define __TICKET_LOCK_INC 2 -#define TICKET_SLOWPATH_FLAG ((__ticket_t)1) -#else -#define __TICKET_LOCK_INC 1 -#define TICKET_SLOWPATH_FLAG ((__ticket_t)0) -#endif - -#if (CONFIG_NR_CPUS < (256 / __TICKET_LOCK_INC)) -typedef u8 __ticket_t; -typedef u16 __ticketpair_t; -#else -typedef u16 __ticket_t; -typedef u32 __ticketpair_t; -#endif - -#define TICKET_LOCK_INC ((__ticket_t)__TICKET_LOCK_INC) - -#define TICKET_SHIFT (sizeof(__ticket_t) * 8) - #include <asm-generic/qspinlock_types.h> - #include <asm-generic/qrwlock_types.h> #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h index 6ece8561ba66..8a1f5382a4ea 100644 --- a/arch/x86/include/asm/svm.h +++ b/arch/x86/include/asm/svm.h @@ -96,7 +96,6 @@ struct __attribute__ ((__packed__)) vmcb_control_area { u8 reserved_6[8]; /* Offset 0xe8 */ u64 avic_logical_id; /* Offset 0xf0 */ u64 avic_physical_id; /* Offset 0xf8 */ - u8 reserved_7[768]; }; @@ -203,8 +202,16 @@ struct __attribute__ ((__packed__)) vmcb_save_area { u64 last_excp_to; }; + +static inline void __unused_size_checks(void) +{ + BUILD_BUG_ON(sizeof(struct vmcb_save_area) != 0x298); + BUILD_BUG_ON(sizeof(struct vmcb_control_area) != 256); +} + struct __attribute__ ((__packed__)) vmcb { struct vmcb_control_area control; + u8 reserved_control[1024 - sizeof(struct vmcb_control_area)]; struct vmcb_save_area save; }; diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 0e059b73437b..9f69cc497f4b 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -12,27 +12,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -/* This runs runs on the previous thread's stack. */ -static inline void prepare_switch_to(struct task_struct *next) -{ -#ifdef CONFIG_VMAP_STACK - /* - * If we switch to a stack that has a top-level paging entry - * that is not present in the current mm, the resulting #PF will - * will be promoted to a double-fault and we'll panic. Probe - * the new stack now so that vmalloc_fault can fix up the page - * tables if needed. This can only happen if we use a stack - * in vmap space. - * - * We assume that the stack is aligned so that it never spans - * more than one top-level paging entry. - * - * To minimize cache pollution, just follow the stack pointer. - */ - READ_ONCE(*(unsigned char *)next->thread.sp); -#endif -} - asmlinkage void ret_from_fork(void); /* @@ -67,8 +46,6 @@ struct fork_frame { #define switch_to(prev, next, last) \ do { \ - prepare_switch_to(next); \ - \ ((last) = __switch_to_asm((prev), (next))); \ } while (0) diff --git a/arch/x86/include/asm/traps.h b/arch/x86/include/asm/traps.h index c26a7e1d8a2c..2ae904bf25e4 100644 --- a/arch/x86/include/asm/traps.h +++ b/arch/x86/include/asm/traps.h @@ -69,9 +69,7 @@ dotraplinkage void do_overflow(struct pt_regs *regs, long error_code); dotraplinkage void do_bounds(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_op(struct pt_regs *regs, long error_code); dotraplinkage void do_device_not_available(struct pt_regs *regs, long error_code); -#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT) dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsigned long cr2); -#endif dotraplinkage void do_coprocessor_segment_overrun(struct pt_regs *regs, long error_code); dotraplinkage void do_invalid_TSS(struct pt_regs *regs, long error_code); dotraplinkage void do_segment_not_present(struct pt_regs *regs, long error_code); @@ -118,11 +116,6 @@ void smp_spurious_interrupt(struct pt_regs *regs); void smp_error_interrupt(struct pt_regs *regs); asmlinkage void smp_irq_move_cleanup_interrupt(void); -extern void ist_enter(struct pt_regs *regs); -extern void ist_exit(struct pt_regs *regs); -extern void ist_begin_non_atomic(struct pt_regs *regs); -extern void ist_end_non_atomic(void); - #ifdef CONFIG_VMAP_STACK void __noreturn handle_stack_overflow(const char *message, struct pt_regs *regs, diff --git a/arch/x86/include/asm/unwind_hints.h b/arch/x86/include/asm/unwind_hints.h index f5e2eb12cb71..7d903fdb3f43 100644 --- a/arch/x86/include/asm/unwind_hints.h +++ b/arch/x86/include/asm/unwind_hints.h @@ -86,32 +86,15 @@ UNWIND_HINT sp_offset=\sp_offset .endm -.macro UNWIND_HINT_SAVE - UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE -.endm - -.macro UNWIND_HINT_RESTORE - UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE +/* + * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN + * and sibling calls. On these, sp_offset denotes the expected offset from + * initial_func_cfi. + */ +.macro UNWIND_HINT_RET_OFFSET sp_offset=8 + UNWIND_HINT type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset .endm -#else /* !__ASSEMBLY__ */ - -#define UNWIND_HINT(sp_reg, sp_offset, type, end) \ - "987: \n\t" \ - ".pushsection .discard.unwind_hints\n\t" \ - /* struct unwind_hint */ \ - ".long 987b - .\n\t" \ - ".short " __stringify(sp_offset) "\n\t" \ - ".byte " __stringify(sp_reg) "\n\t" \ - ".byte " __stringify(type) "\n\t" \ - ".byte " __stringify(end) "\n\t" \ - ".balign 4 \n\t" \ - ".popsection\n\t" - -#define UNWIND_HINT_SAVE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_SAVE, 0) - -#define UNWIND_HINT_RESTORE UNWIND_HINT(0, 0, UNWIND_HINT_TYPE_RESTORE, 0) - #endif /* __ASSEMBLY__ */ #endif /* _ASM_X86_UNWIND_HINTS_H */ diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 389174eaec79..2fcc3ac12e76 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -123,12 +123,6 @@ enum uv_memprotect { UV_MEMPROT_ALLOW_RW }; -/* - * bios calls have 6 parameters - */ -extern s64 uv_bios_call(enum uv_bios_cmd, u64, u64, u64, u64, u64); -extern s64 uv_bios_call_irqsave(enum uv_bios_cmd, u64, u64, u64, u64, u64); - extern s64 uv_bios_get_sn_info(int, int *, long *, long *, long *, long *); extern s64 uv_bios_freq_base(u64, u64 *); extern int uv_bios_mq_watchlist_alloc(unsigned long, unsigned int, @@ -146,7 +140,6 @@ extern long sn_partition_id; extern long sn_coherency_id; extern long sn_region_size; extern long system_serial_number; -#define uv_partition_coherence_id() (sn_coherency_id) extern struct kobject *sgi_uv_kobj; /* /sys/firmware/sgi_uv */ diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 45ea95ce79b4..ae587ce544f4 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -31,7 +31,6 @@ static inline bool is_early_uv_system(void) } extern int is_uv_system(void); extern int is_uv_hubbed(int uvtype); -extern int is_uv_hubless(int uvtype); extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); @@ -44,7 +43,6 @@ static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline bool is_early_uv_system(void) { return 0; } static inline int is_uv_system(void) { return 0; } static inline int is_uv_hubbed(int uv) { return 0; } -static inline int is_uv_hubless(int uv) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } static inline const struct cpumask * diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 950cd1395d5d..60ca0afdeaf9 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -219,20 +219,6 @@ static inline struct uv_hub_info_s *uv_cpu_hub_info(int cpu) return (struct uv_hub_info_s *)uv_cpu_info_per(cpu)->p_uv_hub_info; } -#define UV_HUB_INFO_VERSION 0x7150 -extern int uv_hub_info_version(void); -static inline int uv_hub_info_check(int version) -{ - if (uv_hub_info_version() == version) - return 0; - - pr_crit("UV: uv_hub_info version(%x) mismatch, expecting(%x)\n", - uv_hub_info_version(), version); - - BUG(); /* Catastrophic - cannot continue on unknown UV system */ -} -#define _uv_hub_info_check() uv_hub_info_check(UV_HUB_INFO_VERSION) - /* * HUB revision ranges for each UV HUB architecture. * This is a software convention - NOT the hardware revision numbers in @@ -244,51 +230,32 @@ static inline int uv_hub_info_check(int version) #define UV4_HUB_REVISION_BASE 7 #define UV4A_HUB_REVISION_BASE 8 /* UV4 (fixed) rev 2 */ -/* WARNING: UVx_HUB_IS_SUPPORTED defines are deprecated and will be removed */ static inline int is_uv1_hub(void) { -#ifdef UV1_HUB_IS_SUPPORTED return is_uv_hubbed(uv(1)); -#else - return 0; -#endif } static inline int is_uv2_hub(void) { -#ifdef UV2_HUB_IS_SUPPORTED return is_uv_hubbed(uv(2)); -#else - return 0; -#endif } static inline int is_uv3_hub(void) { -#ifdef UV3_HUB_IS_SUPPORTED return is_uv_hubbed(uv(3)); -#else - return 0; -#endif } /* First test "is UV4A", then "is UV4" */ static inline int is_uv4a_hub(void) { -#ifdef UV4A_HUB_IS_SUPPORTED if (is_uv_hubbed(uv(4))) return (uv_hub_info->hub_revision == UV4A_HUB_REVISION_BASE); -#endif return 0; } static inline int is_uv4_hub(void) { -#ifdef UV4_HUB_IS_SUPPORTED return is_uv_hubbed(uv(4)); -#else - return 0; -#endif } static inline int is_uvx_hub(void) @@ -692,7 +659,6 @@ static inline int uv_cpu_blade_processor_id(int cpu) { return uv_cpu_info_per(cpu)->blade_cpu_id; } -#define _uv_cpu_blade_processor_id 1 /* indicate function available */ /* Blade number to Node number (UV1..UV4 is 1:1) */ static inline int uv_blade_to_node(int blade) @@ -856,26 +822,6 @@ static inline void uv_set_cpu_scir_bits(int cpu, unsigned char value) } extern unsigned int uv_apicid_hibits; -static unsigned long uv_hub_ipi_value(int apicid, int vector, int mode) -{ - apicid |= uv_apicid_hibits; - return (1UL << UVH_IPI_INT_SEND_SHFT) | - ((apicid) << UVH_IPI_INT_APIC_ID_SHFT) | - (mode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | - (vector << UVH_IPI_INT_VECTOR_SHFT); -} - -static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) -{ - unsigned long val; - unsigned long dmode = dest_Fixed; - - if (vector == NMI_VECTOR) - dmode = dest_NMI; - - val = uv_hub_ipi_value(apicid, vector, dmode); - uv_write_global_mmr64(pnode, UVH_IPI_INT, val); -} /* * Get the minimum revision number of the hub chips within the partition. diff --git a/arch/x86/include/asm/uv/uv_mmrs.h b/arch/x86/include/asm/uv/uv_mmrs.h index 62c79e26a59a..9ee5ed6e8b34 100644 --- a/arch/x86/include/asm/uv/uv_mmrs.h +++ b/arch/x86/include/asm/uv/uv_mmrs.h @@ -99,13 +99,6 @@ #define UV3_HUB_PART_NUMBER_X 0x4321 #define UV4_HUB_PART_NUMBER 0x99a1 -/* Compat: Indicate which UV Hubs are supported. */ -#define UV1_HUB_IS_SUPPORTED 1 -#define UV2_HUB_IS_SUPPORTED 1 -#define UV3_HUB_IS_SUPPORTED 1 -#define UV4_HUB_IS_SUPPORTED 1 -#define UV4A_HUB_IS_SUPPORTED 1 - /* Error function to catch undefined references */ extern unsigned long uv_undefined(char *str); diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 5e090d1f03f8..cd7de4b401fe 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -527,10 +527,12 @@ struct vmx_msr_entry { /* * Exit Qualifications for entry failure during or after loading guest state */ -#define ENTRY_FAIL_DEFAULT 0 -#define ENTRY_FAIL_PDPTE 2 -#define ENTRY_FAIL_NMI 3 -#define ENTRY_FAIL_VMCS_LINK_PTR 4 +enum vm_entry_failure_code { + ENTRY_FAIL_DEFAULT = 0, + ENTRY_FAIL_PDPTE = 2, + ENTRY_FAIL_NMI = 3, + ENTRY_FAIL_VMCS_LINK_PTR = 4, +}; /* * Exit Qualifications for EPT Violations diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 96d9cd208610..6807153c0410 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -50,14 +50,12 @@ struct x86_init_resources { * @pre_vector_init: init code to run before interrupt vectors * are set up. * @intr_init: interrupt init code - * @trap_init: platform specific trap setup * @intr_mode_select: interrupt delivery mode selection * @intr_mode_init: interrupt delivery mode setup */ struct x86_init_irqs { void (*pre_vector_init)(void); void (*intr_init)(void); - void (*trap_init)(void); void (*intr_mode_select)(void); void (*intr_mode_init)(void); }; diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h index 3f3f780c8c65..17c5a038f42d 100644 --- a/arch/x86/include/uapi/asm/kvm.h +++ b/arch/x86/include/uapi/asm/kvm.h @@ -385,32 +385,48 @@ struct kvm_sync_regs { #define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4) #define KVM_STATE_NESTED_FORMAT_VMX 0 -#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */ +#define KVM_STATE_NESTED_FORMAT_SVM 1 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_EVMCS 0x00000004 #define KVM_STATE_NESTED_MTF_PENDING 0x00000008 +#define KVM_STATE_NESTED_GIF_SET 0x00000100 #define KVM_STATE_NESTED_SMM_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_SMM_VMXON 0x00000002 #define KVM_STATE_NESTED_VMX_VMCS_SIZE 0x1000 +#define KVM_STATE_NESTED_SVM_VMCB_SIZE 0x1000 + +#define KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE 0x00000001 + struct kvm_vmx_nested_state_data { __u8 vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; __u8 shadow_vmcs12[KVM_STATE_NESTED_VMX_VMCS_SIZE]; }; struct kvm_vmx_nested_state_hdr { + __u32 flags; __u64 vmxon_pa; __u64 vmcs12_pa; + __u64 preemption_timer_deadline; struct { __u16 flags; } smm; }; +struct kvm_svm_nested_state_data { + /* Save area only used if KVM_STATE_NESTED_RUN_PENDING. */ + __u8 vmcb12[KVM_STATE_NESTED_SVM_VMCB_SIZE]; +}; + +struct kvm_svm_nested_state_hdr { + __u64 vmcb_pa; +}; + /* for KVM_CAP_NESTED_STATE */ struct kvm_nested_state { __u16 flags; @@ -419,6 +435,7 @@ struct kvm_nested_state { union { struct kvm_vmx_nested_state_hdr vmx; + struct kvm_svm_nested_state_hdr svm; /* Pad the header to 128 bytes. */ __u8 pad[120]; @@ -431,6 +448,7 @@ struct kvm_nested_state { */ union { struct kvm_vmx_nested_state_data vmx[0]; + struct kvm_svm_nested_state_data svm[0]; } data; }; diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 2a8e0b6b9805..812e9b4c1114 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -31,6 +31,7 @@ #define KVM_FEATURE_PV_SEND_IPI 11 #define KVM_FEATURE_POLL_CONTROL 12 #define KVM_FEATURE_PV_SCHED_YIELD 13 +#define KVM_FEATURE_ASYNC_PF_INT 14 #define KVM_HINTS_REALTIME 0 @@ -50,6 +51,8 @@ #define MSR_KVM_STEAL_TIME 0x4b564d03 #define MSR_KVM_PV_EOI_EN 0x4b564d04 #define MSR_KVM_POLL_CONTROL 0x4b564d05 +#define MSR_KVM_ASYNC_PF_INT 0x4b564d06 +#define MSR_KVM_ASYNC_PF_ACK 0x4b564d07 struct kvm_steal_time { __u64 steal; @@ -81,6 +84,11 @@ struct kvm_clock_pairing { #define KVM_ASYNC_PF_ENABLED (1 << 0) #define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT (1 << 2) +#define KVM_ASYNC_PF_DELIVERY_AS_INT (1 << 3) + +/* MSR_KVM_ASYNC_PF_INT */ +#define KVM_ASYNC_PF_VEC_MASK GENMASK(7, 0) + /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 @@ -112,8 +120,13 @@ struct kvm_mmu_op_release_pt { #define KVM_PV_REASON_PAGE_READY 2 struct kvm_vcpu_pv_apf_data { - __u32 reason; - __u8 pad[60]; + /* Used for 'page not present' events delivered via #PF */ + __u32 flags; + + /* Used for 'page ready' events delivered via interrupt notification */ + __u32 token; + + __u8 pad[56]; __u32 enabled; }; diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index e95b72ec19bc..b8ff9e8ac0d5 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -150,6 +150,9 @@ { EXIT_REASON_UMWAIT, "UMWAIT" }, \ { EXIT_REASON_TPAUSE, "TPAUSE" } +#define VMX_EXIT_REASON_FLAGS \ + { VMX_EXIT_REASONS_FAILED_VMENTRY, "FAILED_VMENTRY" } + #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index ba89cabe5fcf..2a7c3afa62e2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -102,9 +102,7 @@ obj-$(CONFIG_KEXEC_FILE) += kexec-bzimage64.o obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o obj-y += kprobes/ obj-$(CONFIG_MODULES) += module.o -ifeq ($(CONFIG_X86_32),y) -obj-$(CONFIG_DOUBLEFAULT) += doublefault_32.o -endif +obj-$(CONFIG_X86_32) += doublefault_32.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_VM86) += vm86_32.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c index fe698f96617c..263eeaddb0aa 100644 --- a/arch/x86/kernel/apb_timer.c +++ b/arch/x86/kernel/apb_timer.c @@ -345,56 +345,3 @@ out_noapbt: apb_timer_block_enabled = 0; panic("failed to enable APB timer\n"); } - -/* called before apb_timer_enable, use early map */ -unsigned long apbt_quick_calibrate(void) -{ - int i, scale; - u64 old, new; - u64 t1, t2; - unsigned long khz = 0; - u32 loop, shift; - - apbt_set_mapping(); - dw_apb_clocksource_start(clocksource_apbt); - - /* check if the timer can count down, otherwise return */ - old = dw_apb_clocksource_read(clocksource_apbt); - i = 10000; - while (--i) { - if (old != dw_apb_clocksource_read(clocksource_apbt)) - break; - } - if (!i) - goto failed; - - /* count 16 ms */ - loop = (apbt_freq / 1000) << 4; - - /* restart the timer to ensure it won't get to 0 in the calibration */ - dw_apb_clocksource_start(clocksource_apbt); - - old = dw_apb_clocksource_read(clocksource_apbt); - old += loop; - - t1 = rdtsc(); - - do { - new = dw_apb_clocksource_read(clocksource_apbt); - } while (new < old); - - t2 = rdtsc(); - - shift = 5; - if (unlikely(loop >> shift == 0)) { - printk(KERN_INFO - "APBT TSC calibration failed, not enough resolution\n"); - return 0; - } - scale = (int)div_u64((t2 - t1), loop >> shift); - khz = (scale * (apbt_freq / 1000)) >> shift; - printk(KERN_INFO "TSC freq calculated by APB timer is %lu khz\n", khz); - return khz; -failed: - return 0; -} diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index e53dda210cd7..4b1d31be50b4 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -544,46 +544,20 @@ static struct clock_event_device lapic_clockevent = { }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -static __init u32 hsx_deadline_rev(void) -{ - switch (boot_cpu_data.x86_stepping) { - case 0x02: return 0x3a; /* EP */ - case 0x04: return 0x0f; /* EX */ - } - - return ~0U; -} - -static __init u32 bdx_deadline_rev(void) -{ - switch (boot_cpu_data.x86_stepping) { - case 0x02: return 0x00000011; - case 0x03: return 0x0700000e; - case 0x04: return 0x0f00000c; - case 0x05: return 0x0e000003; - } - - return ~0U; -} - -static __init u32 skx_deadline_rev(void) -{ - switch (boot_cpu_data.x86_stepping) { - case 0x03: return 0x01000136; - case 0x04: return 0x02000014; - } +static const struct x86_cpu_id deadline_match[] __initconst = { + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x2, 0x2), 0x3a), /* EP */ + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(HASWELL_X, X86_STEPPINGS(0x4, 0x4), 0x0f), /* EX */ - if (boot_cpu_data.x86_stepping > 4) - return 0; + X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020), - return ~0U; -} + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x2, 0x2), 0x00000011), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x3, 0x3), 0x0700000e), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x4, 0x4), 0x0f00000c), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(BROADWELL_D, X86_STEPPINGS(0x5, 0x5), 0x0e000003), -static const struct x86_cpu_id deadline_match[] __initconst = { - X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev), - X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020), - X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev), - X86_MATCH_INTEL_FAM6_MODEL( SKYLAKE_X, &skx_deadline_rev), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x3, 0x3), 0x01000136), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x4, 0x4), 0x02000014), + X86_MATCH_INTEL_FAM6_MODEL_STEPPINGS(SKYLAKE_X, X86_STEPPINGS(0x5, 0xf), 0), X86_MATCH_INTEL_FAM6_MODEL( HASWELL, 0x22), X86_MATCH_INTEL_FAM6_MODEL( HASWELL_L, 0x20), @@ -615,14 +589,7 @@ static __init bool apic_validate_deadline_timer(void) if (!m) return true; - /* - * Function pointers will have the MSB set due to address layout, - * immediate revisions will not. - */ - if ((long)m->driver_data < 0) - rev = ((u32 (*)(void))(m->driver_data))(); - else - rev = (u32)m->driver_data; + rev = (u32)m->driver_data; if (boot_cpu_data.microcode >= rev) return true; diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 913c88617848..ce61e3e7d399 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -154,19 +154,6 @@ static inline bool mp_is_legacy_irq(int irq) return irq >= 0 && irq < nr_legacy_irqs(); } -/* - * Initialize all legacy IRQs and all pins on the first IOAPIC - * if we have legacy interrupt controller. Kernel boot option "pirq=" - * may rely on non-legacy pins on the first IOAPIC. - */ -static inline int mp_init_irq_at_boot(int ioapic, int irq) -{ - if (!nr_legacy_irqs()) - return 0; - - return ioapic == 0 || mp_is_legacy_irq(irq); -} - static inline struct irq_domain *mp_ioapic_irqdomain(int ioapic) { return ioapics[ioapic].irqdomain; diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index ad53b2abc859..69e70ed0f5e6 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -30,8 +30,6 @@ static enum uv_system_type uv_system_type; static int uv_hubbed_system; static int uv_hubless_system; static u64 gru_start_paddr, gru_end_paddr; -static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr; -static u64 gru_dist_lmask, gru_dist_umask; static union uvh_apicid uvh_apicid; /* Unpack OEM/TABLE ID's to be NULL terminated strings */ @@ -48,11 +46,9 @@ static struct { unsigned int gnode_shift; } uv_cpuid; -int uv_min_hub_revision_id; -EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); +static int uv_min_hub_revision_id; unsigned int uv_apicid_hibits; -EXPORT_SYMBOL_GPL(uv_apicid_hibits); static struct apic apic_x2apic_uv_x; static struct uv_hub_info_s uv_hub_info_node0; @@ -85,20 +81,7 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr) static inline bool is_GRU_range(u64 start, u64 end) { - if (gru_dist_base) { - u64 su = start & gru_dist_umask; /* Upper (incl pnode) bits */ - u64 sl = start & gru_dist_lmask; /* Base offset bits */ - u64 eu = end & gru_dist_umask; - u64 el = end & gru_dist_lmask; - - /* Must reside completely within a single GRU range: */ - return (sl == gru_dist_base && el == gru_dist_base && - su >= gru_first_node_paddr && - su <= gru_last_node_paddr && - eu == su); - } else { - return start >= gru_start_paddr && end <= gru_end_paddr; - } + return start >= gru_start_paddr && end <= gru_end_paddr; } static bool uv_is_untracked_pat_range(u64 start, u64 end) @@ -385,11 +368,10 @@ int is_uv_hubbed(int uvtype) } EXPORT_SYMBOL_GPL(is_uv_hubbed); -int is_uv_hubless(int uvtype) +static int is_uv_hubless(int uvtype) { return (uv_hubless_system & uvtype); } -EXPORT_SYMBOL_GPL(is_uv_hubless); void **__uv_hub_info_list; EXPORT_SYMBOL_GPL(__uv_hub_info_list); @@ -417,12 +399,6 @@ static __initdata struct uv_gam_range_s *_gr_table; #define SOCK_EMPTY ((unsigned short)~0) -extern int uv_hub_info_version(void) -{ - return UV_HUB_INFO_VERSION; -} -EXPORT_SYMBOL(uv_hub_info_version); - /* Default UV memory block size is 2GB */ static unsigned long mem_block_size __initdata = (2UL << 30); @@ -590,12 +566,21 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) static void uv_send_IPI_one(int cpu, int vector) { - unsigned long apicid; - int pnode; + unsigned long apicid = per_cpu(x86_cpu_to_apicid, cpu); + int pnode = uv_apicid_to_pnode(apicid); + unsigned long dmode, val; + + if (vector == NMI_VECTOR) + dmode = dest_NMI; + else + dmode = dest_Fixed; + + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + ((apicid | uv_apicid_hibits) << UVH_IPI_INT_APIC_ID_SHFT) | + (dmode << UVH_IPI_INT_DELIVERY_MODE_SHFT) | + (vector << UVH_IPI_INT_VECTOR_SHFT); - apicid = per_cpu(x86_cpu_to_apicid, cpu); - pnode = uv_apicid_to_pnode(apicid); - uv_hub_send_ipi(pnode, apicid, vector); + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); } static void uv_send_IPI_mask(const struct cpumask *mask, int vector) @@ -797,42 +782,6 @@ static __init void map_high(char *id, unsigned long base, int pshift, int bshift init_extra_mapping_wb(paddr, bytes); } -static __init void map_gru_distributed(unsigned long c) -{ - union uvh_rh_gam_gru_overlay_config_mmr_u gru; - u64 paddr; - unsigned long bytes; - int nid; - - gru.v = c; - - /* Only base bits 42:28 relevant in dist mode */ - gru_dist_base = gru.v & 0x000007fff0000000UL; - if (!gru_dist_base) { - pr_info("UV: Map GRU_DIST base address NULL\n"); - return; - } - - bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; - gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1); - gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1); - gru_dist_base &= gru_dist_lmask; /* Clear bits above M */ - - for_each_online_node(nid) { - paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) | - gru_dist_base; - init_extra_mapping_wb(paddr, bytes); - gru_first_node_paddr = min(paddr, gru_first_node_paddr); - gru_last_node_paddr = max(paddr, gru_last_node_paddr); - } - - /* Save upper (63:M) bits of address only for is_GRU_range */ - gru_first_node_paddr &= gru_dist_umask; - gru_last_node_paddr &= gru_dist_umask; - - pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n", gru_dist_base, gru_first_node_paddr, gru_last_node_paddr); -} - static __init void map_gru_high(int max_pnode) { union uvh_rh_gam_gru_overlay_config_mmr_u gru; @@ -846,12 +795,6 @@ static __init void map_gru_high(int max_pnode) return; } - /* Only UV3 has distributed GRU mode */ - if (is_uv3_hub() && gru.s3.mode) { - map_gru_distributed(gru.v); - return; - } - base = (gru.v & mask) >> shift; map_high("GRU", base, shift, shift, max_pnode, map_wb); gru_start_paddr = ((u64)base << shift); diff --git a/arch/x86/kernel/audit_64.c b/arch/x86/kernel/audit_64.c index e1efe44ebefc..83d9cad4e68b 100644 --- a/arch/x86/kernel/audit_64.c +++ b/arch/x86/kernel/audit_64.c @@ -3,6 +3,7 @@ #include <linux/types.h> #include <linux/audit.h> #include <asm/unistd.h> +#include <asm/audit.h> static unsigned dir_class[] = { #include <asm-generic/audit_dir_write.h> @@ -41,7 +42,6 @@ int audit_classify_arch(int arch) int audit_classify_syscall(int abi, unsigned syscall) { #ifdef CONFIG_IA32_EMULATION - extern int ia32_classify_syscall(unsigned); if (abi == AUDIT_ARCH_I386) return ia32_classify_syscall(syscall); #endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 547ad7bbf0e0..d4806eac9325 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -18,6 +18,7 @@ #include <asm/pci-direct.h> #include <asm/delay.h> #include <asm/debugreg.h> +#include <asm/resctrl.h> #ifdef CONFIG_X86_64 # include <asm/mmconfig.h> @@ -597,6 +598,8 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; } } + + resctrl_cpu_detect(c); } static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) @@ -1142,8 +1145,7 @@ static const int amd_erratum_383[] = /* #1054: Instructions Retired Performance Counter May Be Inaccurate */ static const int amd_erratum_1054[] = - AMD_OSVW_ERRATUM(0, AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); - + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0, 0, 0x2f, 0xf)); static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index bed0cb83fe24..d07809286b95 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -854,30 +854,6 @@ static void init_speculation_control(struct cpuinfo_x86 *c) } } -static void init_cqm(struct cpuinfo_x86 *c) -{ - if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { - c->x86_cache_max_rmid = -1; - c->x86_cache_occ_scale = -1; - return; - } - - /* will be overridden if occupancy monitoring exists */ - c->x86_cache_max_rmid = cpuid_ebx(0xf); - - if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || - cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || - cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { - u32 eax, ebx, ecx, edx; - - /* QoS sub-leaf, EAX=0Fh, ECX=1 */ - cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); - - c->x86_cache_max_rmid = ecx; - c->x86_cache_occ_scale = ebx; - } -} - void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -945,7 +921,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c) init_scattered_cpuid_features(c); init_speculation_control(c); - init_cqm(c); /* * Clear/Set all flags overridden by options, after probe. @@ -1377,20 +1352,6 @@ static void generic_identify(struct cpuinfo_x86 *c) #endif } -static void x86_init_cache_qos(struct cpuinfo_x86 *c) -{ - /* - * The heavy lifting of max_rmid and cache_occ_scale are handled - * in get_cpu_cap(). Here we just set the max_rmid for the boot_cpu - * in case CQM bits really aren't there in this CPU. - */ - if (c != &boot_cpu_data) { - boot_cpu_data.x86_cache_max_rmid = - min(boot_cpu_data.x86_cache_max_rmid, - c->x86_cache_max_rmid); - } -} - /* * Validate that ACPI/mptables have the same information about the * effective APIC id and update the package map. @@ -1503,7 +1464,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) #endif x86_init_rdrand(c); - x86_init_cache_qos(c); setup_pku(c); /* diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index a19a680542ce..166d7c355896 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -22,6 +22,7 @@ #include <asm/cpu_device_id.h> #include <asm/cmdline.h> #include <asm/traps.h> +#include <asm/resctrl.h> #ifdef CONFIG_X86_64 #include <linux/topology.h> @@ -322,6 +323,11 @@ static void early_init_intel(struct cpuinfo_x86 *c) detect_ht_early(c); } +static void bsp_init_intel(struct cpuinfo_x86 *c) +{ + resctrl_cpu_detect(c); +} + #ifdef CONFIG_X86_32 /* * Early probe support logic for ppro memory erratum #50 @@ -961,6 +967,7 @@ static const struct cpu_dev intel_cpu_dev = { #endif .c_detect_tlb = intel_detect_tlb, .c_early_init = early_init_intel, + .c_bsp_init = bsp_init_intel, .c_init = init_intel, .c_x86_vendor = X86_VENDOR_INTEL, }; diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c index d3482eb43ff3..ad6776081e60 100644 --- a/arch/x86/kernel/cpu/match.c +++ b/arch/x86/kernel/cpu/match.c @@ -39,13 +39,18 @@ const struct x86_cpu_id *x86_match_cpu(const struct x86_cpu_id *match) const struct x86_cpu_id *m; struct cpuinfo_x86 *c = &boot_cpu_data; - for (m = match; m->vendor | m->family | m->model | m->feature; m++) { + for (m = match; + m->vendor | m->family | m->model | m->steppings | m->feature; + m++) { if (m->vendor != X86_VENDOR_ANY && c->x86_vendor != m->vendor) continue; if (m->family != X86_FAMILY_ANY && c->x86 != m->family) continue; if (m->model != X86_MODEL_ANY && c->x86_model != m->model) continue; + if (m->steppings != X86_STEPPING_ANY && + !(BIT(c->x86_stepping) & m->steppings)) + continue; if (m->feature != X86_FEATURE_ANY && !cpu_has(c, m->feature)) continue; return m; diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 54165f3569e8..e9265e2f28c9 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -42,6 +42,8 @@ #include <linux/export.h> #include <linux/jump_label.h> #include <linux/set_memory.h> +#include <linux/task_work.h> +#include <linux/hardirq.h> #include <asm/intel-family.h> #include <asm/processor.h> @@ -1086,23 +1088,6 @@ static void mce_clear_state(unsigned long *toclear) } } -static int do_memory_failure(struct mce *m) -{ - int flags = MF_ACTION_REQUIRED; - int ret; - - pr_err("Uncorrected hardware memory error in user-access at %llx", m->addr); - if (!(m->mcgstatus & MCG_STATUS_RIPV)) - flags |= MF_MUST_KILL; - ret = memory_failure(m->addr >> PAGE_SHIFT, flags); - if (ret) - pr_err("Memory error not recovered"); - else - set_mce_nospec(m->addr >> PAGE_SHIFT); - return ret; -} - - /* * Cases where we avoid rendezvous handler timeout: * 1) If this CPU is offline. @@ -1204,6 +1189,29 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, *m = *final; } +static void kill_me_now(struct callback_head *ch) +{ + force_sig(SIGBUS); +} + +static void kill_me_maybe(struct callback_head *cb) +{ + struct task_struct *p = container_of(cb, struct task_struct, mce_kill_me); + int flags = MF_ACTION_REQUIRED; + + pr_err("Uncorrected hardware memory error in user-access at %llx", p->mce_addr); + if (!(p->mce_status & MCG_STATUS_RIPV)) + flags |= MF_MUST_KILL; + + if (!memory_failure(p->mce_addr >> PAGE_SHIFT, flags)) { + set_mce_nospec(p->mce_addr >> PAGE_SHIFT); + return; + } + + pr_err("Memory error not recovered"); + kill_me_now(cb); +} + /* * The actual machine check handler. This only handles real * exceptions when something got corrupted coming in through int 18. @@ -1222,7 +1230,7 @@ static void __mc_scan_banks(struct mce *m, struct mce *final, * backing the user stack, tracing that reads the user stack will cause * potentially infinite recursion. */ -void notrace do_machine_check(struct pt_regs *regs, long error_code) +void noinstr do_machine_check(struct pt_regs *regs, long error_code) { DECLARE_BITMAP(valid_banks, MAX_NR_BANKS); DECLARE_BITMAP(toclear, MAX_NR_BANKS); @@ -1259,7 +1267,7 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code) if (__mc_check_crashing_cpu(cpu)) return; - ist_enter(regs); + nmi_enter(); this_cpu_inc(mce_exception_count); @@ -1352,23 +1360,24 @@ void notrace do_machine_check(struct pt_regs *regs, long error_code) /* Fault was in user mode and we need to take some action */ if ((m.cs & 3) == 3) { - ist_begin_non_atomic(regs); - local_irq_enable(); - - if (kill_it || do_memory_failure(&m)) - force_sig(SIGBUS); - local_irq_disable(); - ist_end_non_atomic(); + /* If this triggers there is no way to recover. Die hard. */ + BUG_ON(!on_thread_stack() || !user_mode(regs)); + + current->mce_addr = m.addr; + current->mce_status = m.mcgstatus; + current->mce_kill_me.func = kill_me_maybe; + if (kill_it) + current->mce_kill_me.func = kill_me_now; + task_work_add(current, ¤t->mce_kill_me, true); } else { if (!fixup_exception(regs, X86_TRAP_MC, error_code, 0)) mce_panic("Failed kernel mode recovery", &m, msg); } out_ist: - ist_exit(regs); + nmi_exit(); } EXPORT_SYMBOL_GPL(do_machine_check); -NOKPROBE_SYMBOL(do_machine_check); #ifndef CONFIG_MEMORY_FAILURE int memory_failure(unsigned long pfn, int flags) diff --git a/arch/x86/kernel/cpu/mce/p5.c b/arch/x86/kernel/cpu/mce/p5.c index 4ae6df556526..5ee94aa1b766 100644 --- a/arch/x86/kernel/cpu/mce/p5.c +++ b/arch/x86/kernel/cpu/mce/p5.c @@ -7,6 +7,7 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/smp.h> +#include <linux/hardirq.h> #include <asm/processor.h> #include <asm/traps.h> @@ -24,7 +25,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) { u32 loaddr, hi, lotype; - ist_enter(regs); + nmi_enter(); rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); @@ -39,7 +40,7 @@ static void pentium_machine_check(struct pt_regs *regs, long error_code) add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - ist_exit(regs); + nmi_exit(); } /* Set up machine check reporting for processors with Intel style MCE: */ diff --git a/arch/x86/kernel/cpu/mce/winchip.c b/arch/x86/kernel/cpu/mce/winchip.c index a30ea13cccc2..b3938c195365 100644 --- a/arch/x86/kernel/cpu/mce/winchip.c +++ b/arch/x86/kernel/cpu/mce/winchip.c @@ -6,6 +6,7 @@ #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/types.h> +#include <linux/hardirq.h> #include <asm/processor.h> #include <asm/traps.h> @@ -18,12 +19,12 @@ /* Machine check handler for WinChip C6: */ static void winchip_machine_check(struct pt_regs *regs, long error_code) { - ist_enter(regs); + nmi_enter(); pr_emerg("CPU0: Machine Check Exception.\n"); add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE); - ist_exit(regs); + nmi_exit(); } /* Set up machine check reporting on the Winchip C6 series */ diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 7019d4b2df0c..baec68b7e010 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -545,8 +545,7 @@ static int __wait_for_cpus(atomic_t *t, long long timeout) /* * Returns: * < 0 - on error - * 0 - no update done - * 1 - microcode was updated + * 0 - success (no update done or microcode was updated) */ static int __reload_late(void *info) { @@ -573,11 +572,11 @@ static int __reload_late(void *info) else goto wait_for_siblings; - if (err > UCODE_NFOUND) { - pr_warn("Error reloading microcode on CPU %d\n", cpu); + if (err >= UCODE_NFOUND) { + if (err == UCODE_ERROR) + pr_warn("Error reloading microcode on CPU %d\n", cpu); + ret = -1; - } else if (err == UCODE_UPDATED || err == UCODE_OK) { - ret = 1; } wait_for_siblings: @@ -608,7 +607,7 @@ static int microcode_reload_late(void) atomic_set(&late_cpus_out, 0); ret = stop_machine_cpuslocked(__reload_late, NULL, cpu_online_mask); - if (ret > 0) + if (ret == 0) microcode_check(); pr_info("Reload completed, microcode revision: 0x%x\n", boot_cpu_data.microcode); @@ -649,7 +648,7 @@ static ssize_t reload_store(struct device *dev, put: put_online_cpus(); - if (ret >= 0) + if (ret == 0) ret = size; return ret; diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 9556930cd8c1..a5ee607a3b89 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -63,6 +63,10 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) case 15: return msr - MSR_P4_BPU_PERFCTR0; } + fallthrough; + case X86_VENDOR_ZHAOXIN: + case X86_VENDOR_CENTAUR: + return msr - MSR_ARCH_PERFMON_PERFCTR0; } return 0; } @@ -92,6 +96,10 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) case 15: return msr - MSR_P4_BSU_ESCR0; } + fallthrough; + case X86_VENDOR_ZHAOXIN: + case X86_VENDOR_CENTAUR: + return msr - MSR_ARCH_PERFMON_EVENTSEL0; } return 0; diff --git a/arch/x86/kernel/cpu/resctrl/core.c b/arch/x86/kernel/cpu/resctrl/core.c index d8cc5223b7ce..12f967c6b603 100644 --- a/arch/x86/kernel/cpu/resctrl/core.c +++ b/arch/x86/kernel/cpu/resctrl/core.c @@ -22,7 +22,7 @@ #include <linux/cpuhotplug.h> #include <asm/intel-family.h> -#include <asm/resctrl_sched.h> +#include <asm/resctrl.h> #include "internal.h" /* Mutex to protect rdtgroup access. */ @@ -958,6 +958,36 @@ static __init void rdt_init_res_defs(void) static enum cpuhp_state rdt_online; +/* Runs once on the BSP during boot. */ +void resctrl_cpu_detect(struct cpuinfo_x86 *c) +{ + if (!cpu_has(c, X86_FEATURE_CQM_LLC)) { + c->x86_cache_max_rmid = -1; + c->x86_cache_occ_scale = -1; + c->x86_cache_mbm_width_offset = -1; + return; + } + + /* will be overridden if occupancy monitoring exists */ + c->x86_cache_max_rmid = cpuid_ebx(0xf); + + if (cpu_has(c, X86_FEATURE_CQM_OCCUP_LLC) || + cpu_has(c, X86_FEATURE_CQM_MBM_TOTAL) || + cpu_has(c, X86_FEATURE_CQM_MBM_LOCAL)) { + u32 eax, ebx, ecx, edx; + + /* QoS sub-leaf, EAX=0Fh, ECX=1 */ + cpuid_count(0xf, 1, &eax, &ebx, &ecx, &edx); + + c->x86_cache_max_rmid = ecx; + c->x86_cache_occ_scale = ebx; + if (c->x86_vendor == X86_VENDOR_INTEL) + c->x86_cache_mbm_width_offset = eax & 0xff; + else + c->x86_cache_mbm_width_offset = -1; + } +} + static int __init resctrl_late_init(void) { struct rdt_resource *r; diff --git a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c index 055c8613b531..934c8fb8a64a 100644 --- a/arch/x86/kernel/cpu/resctrl/ctrlmondata.c +++ b/arch/x86/kernel/cpu/resctrl/ctrlmondata.c @@ -495,14 +495,16 @@ int rdtgroup_schemata_show(struct kernfs_open_file *of, return ret; } -void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, - struct rdtgroup *rdtgrp, int evtid, int first) +void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, + struct rdt_domain *d, struct rdtgroup *rdtgrp, + int evtid, int first) { /* * setup the parameters to send to the IPI to read the data. */ rr->rgrp = rdtgrp; rr->evtid = evtid; + rr->r = r; rr->d = d; rr->val = 0; rr->first = first; @@ -539,7 +541,7 @@ int rdtgroup_mondata_show(struct seq_file *m, void *arg) goto out; } - mon_event_read(&rr, d, rdtgrp, evtid, false); + mon_event_read(&rr, r, d, rdtgrp, evtid, false); if (rr.val & RMID_VAL_ERROR) seq_puts(m, "Error\n"); diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h index 3dd13f3a8b23..f20a47d120b1 100644 --- a/arch/x86/kernel/cpu/resctrl/internal.h +++ b/arch/x86/kernel/cpu/resctrl/internal.h @@ -31,7 +31,7 @@ #define CQM_LIMBOCHECK_INTERVAL 1000 -#define MBM_CNTR_WIDTH 24 +#define MBM_CNTR_WIDTH_BASE 24 #define MBM_OVERFLOW_INTERVAL 1000 #define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 @@ -40,6 +40,12 @@ #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) +/* + * With the above fields in use 62 bits remain in MSR_IA32_QM_CTR for + * data to be returned. The counter width is discovered from the hardware + * as an offset from MBM_CNTR_WIDTH_BASE. + */ +#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE) struct rdt_fs_context { @@ -87,6 +93,7 @@ union mon_data_bits { struct rmid_read { struct rdtgroup *rgrp; + struct rdt_resource *r; struct rdt_domain *d; int evtid; bool first; @@ -460,6 +467,7 @@ struct rdt_resource { struct list_head evt_list; int num_rmid; unsigned int mon_scale; + unsigned int mbm_width; unsigned long fflags; }; @@ -587,8 +595,9 @@ void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, unsigned int dom_id); void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r, struct rdt_domain *d); -void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, - struct rdtgroup *rdtgrp, int evtid, int first); +void mon_event_read(struct rmid_read *rr, struct rdt_resource *r, + struct rdt_domain *d, struct rdtgroup *rdtgrp, + int evtid, int first); void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms); void mbm_handle_overflow(struct work_struct *work); diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c index 773124b0e18a..837d7d012b7b 100644 --- a/arch/x86/kernel/cpu/resctrl/monitor.c +++ b/arch/x86/kernel/cpu/resctrl/monitor.c @@ -214,9 +214,9 @@ void free_rmid(u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } -static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr) +static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) { - u64 shift = 64 - MBM_CNTR_WIDTH, chunks; + u64 shift = 64 - width, chunks; chunks = (cur_msr << shift) - (prev_msr << shift); return chunks >>= shift; @@ -256,7 +256,7 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) return 0; } - chunks = mbm_overflow_count(m->prev_msr, tval); + chunks = mbm_overflow_count(m->prev_msr, tval, rr->r->mbm_width); m->chunks += chunks; m->prev_msr = tval; @@ -278,7 +278,7 @@ static void mbm_bw_count(u32 rmid, struct rmid_read *rr) if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) return; - chunks = mbm_overflow_count(m->prev_bw_msr, tval); + chunks = mbm_overflow_count(m->prev_bw_msr, tval, rr->r->mbm_width); m->chunks_bw += chunks; m->chunks = m->chunks_bw; cur_bw = (chunks * r->mon_scale) >> 20; @@ -433,11 +433,12 @@ static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) } } -static void mbm_update(struct rdt_domain *d, int rmid) +static void mbm_update(struct rdt_resource *r, struct rdt_domain *d, int rmid) { struct rmid_read rr; rr.first = false; + rr.r = r; rr.d = d; /* @@ -510,6 +511,7 @@ void mbm_handle_overflow(struct work_struct *work) struct rdtgroup *prgrp, *crgrp; int cpu = smp_processor_id(); struct list_head *head; + struct rdt_resource *r; struct rdt_domain *d; mutex_lock(&rdtgroup_mutex); @@ -517,16 +519,18 @@ void mbm_handle_overflow(struct work_struct *work) if (!static_branch_likely(&rdt_mon_enable_key)) goto out_unlock; - d = get_domain_from_cpu(cpu, &rdt_resources_all[RDT_RESOURCE_L3]); + r = &rdt_resources_all[RDT_RESOURCE_L3]; + + d = get_domain_from_cpu(cpu, r); if (!d) goto out_unlock; list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) { - mbm_update(d, prgrp->mon.rmid); + mbm_update(r, d, prgrp->mon.rmid); head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) - mbm_update(d, crgrp->mon.rmid); + mbm_update(r, d, crgrp->mon.rmid); if (is_mba_sc(NULL)) update_mba_bw(prgrp, d); @@ -614,11 +618,18 @@ static void l3_mon_evt_init(struct rdt_resource *r) int rdt_get_mon_l3_config(struct rdt_resource *r) { + unsigned int mbm_offset = boot_cpu_data.x86_cache_mbm_width_offset; unsigned int cl_size = boot_cpu_data.x86_cache_size; int ret; r->mon_scale = boot_cpu_data.x86_cache_occ_scale; r->num_rmid = boot_cpu_data.x86_cache_max_rmid + 1; + r->mbm_width = MBM_CNTR_WIDTH_BASE; + + if (mbm_offset > 0 && mbm_offset <= MBM_CNTR_WIDTH_OFFSET_MAX) + r->mbm_width += mbm_offset; + else if (mbm_offset > MBM_CNTR_WIDTH_OFFSET_MAX) + pr_warn("Ignoring impossible MBM counter offset\n"); /* * A reasonable upper limit on the max threshold is the number diff --git a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c index d7623e1b927d..4bd28b388a1a 100644 --- a/arch/x86/kernel/cpu/resctrl/pseudo_lock.c +++ b/arch/x86/kernel/cpu/resctrl/pseudo_lock.c @@ -24,7 +24,7 @@ #include <asm/cacheflush.h> #include <asm/intel-family.h> -#include <asm/resctrl_sched.h> +#include <asm/resctrl.h> #include <asm/perf_event.h> #include "../../events/perf_event.h" /* For X86_CONFIG() */ diff --git a/arch/x86/kernel/cpu/resctrl/rdtgroup.c b/arch/x86/kernel/cpu/resctrl/rdtgroup.c index 5a359d9fcc05..d7cb5ab0d1f0 100644 --- a/arch/x86/kernel/cpu/resctrl/rdtgroup.c +++ b/arch/x86/kernel/cpu/resctrl/rdtgroup.c @@ -29,7 +29,7 @@ #include <uapi/linux/magic.h> -#include <asm/resctrl_sched.h> +#include <asm/resctrl.h> #include "internal.h" DEFINE_STATIC_KEY_FALSE(rdt_enable_key); @@ -2472,7 +2472,7 @@ static int mkdir_mondata_subdir(struct kernfs_node *parent_kn, goto out_destroy; if (is_mbm_event(mevt->evtid)) - mon_event_read(&rr, d, prgrp, mevt->evtid, true); + mon_event_read(&rr, r, d, prgrp, mevt->evtid, true); } kernfs_activate(kn); return 0; diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index 8e3a8fedfa4d..722fd712e1cf 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -87,7 +87,6 @@ static bool in_softirq_stack(unsigned long *stack, struct stack_info *info) static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info) { -#ifdef CONFIG_DOUBLEFAULT struct cpu_entry_area *cea = get_cpu_entry_area(raw_smp_processor_id()); struct doublefault_stack *ss = &cea->doublefault_stack; @@ -103,9 +102,6 @@ static bool in_doublefault_stack(unsigned long *stack, struct stack_info *info) info->next_sp = (unsigned long *)this_cpu_read(cpu_tss_rw.x86_tss.sp); return true; -#else - return false; -#endif } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index c5399e80c59c..4d13c57f370a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -910,14 +910,6 @@ static int __init parse_memmap_one(char *p) return -EINVAL; if (!strncmp(p, "exactmap", 8)) { -#ifdef CONFIG_CRASH_DUMP - /* - * If we are doing a crash dump, we still need to know - * the real memory size before the original memory map is - * reset. - */ - saved_max_pfn = e820__end_of_ram_pfn(); -#endif e820_table->nr_entries = 0; userdef = 1; return 0; diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c index 9b33904251a9..93fbdff2974f 100644 --- a/arch/x86/kernel/early_printk.c +++ b/arch/x86/kernel/early_printk.c @@ -15,12 +15,9 @@ #include <xen/hvc-console.h> #include <asm/pci-direct.h> #include <asm/fixmap.h> -#include <asm/intel-mid.h> #include <asm/pgtable.h> #include <linux/usb/ehci_def.h> #include <linux/usb/xhci-dbgp.h> -#include <linux/efi.h> -#include <asm/efi.h> #include <asm/pci_x86.h> /* Simple VGA output */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 12c70840980e..06c818967bb6 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -291,15 +291,13 @@ void fpu__drop(struct fpu *fpu) } /* - * Clear FPU registers by setting them up from - * the init fpstate: + * Clear FPU registers by setting them up from the init fpstate. + * Caller must do fpregs_[un]lock() around it. */ -static inline void copy_init_fpstate_to_fpregs(void) +static inline void copy_init_fpstate_to_fpregs(u64 features_mask) { - fpregs_lock(); - if (use_xsave()) - copy_kernel_to_xregs(&init_fpstate.xsave, -1); + copy_kernel_to_xregs(&init_fpstate.xsave, features_mask); else if (static_cpu_has(X86_FEATURE_FXSR)) copy_kernel_to_fxregs(&init_fpstate.fxsave); else @@ -307,9 +305,6 @@ static inline void copy_init_fpstate_to_fpregs(void) if (boot_cpu_has(X86_FEATURE_OSPKE)) copy_init_pkru_to_fpregs(); - - fpregs_mark_activate(); - fpregs_unlock(); } /* @@ -318,18 +313,40 @@ static inline void copy_init_fpstate_to_fpregs(void) * Called by sys_execve(), by the signal handler code and by various * error paths. */ -void fpu__clear(struct fpu *fpu) +static void fpu__clear(struct fpu *fpu, bool user_only) { - WARN_ON_FPU(fpu != ¤t->thread.fpu); /* Almost certainly an anomaly */ + WARN_ON_FPU(fpu != ¤t->thread.fpu); - fpu__drop(fpu); + if (!static_cpu_has(X86_FEATURE_FPU)) { + fpu__drop(fpu); + fpu__initialize(fpu); + return; + } - /* - * Make sure fpstate is cleared and initialized. - */ - fpu__initialize(fpu); - if (static_cpu_has(X86_FEATURE_FPU)) - copy_init_fpstate_to_fpregs(); + fpregs_lock(); + + if (user_only) { + if (!fpregs_state_valid(fpu, smp_processor_id()) && + xfeatures_mask_supervisor()) + copy_kernel_to_xregs(&fpu->state.xsave, + xfeatures_mask_supervisor()); + copy_init_fpstate_to_fpregs(xfeatures_mask_user()); + } else { + copy_init_fpstate_to_fpregs(xfeatures_mask_all); + } + + fpregs_mark_activate(); + fpregs_unlock(); +} + +void fpu__clear_user_states(struct fpu *fpu) +{ + fpu__clear(fpu, true); +} + +void fpu__clear_all(struct fpu *fpu) +{ + fpu__clear(fpu, false); } /* diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 6ce7e0a23268..61ddc3a5e5c2 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -224,7 +224,8 @@ static void __init fpu__init_system_xstate_size_legacy(void) */ u64 __init fpu__get_supported_xfeatures_mask(void) { - return XCNTXT_MASK; + return XFEATURE_MASK_USER_SUPPORTED | + XFEATURE_MASK_SUPERVISOR_SUPPORTED; } /* Legacy code to initialize eager fpu mode. */ diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index d652b939ccfb..bd1d0649f8ce 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -139,7 +139,7 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset, } else { ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1); if (!ret) - ret = validate_xstate_header(&xsave->header); + ret = validate_user_xstate_header(&xsave->header); } /* diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 400a05e1c1c5..9393a445d73c 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -211,9 +211,9 @@ retry: } static inline void -sanitize_restored_xstate(union fpregs_state *state, - struct user_i387_ia32_struct *ia32_env, - u64 xfeatures, int fx_only) +sanitize_restored_user_xstate(union fpregs_state *state, + struct user_i387_ia32_struct *ia32_env, + u64 user_xfeatures, int fx_only) { struct xregs_state *xsave = &state->xsave; struct xstate_header *header = &xsave->header; @@ -226,13 +226,22 @@ sanitize_restored_xstate(union fpregs_state *state, */ /* - * Init the state that is not present in the memory - * layout and not enabled by the OS. + * 'user_xfeatures' might have bits clear which are + * set in header->xfeatures. This represents features that + * were in init state prior to a signal delivery, and need + * to be reset back to the init state. Clear any user + * feature bits which are set in the kernel buffer to get + * them back to the init state. + * + * Supervisor state is unchanged by input from userspace. + * Ensure supervisor state bits stay set and supervisor + * state is not modified. */ if (fx_only) header->xfeatures = XFEATURE_MASK_FPSSE; else - header->xfeatures &= xfeatures; + header->xfeatures &= user_xfeatures | + xfeatures_mask_supervisor(); } if (use_fxsr()) { @@ -252,16 +261,24 @@ sanitize_restored_xstate(union fpregs_state *state, */ static int copy_user_to_fpregs_zeroing(void __user *buf, u64 xbv, int fx_only) { + u64 init_bv; + int r; + if (use_xsave()) { if (fx_only) { - u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; - copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - return copy_user_to_fxregs(buf); + init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE; + + r = copy_user_to_fxregs(buf); + if (!r) + copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); + return r; } else { - u64 init_bv = xfeatures_mask & ~xbv; - if (unlikely(init_bv)) + init_bv = xfeatures_mask_user() & ~xbv; + + r = copy_user_to_xregs(buf, xbv); + if (!r && unlikely(init_bv)) copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - return copy_user_to_xregs(buf, xbv); + return r; } } else if (use_fxsr()) { return copy_user_to_fxregs(buf); @@ -277,7 +294,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) struct task_struct *tsk = current; struct fpu *fpu = &tsk->thread.fpu; struct user_i387_ia32_struct env; - u64 xfeatures = 0; + u64 user_xfeatures = 0; int fx_only = 0; int ret = 0; @@ -285,7 +302,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) IS_ENABLED(CONFIG_IA32_EMULATION)); if (!buf) { - fpu__clear(fpu); + fpu__clear_user_states(fpu); return 0; } @@ -310,32 +327,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) trace_x86_fpu_xstate_check_failed(fpu); } else { state_size = fx_sw_user.xstate_size; - xfeatures = fx_sw_user.xfeatures; + user_xfeatures = fx_sw_user.xfeatures; } } - /* - * The current state of the FPU registers does not matter. By setting - * TIF_NEED_FPU_LOAD unconditionally it is ensured that the our xstate - * is not modified on context switch and that the xstate is considered - * to be loaded again on return to userland (overriding last_cpu avoids - * the optimisation). - */ - set_thread_flag(TIF_NEED_FPU_LOAD); - __fpu_invalidate_fpregs_state(fpu); - if ((unsigned long)buf_fx % 64) fx_only = 1; - /* - * For 32-bit frames with fxstate, copy the fxstate so it can be - * reconstructed later. - */ - if (ia32_fxstate) { - ret = __copy_from_user(&env, buf, sizeof(env)); - if (ret) - goto err_out; - envp = &env; - } else { + + if (!ia32_fxstate) { /* * Attempt to restore the FPU registers directly from user * memory. For that to succeed, the user access cannot cause @@ -345,20 +344,65 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) */ fpregs_lock(); pagefault_disable(); - ret = copy_user_to_fpregs_zeroing(buf_fx, xfeatures, fx_only); + ret = copy_user_to_fpregs_zeroing(buf_fx, user_xfeatures, fx_only); pagefault_enable(); if (!ret) { + + /* + * Restore supervisor states: previous context switch + * etc has done XSAVES and saved the supervisor states + * in the kernel buffer from which they can be restored + * now. + * + * We cannot do a single XRSTORS here - which would + * be nice - because the rest of the FPU registers are + * being restored from a user buffer directly. The + * single XRSTORS happens below, when the user buffer + * has been copied to the kernel one. + */ + if (test_thread_flag(TIF_NEED_FPU_LOAD) && + xfeatures_mask_supervisor()) + copy_kernel_to_xregs(&fpu->state.xsave, + xfeatures_mask_supervisor()); fpregs_mark_activate(); fpregs_unlock(); return 0; } - fpregs_deactivate(fpu); fpregs_unlock(); + } else { + /* + * For 32-bit frames with fxstate, copy the fxstate so it can + * be reconstructed later. + */ + ret = __copy_from_user(&env, buf, sizeof(env)); + if (ret) + goto err_out; + envp = &env; } + /* + * By setting TIF_NEED_FPU_LOAD it is ensured that our xstate is + * not modified on context switch and that the xstate is considered + * to be loaded again on return to userland (overriding last_cpu avoids + * the optimisation). + */ + fpregs_lock(); + + if (!test_thread_flag(TIF_NEED_FPU_LOAD)) { + + /* + * Supervisor states are not modified by user space input. Save + * current supervisor states first and invalidate the FPU regs. + */ + if (xfeatures_mask_supervisor()) + copy_supervisor_to_kernel(&fpu->state.xsave); + set_thread_flag(TIF_NEED_FPU_LOAD); + } + __fpu_invalidate_fpregs_state(fpu); + fpregs_unlock(); if (use_xsave() && !fx_only) { - u64 init_bv = xfeatures_mask & ~xfeatures; + u64 init_bv = xfeatures_mask_user() & ~user_xfeatures; if (using_compacted_format()) { ret = copy_user_to_xstate(&fpu->state.xsave, buf_fx); @@ -366,17 +410,24 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) ret = __copy_from_user(&fpu->state.xsave, buf_fx, state_size); if (!ret && state_size > offsetof(struct xregs_state, header)) - ret = validate_xstate_header(&fpu->state.xsave.header); + ret = validate_user_xstate_header(&fpu->state.xsave.header); } if (ret) goto err_out; - sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); + sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, + fx_only); fpregs_lock(); if (unlikely(init_bv)) copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); - ret = copy_kernel_to_xregs_err(&fpu->state.xsave, xfeatures); + + /* + * Restore previously saved supervisor xstates along with + * copied-in user xstates. + */ + ret = copy_kernel_to_xregs_err(&fpu->state.xsave, + user_xfeatures | xfeatures_mask_supervisor()); } else if (use_fxsr()) { ret = __copy_from_user(&fpu->state.fxsave, buf_fx, state_size); @@ -385,11 +436,14 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) goto err_out; } - sanitize_restored_xstate(&fpu->state, envp, xfeatures, fx_only); + sanitize_restored_user_xstate(&fpu->state, envp, user_xfeatures, + fx_only); fpregs_lock(); if (use_xsave()) { - u64 init_bv = xfeatures_mask & ~XFEATURE_MASK_FPSSE; + u64 init_bv; + + init_bv = xfeatures_mask_user() & ~XFEATURE_MASK_FPSSE; copy_kernel_to_xregs(&init_fpstate.xsave, init_bv); } @@ -410,7 +464,7 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) err_out: if (ret) - fpu__clear(fpu); + fpu__clear_user_states(fpu); return ret; } @@ -465,7 +519,7 @@ void fpu__init_prepare_fx_sw_frame(void) fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1; fx_sw_reserved.extended_size = size; - fx_sw_reserved.xfeatures = xfeatures_mask; + fx_sw_reserved.xfeatures = xfeatures_mask_user(); fx_sw_reserved.xstate_size = fpu_user_xstate_size; if (IS_ENABLED(CONFIG_IA32_EMULATION) || diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 6a54e83d5589..bda2e5eaca0e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -54,13 +54,15 @@ static short xsave_cpuid_features[] __initdata = { }; /* - * Mask of xstate features supported by the CPU and the kernel: + * This represents the full set of bits that should ever be set in a kernel + * XSAVE buffer, both supervisor and user xstates. */ -u64 xfeatures_mask __read_mostly; +u64 xfeatures_mask_all __read_mostly; static unsigned int xstate_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_sizes[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; static unsigned int xstate_comp_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; +static unsigned int xstate_supervisor_only_offsets[XFEATURE_MAX] = { [ 0 ... XFEATURE_MAX - 1] = -1}; /* * The XSAVE area of kernel can be in standard or compacted format; @@ -76,7 +78,7 @@ unsigned int fpu_user_xstate_size; */ int cpu_has_xfeatures(u64 xfeatures_needed, const char **feature_name) { - u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask; + u64 xfeatures_missing = xfeatures_needed & ~xfeatures_mask_all; if (unlikely(feature_name)) { long xfeature_idx, max_idx; @@ -150,7 +152,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) * None of the feature bits are in init state. So nothing else * to do for us, as the memory layout is up to date. */ - if ((xfeatures & xfeatures_mask) == xfeatures_mask) + if ((xfeatures & xfeatures_mask_all) == xfeatures_mask_all) return; /* @@ -177,7 +179,7 @@ void fpstate_sanitize_xstate(struct fpu *fpu) * in a special way already: */ feature_bit = 0x2; - xfeatures = (xfeatures_mask & ~xfeatures) >> 2; + xfeatures = (xfeatures_mask_user() & ~xfeatures) >> 2; /* * Update all the remaining memory layouts according to their @@ -205,30 +207,39 @@ void fpstate_sanitize_xstate(struct fpu *fpu) */ void fpu__init_cpu_xstate(void) { - if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask) + u64 unsup_bits; + + if (!boot_cpu_has(X86_FEATURE_XSAVE) || !xfeatures_mask_all) return; /* - * Make it clear that XSAVES supervisor states are not yet - * implemented should anyone expect it to work by changing - * bits in XFEATURE_MASK_* macros and XCR0. + * Unsupported supervisor xstates should not be found in + * the xfeatures mask. */ - WARN_ONCE((xfeatures_mask & XFEATURE_MASK_SUPERVISOR), - "x86/fpu: XSAVES supervisor states are not yet implemented.\n"); + unsup_bits = xfeatures_mask_all & XFEATURE_MASK_SUPERVISOR_UNSUPPORTED; + WARN_ONCE(unsup_bits, "x86/fpu: Found unsupported supervisor xstates: 0x%llx\n", + unsup_bits); - xfeatures_mask &= ~XFEATURE_MASK_SUPERVISOR; + xfeatures_mask_all &= ~XFEATURE_MASK_SUPERVISOR_UNSUPPORTED; cr4_set_bits(X86_CR4_OSXSAVE); - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); + + /* + * XCR_XFEATURE_ENABLED_MASK (aka. XCR0) sets user features + * managed by XSAVE{C, OPT, S} and XRSTOR{S}. Only XSAVE user + * states can be set here. + */ + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user()); + + /* + * MSR_IA32_XSS sets supervisor states managed by XSAVES. + */ + if (boot_cpu_has(X86_FEATURE_XSAVES)) + wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); } -/* - * Note that in the future we will likely need a pair of - * functions here: one for user xstates and the other for - * system xstates. For now, they are the same. - */ -static int xfeature_enabled(enum xfeature xfeature) +static bool xfeature_enabled(enum xfeature xfeature) { - return !!(xfeatures_mask & (1UL << xfeature)); + return xfeatures_mask_all & BIT_ULL(xfeature); } /* @@ -383,6 +394,33 @@ static void __init setup_xstate_comp_offsets(void) } /* + * Setup offsets of a supervisor-state-only XSAVES buffer: + * + * The offsets stored in xstate_comp_offsets[] only work for one specific + * value of the Requested Feature BitMap (RFBM). In cases where a different + * RFBM value is used, a different set of offsets is required. This set of + * offsets is for when RFBM=xfeatures_mask_supervisor(). + */ +static void __init setup_supervisor_only_offsets(void) +{ + unsigned int next_offset; + int i; + + next_offset = FXSAVE_SIZE + XSAVE_HDR_SIZE; + + for (i = FIRST_EXTENDED_XFEATURE; i < XFEATURE_MAX; i++) { + if (!xfeature_enabled(i) || !xfeature_is_supervisor(i)) + continue; + + if (xfeature_is_aligned(i)) + next_offset = ALIGN(next_offset, 64); + + xstate_supervisor_only_offsets[i] = next_offset; + next_offset += xstate_sizes[i]; + } +} + +/* * Print out xstate component offsets and sizes */ static void __init print_xstate_offset_size(void) @@ -415,7 +453,7 @@ static void __init setup_init_fpu_buf(void) if (boot_cpu_has(X86_FEATURE_XSAVES)) init_fpstate.xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | - xfeatures_mask; + xfeatures_mask_all; /* * Init all the features state with header.xfeatures being 0x0 @@ -438,7 +476,7 @@ static int xfeature_uncompacted_offset(int xfeature_nr) * format. Checking a supervisor state's uncompacted offset is * an error. */ - if (XFEATURE_MASK_SUPERVISOR & BIT_ULL(xfeature_nr)) { + if (XFEATURE_MASK_SUPERVISOR_ALL & BIT_ULL(xfeature_nr)) { WARN_ONCE(1, "No fixed offset for xstate %d\n", xfeature_nr); return -1; } @@ -472,10 +510,10 @@ int using_compacted_format(void) } /* Validate an xstate header supplied by userspace (ptrace or sigreturn) */ -int validate_xstate_header(const struct xstate_header *hdr) +int validate_user_xstate_header(const struct xstate_header *hdr) { /* No unknown or supervisor features may be set */ - if (hdr->xfeatures & (~xfeatures_mask | XFEATURE_MASK_SUPERVISOR)) + if (hdr->xfeatures & ~xfeatures_mask_user()) return -EINVAL; /* Userspace must use the uncompacted format */ @@ -610,15 +648,12 @@ static void do_extra_xstate_size_checks(void) /* - * Get total size of enabled xstates in XCR0/xfeatures_mask. + * Get total size of enabled xstates in XCR0 | IA32_XSS. * * Note the SDM's wording here. "sub-function 0" only enumerates * the size of the *user* states. If we use it to size a buffer * that we use 'XSAVES' on, we could potentially overflow the * buffer because 'XSAVES' saves system states too. - * - * Note that we do not currently set any bits on IA32_XSS so - * 'XCR0 | IA32_XSS == XCR0' for now. */ static unsigned int __init get_xsaves_size(void) { @@ -700,7 +735,7 @@ static int __init init_xstate_size(void) */ static void fpu__init_disable_system_xstate(void) { - xfeatures_mask = 0; + xfeatures_mask_all = 0; cr4_clear_bits(X86_CR4_OSXSAVE); setup_clear_cpu_cap(X86_FEATURE_XSAVE); } @@ -735,16 +770,26 @@ void __init fpu__init_system_xstate(void) return; } + /* + * Find user xstates supported by the processor. + */ cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); - xfeatures_mask = eax + ((u64)edx << 32); + xfeatures_mask_all = eax + ((u64)edx << 32); - if ((xfeatures_mask & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { + /* + * Find supervisor xstates supported by the processor. + */ + cpuid_count(XSTATE_CPUID, 1, &eax, &ebx, &ecx, &edx); + xfeatures_mask_all |= ecx + ((u64)edx << 32); + + if ((xfeatures_mask_user() & XFEATURE_MASK_FPSSE) != XFEATURE_MASK_FPSSE) { /* * This indicates that something really unexpected happened * with the enumeration. Disable XSAVE and try to continue * booting without it. This is too early to BUG(). */ - pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", xfeatures_mask); + pr_err("x86/fpu: FP/SSE not present amongst the CPU's xstate features: 0x%llx.\n", + xfeatures_mask_all); goto out_disable; } @@ -753,10 +798,10 @@ void __init fpu__init_system_xstate(void) */ for (i = 0; i < ARRAY_SIZE(xsave_cpuid_features); i++) { if (!boot_cpu_has(xsave_cpuid_features[i])) - xfeatures_mask &= ~BIT(i); + xfeatures_mask_all &= ~BIT_ULL(i); } - xfeatures_mask &= fpu__get_supported_xfeatures_mask(); + xfeatures_mask_all &= fpu__get_supported_xfeatures_mask(); /* Enable xstate instructions to be able to continue with initialization: */ fpu__init_cpu_xstate(); @@ -768,15 +813,16 @@ void __init fpu__init_system_xstate(void) * Update info used for ptrace frames; use standard-format size and no * supervisor xstates: */ - update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask & ~XFEATURE_MASK_SUPERVISOR); + update_regset_xstate_info(fpu_user_xstate_size, xfeatures_mask_user()); fpu__init_prepare_fx_sw_frame(); setup_init_fpu_buf(); setup_xstate_comp_offsets(); + setup_supervisor_only_offsets(); print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", - xfeatures_mask, + xfeatures_mask_all, fpu_kernel_xstate_size, boot_cpu_has(X86_FEATURE_XSAVES) ? "compacted" : "standard"); return; @@ -795,7 +841,14 @@ void fpu__resume_cpu(void) * Restore XCR0 on xsave capable CPUs: */ if (boot_cpu_has(X86_FEATURE_XSAVE)) - xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask); + xsetbv(XCR_XFEATURE_ENABLED_MASK, xfeatures_mask_user()); + + /* + * Restore IA32_XSS. The same CPUID bit enumerates support + * of XSAVES and MSR_IA32_XSS. + */ + if (boot_cpu_has(X86_FEATURE_XSAVES)) + wrmsrl(MSR_IA32_XSS, xfeatures_mask_supervisor()); } /* @@ -840,10 +893,9 @@ void *get_xsave_addr(struct xregs_state *xsave, int xfeature_nr) /* * We should not ever be requesting features that we - * have not enabled. Remember that xfeatures_mask is - * what we write to the XCR0 register. + * have not enabled. */ - WARN_ONCE(!(xfeatures_mask & BIT_ULL(xfeature_nr)), + WARN_ONCE(!(xfeatures_mask_all & BIT_ULL(xfeature_nr)), "get of unsupported state"); /* * This assumes the last 'xsave*' instruction to @@ -1010,7 +1062,7 @@ int copy_xstate_to_kernel(void *kbuf, struct xregs_state *xsave, unsigned int of */ memset(&header, 0, sizeof(header)); header.xfeatures = xsave->header.xfeatures; - header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + header.xfeatures &= xfeatures_mask_user(); if (header.xfeatures & XFEATURE_MASK_FP) copy_part(0, off_mxcsr, @@ -1090,7 +1142,7 @@ int copy_xstate_to_user(void __user *ubuf, struct xregs_state *xsave, unsigned i */ memset(&header, 0, sizeof(header)); header.xfeatures = xsave->header.xfeatures; - header.xfeatures &= ~XFEATURE_MASK_SUPERVISOR; + header.xfeatures &= xfeatures_mask_user(); /* * Copy xregs_state->header: @@ -1157,7 +1209,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) memcpy(&hdr, kbuf + offset, size); - if (validate_xstate_header(&hdr)) + if (validate_user_xstate_header(&hdr)) return -EINVAL; for (i = 0; i < XFEATURE_MAX; i++) { @@ -1183,7 +1235,7 @@ int copy_kernel_to_xstate(struct xregs_state *xsave, const void *kbuf) * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': */ - xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL; /* * Add back in the features that came in from userspace: @@ -1211,7 +1263,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) if (__copy_from_user(&hdr, ubuf + offset, size)) return -EFAULT; - if (validate_xstate_header(&hdr)) + if (validate_user_xstate_header(&hdr)) return -EINVAL; for (i = 0; i < XFEATURE_MAX; i++) { @@ -1239,7 +1291,7 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) * The state that came in from userspace was user-state only. * Mask all the user states out of 'xfeatures': */ - xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR; + xsave->header.xfeatures &= XFEATURE_MASK_SUPERVISOR_ALL; /* * Add back in the features that came in from userspace: @@ -1249,6 +1301,61 @@ int copy_user_to_xstate(struct xregs_state *xsave, const void __user *ubuf) return 0; } +/* + * Save only supervisor states to the kernel buffer. This blows away all + * old states, and is intended to be used only in __fpu__restore_sig(), where + * user states are restored from the user buffer. + */ +void copy_supervisor_to_kernel(struct xregs_state *xstate) +{ + struct xstate_header *header; + u64 max_bit, min_bit; + u32 lmask, hmask; + int err, i; + + if (WARN_ON(!boot_cpu_has(X86_FEATURE_XSAVES))) + return; + + if (!xfeatures_mask_supervisor()) + return; + + max_bit = __fls(xfeatures_mask_supervisor()); + min_bit = __ffs(xfeatures_mask_supervisor()); + + lmask = xfeatures_mask_supervisor(); + hmask = xfeatures_mask_supervisor() >> 32; + XSTATE_OP(XSAVES, xstate, lmask, hmask, err); + + /* We should never fault when copying to a kernel buffer: */ + if (WARN_ON_FPU(err)) + return; + + /* + * At this point, the buffer has only supervisor states and must be + * converted back to normal kernel format. + */ + header = &xstate->header; + header->xcomp_bv |= xfeatures_mask_all; + + /* + * This only moves states up in the buffer. Start with + * the last state and move backwards so that states are + * not overwritten until after they are moved. Note: + * memmove() allows overlapping src/dst buffers. + */ + for (i = max_bit; i >= min_bit; i--) { + u8 *xbuf = (u8 *)xstate; + + if (!((header->xfeatures >> i) & 1)) + continue; + + /* Move xfeature 'i' into its normal location */ + memmove(xbuf + xstate_comp_offsets[i], + xbuf + xstate_supervisor_only_offsets[i], + xstate_sizes[i]); + } +} + #ifdef CONFIG_PROC_PID_ARCH_STATUS /* * Report the amount of time elapsed in millisecond since last AVX512 diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index b0e641793be4..c84d28e90a58 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -282,7 +282,8 @@ static inline void tramp_free(void *tramp) { } /* Defined as markers to the end of the ftrace default trampolines */ extern void ftrace_regs_caller_end(void); -extern void ftrace_epilogue(void); +extern void ftrace_regs_caller_ret(void); +extern void ftrace_caller_end(void); extern void ftrace_caller_op_ptr(void); extern void ftrace_regs_caller_op_ptr(void); @@ -334,7 +335,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) call_offset = (unsigned long)ftrace_regs_call; } else { start_offset = (unsigned long)ftrace_caller; - end_offset = (unsigned long)ftrace_epilogue; + end_offset = (unsigned long)ftrace_caller_end; op_offset = (unsigned long)ftrace_caller_op_ptr; call_offset = (unsigned long)ftrace_call; } @@ -366,6 +367,13 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size) if (WARN_ON(ret < 0)) goto fail; + if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) { + ip = trampoline + (ftrace_regs_caller_ret - ftrace_regs_caller); + ret = probe_kernel_read(ip, (void *)retq, RET_SIZE); + if (WARN_ON(ret < 0)) + goto fail; + } + /* * The address of the ftrace_ops that is used for this trampoline * is stored at the end of the trampoline. This will be used to @@ -433,7 +441,7 @@ void set_ftrace_ops_ro(void) end_offset = (unsigned long)ftrace_regs_caller_end; } else { start_offset = (unsigned long)ftrace_caller; - end_offset = (unsigned long)ftrace_epilogue; + end_offset = (unsigned long)ftrace_caller_end; } size = end_offset - start_offset; size = size + RET_SIZE + sizeof(void *); diff --git a/arch/x86/kernel/ftrace_32.S b/arch/x86/kernel/ftrace_32.S index e8a9f8370112..e405fe1a8bf4 100644 --- a/arch/x86/kernel/ftrace_32.S +++ b/arch/x86/kernel/ftrace_32.S @@ -189,5 +189,5 @@ return_to_handler: movl %eax, %ecx popl %edx popl %eax - JMP_NOSPEC %ecx + JMP_NOSPEC ecx #endif diff --git a/arch/x86/kernel/ftrace_64.S b/arch/x86/kernel/ftrace_64.S index 369e61faacfe..aa5d28aeb31e 100644 --- a/arch/x86/kernel/ftrace_64.S +++ b/arch/x86/kernel/ftrace_64.S @@ -23,7 +23,7 @@ #endif /* CONFIG_FRAME_POINTER */ /* Size of stack used to save mcount regs in save_mcount_regs */ -#define MCOUNT_REG_SIZE (SS+8 + MCOUNT_FRAME_SIZE) +#define MCOUNT_REG_SIZE (FRAME_SIZE + MCOUNT_FRAME_SIZE) /* * gcc -pg option adds a call to 'mcount' in most functions. @@ -77,7 +77,7 @@ /* * We add enough stack to save all regs. */ - subq $(MCOUNT_REG_SIZE - MCOUNT_FRAME_SIZE), %rsp + subq $(FRAME_SIZE), %rsp movq %rax, RAX(%rsp) movq %rcx, RCX(%rsp) movq %rdx, RDX(%rsp) @@ -157,8 +157,12 @@ SYM_INNER_LABEL(ftrace_call, SYM_L_GLOBAL) * think twice before adding any new code or changing the * layout here. */ -SYM_INNER_LABEL(ftrace_epilogue, SYM_L_GLOBAL) +SYM_INNER_LABEL(ftrace_caller_end, SYM_L_GLOBAL) + jmp ftrace_epilogue +SYM_FUNC_END(ftrace_caller); + +SYM_FUNC_START(ftrace_epilogue) #ifdef CONFIG_FUNCTION_GRAPH_TRACER SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) jmp ftrace_stub @@ -170,14 +174,12 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL) */ SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK) retq -SYM_FUNC_END(ftrace_caller) +SYM_FUNC_END(ftrace_epilogue) SYM_FUNC_START(ftrace_regs_caller) /* Save the current flags before any operations that can change them */ pushfq - UNWIND_HINT_SAVE - /* added 8 bytes to save flags */ save_mcount_regs 8 /* save_mcount_regs fills in first two parameters */ @@ -233,10 +235,13 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) movq ORIG_RAX(%rsp), %rax movq %rax, MCOUNT_REG_SIZE-8(%rsp) - /* If ORIG_RAX is anything but zero, make this a call to that */ + /* + * If ORIG_RAX is anything but zero, make this a call to that. + * See arch_ftrace_set_direct_caller(). + */ movq ORIG_RAX(%rsp), %rax - cmpq $0, %rax - je 1f + testq %rax, %rax + jz 1f /* Swap the flags with orig_rax */ movq MCOUNT_REG_SIZE(%rsp), %rdi @@ -244,20 +249,14 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) movq %rax, MCOUNT_REG_SIZE(%rsp) restore_mcount_regs 8 + /* Restore flags */ + popfq - jmp 2f +SYM_INNER_LABEL(ftrace_regs_caller_ret, SYM_L_GLOBAL); + UNWIND_HINT_RET_OFFSET + jmp ftrace_epilogue 1: restore_mcount_regs - - -2: - /* - * The stack layout is nondetermistic here, depending on which path was - * taken. This confuses objtool and ORC, rightfully so. For now, - * pretend the stack always looks like the non-direct case. - */ - UNWIND_HINT_RESTORE - /* Restore flags */ popfq @@ -268,7 +267,6 @@ SYM_INNER_LABEL(ftrace_regs_call, SYM_L_GLOBAL) * to the return. */ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL) - jmp ftrace_epilogue SYM_FUNC_END(ftrace_regs_caller) @@ -303,7 +301,7 @@ trace: * function tracing is enabled. */ movq ftrace_trace_function, %r8 - CALL_NOSPEC %r8 + CALL_NOSPEC r8 restore_mcount_regs jmp fgraph_trace @@ -340,6 +338,6 @@ SYM_CODE_START(return_to_handler) movq 8(%rsp), %rdx movq (%rsp), %rax addq $24, %rsp - JMP_NOSPEC %rdi + JMP_NOSPEC rdi SYM_CODE_END(return_to_handler) #endif diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 12df3a4abfdd..6b32ab009c19 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -43,7 +43,7 @@ static int map_irq_stack(unsigned int cpu) pages[i] = pfn_to_page(pa >> PAGE_SHIFT); } - va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, VM_MAP, PAGE_KERNEL); if (!va) return -ENOMEM; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 6efe0410fb72..d6f22a3a1f7d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -35,6 +35,8 @@ #include <asm/tlb.h> #include <asm/cpuidle_haltpoll.h> +DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled); + static int kvmapf = 1; static int __init parse_no_kvmapf(char *arg) @@ -73,7 +75,6 @@ struct kvm_task_sleep_node { struct swait_queue_head wq; u32 token; int cpu; - bool halted; }; static struct kvm_task_sleep_head { @@ -96,77 +97,64 @@ static struct kvm_task_sleep_node *_find_apf_task(struct kvm_task_sleep_head *b, return NULL; } -/* - * @interrupt_kernel: Is this called from a routine which interrupts the kernel - * (other than user space)? - */ -void kvm_async_pf_task_wait(u32 token, int interrupt_kernel) +static bool kvm_async_pf_queue_task(u32 token, struct kvm_task_sleep_node *n) { u32 key = hash_32(token, KVM_TASK_SLEEP_HASHBITS); struct kvm_task_sleep_head *b = &async_pf_sleepers[key]; - struct kvm_task_sleep_node n, *e; - DECLARE_SWAITQUEUE(wait); - - rcu_irq_enter(); + struct kvm_task_sleep_node *e; raw_spin_lock(&b->lock); e = _find_apf_task(b, token); if (e) { /* dummy entry exist -> wake up was delivered ahead of PF */ hlist_del(&e->link); - kfree(e); raw_spin_unlock(&b->lock); - - rcu_irq_exit(); - return; + kfree(e); + return false; } - n.token = token; - n.cpu = smp_processor_id(); - n.halted = is_idle_task(current) || - (IS_ENABLED(CONFIG_PREEMPT_COUNT) - ? preempt_count() > 1 || rcu_preempt_depth() - : interrupt_kernel); - init_swait_queue_head(&n.wq); - hlist_add_head(&n.link, &b->list); + n->token = token; + n->cpu = smp_processor_id(); + init_swait_queue_head(&n->wq); + hlist_add_head(&n->link, &b->list); raw_spin_unlock(&b->lock); + return true; +} + +/* + * kvm_async_pf_task_wait_schedule - Wait for pagefault to be handled + * @token: Token to identify the sleep node entry + * + * Invoked from the async pagefault handling code or from the VM exit page + * fault handler. In both cases RCU is watching. + */ +void kvm_async_pf_task_wait_schedule(u32 token) +{ + struct kvm_task_sleep_node n; + DECLARE_SWAITQUEUE(wait); + + lockdep_assert_irqs_disabled(); + + if (!kvm_async_pf_queue_task(token, &n)) + return; for (;;) { - if (!n.halted) - prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE); + prepare_to_swait_exclusive(&n.wq, &wait, TASK_UNINTERRUPTIBLE); if (hlist_unhashed(&n.link)) break; - rcu_irq_exit(); - - if (!n.halted) { - local_irq_enable(); - schedule(); - local_irq_disable(); - } else { - /* - * We cannot reschedule. So halt. - */ - native_safe_halt(); - local_irq_disable(); - } - - rcu_irq_enter(); + local_irq_enable(); + schedule(); + local_irq_disable(); } - if (!n.halted) - finish_swait(&n.wq, &wait); - - rcu_irq_exit(); - return; + finish_swait(&n.wq, &wait); } -EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait); +EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait_schedule); static void apf_task_wake_one(struct kvm_task_sleep_node *n) { hlist_del_init(&n->link); - if (n->halted) - smp_send_reschedule(n->cpu); - else if (swq_has_sleeper(&n->wq)) + if (swq_has_sleeper(&n->wq)) swake_up_one(&n->wq); } @@ -175,12 +163,13 @@ static void apf_task_wake_all(void) int i; for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) { - struct hlist_node *p, *next; struct kvm_task_sleep_head *b = &async_pf_sleepers[i]; + struct kvm_task_sleep_node *n; + struct hlist_node *p, *next; + raw_spin_lock(&b->lock); hlist_for_each_safe(p, next, &b->list) { - struct kvm_task_sleep_node *n = - hlist_entry(p, typeof(*n), link); + n = hlist_entry(p, typeof(*n), link); if (n->cpu == smp_processor_id()) apf_task_wake_one(n); } @@ -221,46 +210,61 @@ again: n->cpu = smp_processor_id(); init_swait_queue_head(&n->wq); hlist_add_head(&n->link, &b->list); - } else + } else { apf_task_wake_one(n); + } raw_spin_unlock(&b->lock); return; } EXPORT_SYMBOL_GPL(kvm_async_pf_task_wake); -u32 kvm_read_and_reset_pf_reason(void) +u32 kvm_read_and_reset_apf_flags(void) { - u32 reason = 0; + u32 flags = 0; if (__this_cpu_read(apf_reason.enabled)) { - reason = __this_cpu_read(apf_reason.reason); - __this_cpu_write(apf_reason.reason, 0); + flags = __this_cpu_read(apf_reason.flags); + __this_cpu_write(apf_reason.flags, 0); } - return reason; + return flags; } -EXPORT_SYMBOL_GPL(kvm_read_and_reset_pf_reason); -NOKPROBE_SYMBOL(kvm_read_and_reset_pf_reason); +EXPORT_SYMBOL_GPL(kvm_read_and_reset_apf_flags); +NOKPROBE_SYMBOL(kvm_read_and_reset_apf_flags); -dotraplinkage void -do_async_page_fault(struct pt_regs *regs, unsigned long error_code, unsigned long address) +bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token) { - switch (kvm_read_and_reset_pf_reason()) { - default: - do_page_fault(regs, error_code, address); - break; + u32 reason = kvm_read_and_reset_apf_flags(); + + switch (reason) { case KVM_PV_REASON_PAGE_NOT_PRESENT: - /* page is swapped out by the host. */ - kvm_async_pf_task_wait((u32)address, !user_mode(regs)); - break; case KVM_PV_REASON_PAGE_READY: + break; + default: + return false; + } + + /* + * If the host managed to inject an async #PF into an interrupt + * disabled region, then die hard as this is not going to end well + * and the host side is seriously broken. + */ + if (unlikely(!(regs->flags & X86_EFLAGS_IF))) + panic("Host injected async #PF in interrupt disabled region\n"); + + if (reason == KVM_PV_REASON_PAGE_NOT_PRESENT) { + if (unlikely(!(user_mode(regs)))) + panic("Host injected async #PF in kernel mode\n"); + /* Page is swapped out by the host. */ + kvm_async_pf_task_wait_schedule(token); + } else { rcu_irq_enter(); - kvm_async_pf_task_wake((u32)address); + kvm_async_pf_task_wake(token); rcu_irq_exit(); - break; } + return true; } -NOKPROBE_SYMBOL(do_async_page_fault); +NOKPROBE_SYMBOL(__kvm_handle_async_pf); static void __init paravirt_ops_setup(void) { @@ -306,11 +310,11 @@ static notrace void kvm_guest_apic_eoi_write(u32 reg, u32 val) static void kvm_guest_cpu_init(void) { if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { - u64 pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); + u64 pa; -#ifdef CONFIG_PREEMPTION - pa |= KVM_ASYNC_PF_SEND_ALWAYS; -#endif + WARN_ON_ONCE(!static_branch_likely(&kvm_async_pf_enabled)); + + pa = slow_virt_to_phys(this_cpu_ptr(&apf_reason)); pa |= KVM_ASYNC_PF_ENABLED; if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF_VMEXIT)) @@ -318,12 +322,12 @@ static void kvm_guest_cpu_init(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, pa); __this_cpu_write(apf_reason.enabled, 1); - printk(KERN_INFO"KVM setup async PF for cpu %d\n", - smp_processor_id()); + pr_info("KVM setup async PF for cpu %d\n", smp_processor_id()); } if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) { unsigned long pa; + /* Size alignment is implied but just to make it explicit. */ BUILD_BUG_ON(__alignof__(kvm_apic_eoi) < 4); __this_cpu_write(kvm_apic_eoi, 0); @@ -344,8 +348,7 @@ static void kvm_pv_disable_apf(void) wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); __this_cpu_write(apf_reason.enabled, 0); - printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", - smp_processor_id()); + pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id()); } static void kvm_pv_guest_cpu_reboot(void *unused) @@ -592,12 +595,6 @@ static int kvm_cpu_down_prepare(unsigned int cpu) } #endif -static void __init kvm_apf_trap_init(void) -{ - update_intr_gate(X86_TRAP_PF, async_page_fault); -} - - static void kvm_flush_tlb_others(const struct cpumask *cpumask, const struct flush_tlb_info *info) { @@ -632,8 +629,6 @@ static void __init kvm_guest_init(void) register_reboot_notifier(&kvm_pv_reboot_nb); for (i = 0; i < KVM_TASK_SLEEP_HASHSIZE; i++) raw_spin_lock_init(&async_pf_sleepers[i].lock); - if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF)) - x86_init.irqs.trap_init = kvm_apf_trap_init; if (kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { has_steal_clock = 1; @@ -649,6 +644,9 @@ static void __init kvm_guest_init(void) if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) apic_set_eoi_write(kvm_guest_apic_eoi_write); + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) + static_branch_enable(&kvm_async_pf_enabled); + #ifdef CONFIG_SMP smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus; smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 6407ea21fa1b..bdcc5146de96 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -25,10 +25,6 @@ #include <linux/atomic.h> #include <linux/sched/clock.h> -#if defined(CONFIG_EDAC) -#include <linux/edac.h> -#endif - #include <asm/cpu_entry_area.h> #include <asm/traps.h> #include <asm/mach_traps.h> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 35638f1c5791..ce6cd220f722 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -191,7 +191,7 @@ void flush_thread(void) flush_ptrace_hw_breakpoint(tsk); memset(tsk->thread.tls_array, 0, sizeof(tsk->thread.tls_array)); - fpu__clear(&tsk->thread.fpu); + fpu__clear_all(&tsk->thread.fpu); } void disable_TSC(void) diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 954b013cc585..538d4e8d6589 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -52,7 +52,7 @@ #include <asm/debugreg.h> #include <asm/switch_to.h> #include <asm/vm86.h> -#include <asm/resctrl_sched.h> +#include <asm/resctrl.h> #include <asm/proto.h> #include "process.h" diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 5ef9d8f25b0e..0c169a5687e1 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -52,7 +52,7 @@ #include <asm/switch_to.h> #include <asm/xen/hypervisor.h> #include <asm/vdso.h> -#include <asm/resctrl_sched.h> +#include <asm/resctrl.h> #include <asm/unistd.h> #include <asm/fsgsbase.h> #ifdef CONFIG_IA32_EMULATION diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4b3fa6cd3106..a3767e74c758 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -237,6 +237,9 @@ static u64 __init get_ramdisk_image(void) ramdisk_image |= (u64)boot_params.ext_ramdisk_image << 32; + if (ramdisk_image == 0) + ramdisk_image = phys_initrd_start; + return ramdisk_image; } static u64 __init get_ramdisk_size(void) @@ -245,6 +248,9 @@ static u64 __init get_ramdisk_size(void) ramdisk_size |= (u64)boot_params.ext_ramdisk_size << 32; + if (ramdisk_size == 0) + ramdisk_size = phys_initrd_size; + return ramdisk_size; } diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index e6d7894ad127..fd945ce78554 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c @@ -287,9 +287,9 @@ void __init setup_per_cpu_areas(void) /* * Sync back kernel address range again. We already did this in * setup_arch(), but percpu data also needs to be available in - * the smpboot asm. We can't reliably pick up percpu mappings - * using vmalloc_fault(), because exception dispatch needs - * percpu data. + * the smpboot asm and arch_sync_kernel_mappings() doesn't sync to + * swapper_pg_dir on 32-bit. The per-cpu mappings need to be available + * there too. * * FIXME: Can the later sync in setup_cpu_entry_areas() replace * this call? diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 83b74fb38c8f..399f97abee02 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -37,6 +37,7 @@ #include <asm/vm86.h> #ifdef CONFIG_X86_64 +#include <linux/compat.h> #include <asm/proto.h> #include <asm/ia32_unistd.h> #endif /* CONFIG_X86_64 */ @@ -511,6 +512,31 @@ Efault: } #endif /* CONFIG_X86_32 */ +#ifdef CONFIG_X86_X32_ABI +static int x32_copy_siginfo_to_user(struct compat_siginfo __user *to, + const struct kernel_siginfo *from) +{ + struct compat_siginfo new; + + copy_siginfo_to_external32(&new, from); + if (from->si_signo == SIGCHLD) { + new._sifields._sigchld_x32._utime = from->si_utime; + new._sifields._sigchld_x32._stime = from->si_stime; + } + if (copy_to_user(to, &new, sizeof(struct compat_siginfo))) + return -EFAULT; + return 0; +} + +int copy_siginfo_to_user32(struct compat_siginfo __user *to, + const struct kernel_siginfo *from) +{ + if (in_x32_syscall()) + return x32_copy_siginfo_to_user(to, from); + return __copy_siginfo_to_user32(to, from); +} +#endif /* CONFIG_X86_X32_ABI */ + static int x32_setup_rt_frame(struct ksignal *ksig, compat_sigset_t *set, struct pt_regs *regs) @@ -543,7 +569,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, user_access_end(); if (ksig->ka.sa.sa_flags & SA_SIGINFO) { - if (__copy_siginfo_to_user32(&frame->info, &ksig->info, true)) + if (x32_copy_siginfo_to_user(&frame->info, &ksig->info)) return -EFAULT; } @@ -732,7 +758,7 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs) /* * Ensure the signal handler starts with the new fpu state. */ - fpu__clear(fpu); + fpu__clear_user_states(fpu); } signal_setup_done(failed, ksig, stepping); } diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2f24c334a938..2467f3dd35d3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1384,12 +1384,12 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) speculative_store_bypass_ht_init(); } -void arch_enable_nonboot_cpus_begin(void) +void arch_thaw_secondary_cpus_begin(void) { set_mtrr_aps_delayed_init(); } -void arch_enable_nonboot_cpus_end(void) +void arch_thaw_secondary_cpus_end(void) { mtrr_aps_init(); } @@ -1857,24 +1857,25 @@ static bool slv_set_max_freq_ratio(u64 *base_freq, u64 *turbo_freq) #include <asm/cpu_device_id.h> #include <asm/intel-family.h> -#define ICPU(model) \ - {X86_VENDOR_INTEL, 6, model, X86_FEATURE_APERFMPERF, 0} +#define X86_MATCH(model) \ + X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, \ + INTEL_FAM6_##model, X86_FEATURE_APERFMPERF, NULL) static const struct x86_cpu_id has_knl_turbo_ratio_limits[] = { - ICPU(INTEL_FAM6_XEON_PHI_KNL), - ICPU(INTEL_FAM6_XEON_PHI_KNM), + X86_MATCH(XEON_PHI_KNL), + X86_MATCH(XEON_PHI_KNM), {} }; static const struct x86_cpu_id has_skx_turbo_ratio_limits[] = { - ICPU(INTEL_FAM6_SKYLAKE_X), + X86_MATCH(SKYLAKE_X), {} }; static const struct x86_cpu_id has_glm_turbo_ratio_limits[] = { - ICPU(INTEL_FAM6_ATOM_GOLDMONT), - ICPU(INTEL_FAM6_ATOM_GOLDMONT_D), - ICPU(INTEL_FAM6_ATOM_GOLDMONT_PLUS), + X86_MATCH(ATOM_GOLDMONT), + X86_MATCH(ATOM_GOLDMONT_D), + X86_MATCH(ATOM_GOLDMONT_PLUS), {} }; diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index b89f6ac6a0c0..b2942b2dbfcf 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -35,8 +35,7 @@ #include "../realmode/rm/wakeup.h" /* Global pointer to shared data; NULL means no measured launch. */ -struct tboot *tboot __read_mostly; -EXPORT_SYMBOL(tboot); +static struct tboot *tboot __read_mostly; /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ #define AP_WAIT_TIMEOUT 1 @@ -46,6 +45,11 @@ EXPORT_SYMBOL(tboot); static u8 tboot_uuid[16] __initdata = TBOOT_UUID; +bool tboot_enabled(void) +{ + return tboot != NULL; +} + void __init tboot_probe(void) { /* Look for valid page-aligned address for shared page. */ diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c index 106e7f87f534..371a6b348e44 100644 --- a/arch/x86/kernel/time.c +++ b/arch/x86/kernel/time.c @@ -103,6 +103,9 @@ static __init void x86_late_time_init(void) */ x86_init.irqs.intr_mode_init(); tsc_init(); + + if (static_cpu_has(X86_FEATURE_WAITPKG)) + use_tpause_delay(); } /* diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d54cffdc7cac..4cc541051994 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -37,10 +37,12 @@ #include <linux/mm.h> #include <linux/smp.h> #include <linux/io.h> +#include <linux/hardirq.h> +#include <linux/atomic.h> + #include <asm/stacktrace.h> #include <asm/processor.h> #include <asm/debugreg.h> -#include <linux/atomic.h> #include <asm/text-patching.h> #include <asm/ftrace.h> #include <asm/traps.h> @@ -82,78 +84,6 @@ static inline void cond_local_irq_disable(struct pt_regs *regs) local_irq_disable(); } -/* - * In IST context, we explicitly disable preemption. This serves two - * purposes: it makes it much less likely that we would accidentally - * schedule in IST context and it will force a warning if we somehow - * manage to schedule by accident. - */ -void ist_enter(struct pt_regs *regs) -{ - if (user_mode(regs)) { - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); - } else { - /* - * We might have interrupted pretty much anything. In - * fact, if we're a machine check, we can even interrupt - * NMI processing. We don't want in_nmi() to return true, - * but we need to notify RCU. - */ - rcu_nmi_enter(); - } - - preempt_disable(); - - /* This code is a bit fragile. Test it. */ - RCU_LOCKDEP_WARN(!rcu_is_watching(), "ist_enter didn't work"); -} -NOKPROBE_SYMBOL(ist_enter); - -void ist_exit(struct pt_regs *regs) -{ - preempt_enable_no_resched(); - - if (!user_mode(regs)) - rcu_nmi_exit(); -} - -/** - * ist_begin_non_atomic() - begin a non-atomic section in an IST exception - * @regs: regs passed to the IST exception handler - * - * IST exception handlers normally cannot schedule. As a special - * exception, if the exception interrupted userspace code (i.e. - * user_mode(regs) would return true) and the exception was not - * a double fault, it can be safe to schedule. ist_begin_non_atomic() - * begins a non-atomic section within an ist_enter()/ist_exit() region. - * Callers are responsible for enabling interrupts themselves inside - * the non-atomic section, and callers must call ist_end_non_atomic() - * before ist_exit(). - */ -void ist_begin_non_atomic(struct pt_regs *regs) -{ - BUG_ON(!user_mode(regs)); - - /* - * Sanity check: we need to be on the normal thread stack. This - * will catch asm bugs and any attempt to use ist_preempt_enable - * from double_fault. - */ - BUG_ON(!on_thread_stack()); - - preempt_enable_no_resched(); -} - -/** - * ist_end_non_atomic() - begin a non-atomic section in an IST exception - * - * Ends a non-atomic section started with ist_begin_non_atomic(). - */ -void ist_end_non_atomic(void) -{ - preempt_disable(); -} - int is_valid_bugaddr(unsigned long addr) { unsigned short ud; @@ -326,7 +256,6 @@ __visible void __noreturn handle_stack_overflow(const char *message, } #endif -#if defined(CONFIG_X86_64) || defined(CONFIG_DOUBLEFAULT) /* * Runs on an IST stack for x86_64 and on a special task stack for x86_32. * @@ -363,7 +292,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign * The net result is that our #GP handler will think that we * entered from usermode with the bad user context. * - * No need for ist_enter here because we don't use RCU. + * No need for nmi_enter() here because we don't use RCU. */ if (((long)regs->sp >> P4D_SHIFT) == ESPFIX_PGD_ENTRY && regs->cs == __KERNEL_CS && @@ -398,7 +327,7 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign } #endif - ist_enter(regs); + nmi_enter(); notify_die(DIE_TRAP, str, regs, error_code, X86_TRAP_DF, SIGSEGV); tsk->thread.error_code = error_code; @@ -450,7 +379,6 @@ dotraplinkage void do_double_fault(struct pt_regs *regs, long error_code, unsign die("double fault", regs, error_code); panic("Machine halted."); } -#endif dotraplinkage void do_bounds(struct pt_regs *regs, long error_code) { @@ -592,19 +520,13 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) return; /* - * Unlike any other non-IST entry, we can be called from a kprobe in - * non-CONTEXT_KERNEL kernel mode or even during context tracking - * state changes. Make sure that we wake up RCU even if we're coming - * from kernel code. - * - * This means that we can't schedule even if we came from a - * preemptible kernel context. That's okay. + * Unlike any other non-IST entry, we can be called from pretty much + * any location in the kernel through kprobes -- text_poke() will most + * likely be handled by poke_int3_handler() above. This means this + * handler is effectively NMI-like. */ - if (!user_mode(regs)) { - rcu_nmi_enter(); - preempt_disable(); - } - RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); + if (!user_mode(regs)) + nmi_enter(); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, X86_TRAP_BP, @@ -626,10 +548,8 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) cond_local_irq_disable(regs); exit: - if (!user_mode(regs)) { - preempt_enable_no_resched(); - rcu_nmi_exit(); - } + if (!user_mode(regs)) + nmi_exit(); } NOKPROBE_SYMBOL(do_int3); @@ -733,7 +653,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) unsigned long dr6; int si_code; - ist_enter(regs); + nmi_enter(); get_debugreg(dr6, 6); /* @@ -826,7 +746,7 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_dec(); exit: - ist_exit(regs); + nmi_exit(); } NOKPROBE_SYMBOL(do_debug); @@ -983,7 +903,5 @@ void __init trap_init(void) idt_setup_ist_traps(); - x86_init.irqs.trap_init(); - idt_setup_debugidt_traps(); } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index fdd4c1078632..49d925043171 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -41,6 +41,7 @@ EXPORT_SYMBOL(tsc_khz); * TSC can be unstable due to cpufreq or due to unsynced TSCs */ static int __read_mostly tsc_unstable; +static unsigned int __initdata tsc_early_khz; static DEFINE_STATIC_KEY_FALSE(__use_tsc); @@ -59,6 +60,12 @@ struct cyc2ns { static DEFINE_PER_CPU_ALIGNED(struct cyc2ns, cyc2ns); +static int __init tsc_early_khz_setup(char *buf) +{ + return kstrtouint(buf, 0, &tsc_early_khz); +} +early_param("tsc_early_khz", tsc_early_khz_setup); + __always_inline void cyc2ns_read_begin(struct cyc2ns_data *data) { int seq, idx; @@ -1412,7 +1419,10 @@ static bool __init determine_cpu_tsc_frequencies(bool early) if (early) { cpu_khz = x86_platform.calibrate_cpu(); - tsc_khz = x86_platform.calibrate_tsc(); + if (tsc_early_khz) + tsc_khz = tsc_early_khz; + else + tsc_khz = x86_platform.calibrate_tsc(); } else { /* We should not be here with non-native cpu calibration */ WARN_ON(x86_platform.calibrate_cpu != native_calibrate_cpu); diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 85f1a90c55cd..123f1c1f1788 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -79,7 +79,6 @@ struct x86_init_ops x86_init __initdata = { .irqs = { .pre_vector_init = init_ISA_irqs, .intr_init = native_init_IRQ, - .trap_init = x86_init_noop, .intr_mode_select = apic_intr_mode_select, .intr_mode_init = apic_intr_mode_init }, diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 901cd1fdecd9..253b8e875ccd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -86,12 +86,10 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) best = kvm_find_cpuid_entry(vcpu, 0xD, 0); if (!best) { vcpu->arch.guest_supported_xcr0 = 0; - vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; } else { vcpu->arch.guest_supported_xcr0 = (best->eax | ((u64)best->edx << 32)) & supported_xcr0; - vcpu->arch.guest_xstate_size = best->ebx = - xstate_required_size(vcpu->arch.xcr0, false); + best->ebx = xstate_required_size(vcpu->arch.xcr0, false); } best = kvm_find_cpuid_entry(vcpu, 0xD, 1); @@ -124,8 +122,9 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) MSR_IA32_MISC_ENABLE_MWAIT); } - /* Update physical-address width */ + /* Note, maxphyaddr must be updated before tdp_level. */ vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu); kvm_mmu_reset_context(vcpu); kvm_pmu_refresh(vcpu); @@ -297,7 +296,7 @@ void kvm_set_cpu_caps(void) F(XMM3) | F(PCLMULQDQ) | 0 /* DTES64, MONITOR */ | 0 /* DS-CPL, VMX, SMX, EST */ | 0 /* TM2 */ | F(SSSE3) | 0 /* CNXT-ID */ | 0 /* Reserved */ | - F(FMA) | F(CX16) | 0 /* xTPR Update, PDCM */ | + F(FMA) | F(CX16) | 0 /* xTPR Update */ | F(PDCM) | F(PCID) | 0 /* Reserved, DCA */ | F(XMM4_1) | F(XMM4_2) | F(X2APIC) | F(MOVBE) | F(POPCNT) | 0 /* Reserved*/ | F(AES) | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX) | @@ -712,7 +711,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) (1 << KVM_FEATURE_ASYNC_PF_VMEXIT) | (1 << KVM_FEATURE_PV_SEND_IPI) | (1 << KVM_FEATURE_POLL_CONTROL) | - (1 << KVM_FEATURE_PV_SCHED_YIELD); + (1 << KVM_FEATURE_PV_SCHED_YIELD) | + (1 << KVM_FEATURE_ASYNC_PF_INT); if (sched_info_on()) entry->eax |= (1 << KVM_FEATURE_STEAL_TIME); @@ -728,6 +728,9 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) cpuid_entry_override(entry, CPUID_8000_0001_EDX); cpuid_entry_override(entry, CPUID_8000_0001_ECX); break; + case 0x80000006: + /* L2 cache and TLB: pass through host info. */ + break; case 0x80000007: /* Advanced power management */ /* invariant TSC is CPUID.80000007H:EDX[8] */ entry->edx &= (1 << 8); diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h index 63a70f6a3df3..05434cd9342f 100644 --- a/arch/x86/kvm/cpuid.h +++ b/arch/x86/kvm/cpuid.h @@ -303,4 +303,9 @@ static __always_inline void kvm_cpu_cap_check_and_set(unsigned int x86_feature) kvm_cpu_cap_set(x86_feature); } +static inline bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) +{ + return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu)); +} + #endif diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index bddaba9c68dd..de5476f8683e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5798,6 +5798,8 @@ writeback: } ctxt->eip = ctxt->_eip; + if (ctxt->mode != X86EMUL_MODE_PROT64) + ctxt->eip = (u32)ctxt->_eip; done: if (rc == X86EMUL_PROPAGATE_FAULT) { diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 54d4b98b49e1..238b78e069fe 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -21,6 +21,7 @@ #include "x86.h" #include "lapic.h" #include "ioapic.h" +#include "cpuid.h" #include "hyperv.h" #include <linux/cpu.h> @@ -266,6 +267,123 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic, return ret; } +static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu) +{ + struct kvm_cpuid_entry2 *entry; + + entry = kvm_find_cpuid_entry(vcpu, + HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, + 0); + if (!entry) + return false; + + return entry->eax & HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; +} + +static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = vcpu->kvm; + struct kvm_hv *hv = &kvm->arch.hyperv; + + if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL) + hv->hv_syndbg.control.status = + vcpu->run->hyperv.u.syndbg.status; + return 1; +} + +static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr) +{ + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); + struct kvm_vcpu_hv *hv_vcpu = &vcpu->arch.hyperv; + + hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG; + hv_vcpu->exit.u.syndbg.msr = msr; + hv_vcpu->exit.u.syndbg.control = syndbg->control.control; + hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page; + hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page; + hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page; + vcpu->arch.complete_userspace_io = + kvm_hv_syndbg_complete_userspace; + + kvm_make_request(KVM_REQ_HV_EXIT, vcpu); +} + +static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) +{ + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); + + if (!kvm_hv_is_syndbg_enabled(vcpu) && !host) + return 1; + + trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id, + vcpu_to_hv_vcpu(vcpu)->vp_index, msr, data); + switch (msr) { + case HV_X64_MSR_SYNDBG_CONTROL: + syndbg->control.control = data; + if (!host) + syndbg_exit(vcpu, msr); + break; + case HV_X64_MSR_SYNDBG_STATUS: + syndbg->control.status = data; + break; + case HV_X64_MSR_SYNDBG_SEND_BUFFER: + syndbg->control.send_page = data; + break; + case HV_X64_MSR_SYNDBG_RECV_BUFFER: + syndbg->control.recv_page = data; + break; + case HV_X64_MSR_SYNDBG_PENDING_BUFFER: + syndbg->control.pending_page = data; + if (!host) + syndbg_exit(vcpu, msr); + break; + case HV_X64_MSR_SYNDBG_OPTIONS: + syndbg->options = data; + break; + default: + break; + } + + return 0; +} + +static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) +{ + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); + + if (!kvm_hv_is_syndbg_enabled(vcpu) && !host) + return 1; + + switch (msr) { + case HV_X64_MSR_SYNDBG_CONTROL: + *pdata = syndbg->control.control; + break; + case HV_X64_MSR_SYNDBG_STATUS: + *pdata = syndbg->control.status; + break; + case HV_X64_MSR_SYNDBG_SEND_BUFFER: + *pdata = syndbg->control.send_page; + break; + case HV_X64_MSR_SYNDBG_RECV_BUFFER: + *pdata = syndbg->control.recv_page; + break; + case HV_X64_MSR_SYNDBG_PENDING_BUFFER: + *pdata = syndbg->control.pending_page; + break; + case HV_X64_MSR_SYNDBG_OPTIONS: + *pdata = syndbg->options; + break; + default: + break; + } + + trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, + vcpu_to_hv_vcpu(vcpu)->vp_index, msr, + *pdata); + + return 0; +} + static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata, bool host) { @@ -800,6 +918,8 @@ static bool kvm_hv_msr_partition_wide(u32 msr) case HV_X64_MSR_REENLIGHTENMENT_CONTROL: case HV_X64_MSR_TSC_EMULATION_CONTROL: case HV_X64_MSR_TSC_EMULATION_STATUS: + case HV_X64_MSR_SYNDBG_OPTIONS: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: r = true; break; } @@ -900,7 +1020,7 @@ static int kvm_hv_msr_set_crash_data(struct kvm_vcpu *vcpu, * These two equivalencies are implemented in this function. */ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock, - HV_REFERENCE_TSC_PAGE *tsc_ref) + struct ms_hyperv_tsc_page *tsc_ref) { u64 max_mul; @@ -941,7 +1061,7 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm, u64 gfn; BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence)); - BUILD_BUG_ON(offsetof(HV_REFERENCE_TSC_PAGE, tsc_sequence) != 0); + BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0); if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)) return; @@ -1061,6 +1181,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, if (!host) return 1; break; + case HV_X64_MSR_SYNDBG_OPTIONS: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: + return syndbg_set_msr(vcpu, msr, data, host); default: vcpu_unimpl(vcpu, "Hyper-V unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); @@ -1190,7 +1313,8 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) return 0; } -static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) +static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, + bool host) { u64 data = 0; struct kvm *kvm = vcpu->kvm; @@ -1227,6 +1351,9 @@ static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case HV_X64_MSR_TSC_EMULATION_STATUS: data = hv->hv_tsc_emulation_status; break; + case HV_X64_MSR_SYNDBG_OPTIONS: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: + return syndbg_get_msr(vcpu, msr, pdata, host); default: vcpu_unimpl(vcpu, "Hyper-V unhandled rdmsr: 0x%x\n", msr); return 1; @@ -1316,7 +1443,7 @@ int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host) int r; mutex_lock(&vcpu->kvm->arch.hyperv.hv_lock); - r = kvm_hv_get_msr_pw(vcpu, msr, pdata); + r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host); mutex_unlock(&vcpu->kvm->arch.hyperv.hv_lock); return r; } else @@ -1425,8 +1552,7 @@ static u64 kvm_hv_flush_tlb(struct kvm_vcpu *current_vcpu, u64 ingpa, * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't * analyze it here, flush TLB regardless of the specified address space. */ - kvm_make_vcpus_request_mask(kvm, - KVM_REQ_TLB_FLUSH | KVM_REQUEST_NO_WAKEUP, + kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, NULL, vcpu_mask, &hv_vcpu->tlb_flush); ret_success: @@ -1530,7 +1656,7 @@ ret_success: bool kvm_hv_hypercall_enabled(struct kvm *kvm) { - return READ_ONCE(kvm->arch.hyperv.hv_hypercall) & HV_X64_MSR_HYPERCALL_ENABLE; + return READ_ONCE(kvm->arch.hyperv.hv_guest_os_id) != 0; } static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) @@ -1709,6 +1835,34 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) } ret = kvm_hv_send_ipi(vcpu, ingpa, outgpa, true, false); break; + case HVCALL_POST_DEBUG_DATA: + case HVCALL_RETRIEVE_DEBUG_DATA: + if (unlikely(fast)) { + ret = HV_STATUS_INVALID_PARAMETER; + break; + } + fallthrough; + case HVCALL_RESET_DEBUG_SESSION: { + struct kvm_hv_syndbg *syndbg = vcpu_to_hv_syndbg(vcpu); + + if (!kvm_hv_is_syndbg_enabled(vcpu)) { + ret = HV_STATUS_INVALID_HYPERCALL_CODE; + break; + } + + if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) { + ret = HV_STATUS_OPERATION_DENIED; + break; + } + vcpu->run->exit_reason = KVM_EXIT_HYPERV; + vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL; + vcpu->run->hyperv.u.hcall.input = param; + vcpu->run->hyperv.u.hcall.params[0] = ingpa; + vcpu->run->hyperv.u.hcall.params[1] = outgpa; + vcpu->arch.complete_userspace_io = + kvm_hv_hypercall_complete_userspace; + return 0; + } default: ret = HV_STATUS_INVALID_HYPERCALL_CODE; break; @@ -1796,12 +1950,15 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, { .function = HYPERV_CPUID_FEATURES }, { .function = HYPERV_CPUID_ENLIGHTMENT_INFO }, { .function = HYPERV_CPUID_IMPLEMENT_LIMITS }, + { .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS }, + { .function = HYPERV_CPUID_SYNDBG_INTERFACE }, + { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES }, { .function = HYPERV_CPUID_NESTED_FEATURES }, }; int i, nent = ARRAY_SIZE(cpuid_entries); - if (kvm_x86_ops.nested_get_evmcs_version) - evmcs_ver = kvm_x86_ops.nested_get_evmcs_version(vcpu); + if (kvm_x86_ops.nested_ops->get_evmcs_version) + evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu); /* Skip NESTED_FEATURES if eVMCS is not supported */ if (!evmcs_ver) @@ -1821,7 +1978,7 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS: memcpy(signature, "Linux KVM Hv", 12); - ent->eax = HYPERV_CPUID_NESTED_FEATURES; + ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES; ent->ebx = signature[0]; ent->ecx = signature[1]; ent->edx = signature[2]; @@ -1860,6 +2017,10 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE; ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE; + ent->ebx |= HV_DEBUGGING; + ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE; + ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; + /* * Direct Synthetic timers only make sense with in-kernel * LAPIC @@ -1903,6 +2064,24 @@ int kvm_vcpu_ioctl_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid, break; + case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS: + memcpy(signature, "Linux KVM Hv", 12); + + ent->eax = 0; + ent->ebx = signature[0]; + ent->ecx = signature[1]; + ent->edx = signature[2]; + break; + + case HYPERV_CPUID_SYNDBG_INTERFACE: + memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12); + ent->eax = signature[0]; + break; + + case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES: + ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + break; + default: break; } diff --git a/arch/x86/kvm/hyperv.h b/arch/x86/kvm/hyperv.h index 757cb578101c..e68c6c2e9649 100644 --- a/arch/x86/kvm/hyperv.h +++ b/arch/x86/kvm/hyperv.h @@ -23,6 +23,33 @@ #include <linux/kvm_host.h> +/* + * The #defines related to the synthetic debugger are required by KDNet, but + * they are not documented in the Hyper-V TLFS because the synthetic debugger + * functionality has been deprecated and is subject to removal in future + * versions of Windows. + */ +#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080 +#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081 +#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082 + +/* + * Hyper-V synthetic debugger platform capabilities + * These are HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX bits. + */ +#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING BIT(1) + +/* Hyper-V Synthetic debug options MSR */ +#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1 +#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2 +#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3 +#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4 +#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5 +#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF + +/* Hyper-V HV_X64_MSR_SYNDBG_OPTIONS bits */ +#define HV_X64_SYNDBG_OPTION_USE_HCALLS BIT(2) + static inline struct kvm_vcpu_hv *vcpu_to_hv_vcpu(struct kvm_vcpu *vcpu) { return &vcpu->arch.hyperv; @@ -46,6 +73,11 @@ static inline struct kvm_vcpu *synic_to_vcpu(struct kvm_vcpu_hv_synic *synic) return hv_vcpu_to_vcpu(container_of(synic, struct kvm_vcpu_hv, synic)); } +static inline struct kvm_hv_syndbg *vcpu_to_hv_syndbg(struct kvm_vcpu *vcpu) +{ + return &vcpu->kvm->arch.hyperv.hv_syndbg; +} + int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host); int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host); diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index 2fb2e3c80724..660401700075 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -3,8 +3,8 @@ #define __KVM_IO_APIC_H #include <linux/kvm_host.h> - #include <kvm/iodev.h> +#include "irq.h" struct kvm; struct kvm_vcpu; @@ -108,11 +108,7 @@ do { \ static inline int ioapic_in_kernel(struct kvm *kvm) { - int mode = kvm->arch.irqchip_mode; - - /* Matches smp_wmb() when setting irqchip_mode */ - smp_rmb(); - return mode == KVM_IRQCHIP_KERNEL; + return irqchip_kernel(kvm); } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c index e330e7d125f7..99d118ffc67d 100644 --- a/arch/x86/kvm/irq.c +++ b/arch/x86/kvm/irq.c @@ -83,6 +83,7 @@ int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v) return kvm_apic_has_interrupt(v) != -1; /* LAPIC */ } +EXPORT_SYMBOL_GPL(kvm_cpu_has_injectable_intr); /* * check if there is pending interrupt without @@ -159,6 +160,8 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu) { __kvm_migrate_apic_timer(vcpu); __kvm_migrate_pit_timer(vcpu); + if (kvm_x86_ops.migrate_timers) + kvm_x86_ops.migrate_timers(vcpu); } bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args) diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index f173ab6b407e..9b64abf9b3f1 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -16,7 +16,6 @@ #include <linux/spinlock.h> #include <kvm/iodev.h> -#include "ioapic.h" #include "lapic.h" #define PIC_NUM_PINS 16 @@ -66,15 +65,6 @@ void kvm_pic_destroy(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); -static inline int pic_in_kernel(struct kvm *kvm) -{ - int mode = kvm->arch.irqchip_mode; - - /* Matches smp_wmb() when setting irqchip_mode */ - smp_rmb(); - return mode == KVM_IRQCHIP_KERNEL; -} - static inline int irqchip_split(struct kvm *kvm) { int mode = kvm->arch.irqchip_mode; @@ -93,6 +83,11 @@ static inline int irqchip_kernel(struct kvm *kvm) return mode == KVM_IRQCHIP_KERNEL; } +static inline int pic_in_kernel(struct kvm *kvm) +{ + return irqchip_kernel(kvm); +} + static inline int irqchip_in_kernel(struct kvm *kvm) { int mode = kvm->arch.irqchip_mode; diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 62558b9bdda7..ff2d0e9ca3bc 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -116,8 +116,9 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; - if (tmask & vcpu->arch.cr0_guest_owned_bits) - kvm_x86_ops.decache_cr0_guest_bits(vcpu); + if ((tmask & vcpu->arch.cr0_guest_owned_bits) && + !kvm_register_is_available(vcpu, VCPU_EXREG_CR0)) + kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR0); return vcpu->arch.cr0 & mask; } @@ -129,8 +130,9 @@ static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; - if (tmask & vcpu->arch.cr4_guest_owned_bits) - kvm_x86_ops.decache_cr4_guest_bits(vcpu); + if ((tmask & vcpu->arch.cr4_guest_owned_bits) && + !kvm_register_is_available(vcpu, VCPU_EXREG_CR4)) + kvm_x86_ops.cache_reg(vcpu, VCPU_EXREG_CR4); return vcpu->arch.cr4 & mask; } diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9af25c97612a..34a7e0533dad 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -36,6 +36,7 @@ #include <linux/jump_label.h> #include "kvm_cache_regs.h" #include "irq.h" +#include "ioapic.h" #include "trace.h" #include "x86.h" #include "cpuid.h" @@ -110,11 +111,18 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) return apic->vcpu->vcpu_id; } -bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) +static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu) { return pi_inject_timer && kvm_vcpu_apicv_active(vcpu); } -EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt); + +bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu) +{ + return kvm_x86_ops.set_hv_timer + && !(kvm_mwait_in_guest(vcpu->kvm) || + kvm_can_post_timer_interrupt(vcpu)); +} +EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer); static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu) { @@ -1593,7 +1601,7 @@ static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic) } } -static void apic_timer_expired(struct kvm_lapic *apic) +static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn) { struct kvm_vcpu *vcpu = apic->vcpu; struct kvm_timer *ktimer = &apic->lapic_timer; @@ -1604,6 +1612,12 @@ static void apic_timer_expired(struct kvm_lapic *apic) if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use) ktimer->expired_tscdeadline = ktimer->tscdeadline; + if (!from_timer_fn && vcpu->arch.apicv_active) { + WARN_ON(kvm_get_running_vcpu() != vcpu); + kvm_apic_inject_pending_timer_irqs(apic); + return; + } + if (kvm_use_posted_timer_interrupt(apic->vcpu)) { if (apic->lapic_timer.timer_advance_ns) __kvm_wait_lapic_expire(vcpu); @@ -1643,18 +1657,23 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) expire = ktime_sub_ns(expire, ktimer->timer_advance_ns); hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD); } else - apic_timer_expired(apic); + apic_timer_expired(apic, false); local_irq_restore(flags); } +static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict) +{ + return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count; +} + static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) { ktime_t now, remaining; u64 ns_remaining_old, ns_remaining_new; - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) - * APIC_BUS_CYCLE_NS * apic->divide_count; + apic->lapic_timer.period = + tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); limit_periodic_timer_frequency(apic); now = ktime_get(); @@ -1672,14 +1691,15 @@ static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_diviso apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new); } -static bool set_target_expiration(struct kvm_lapic *apic) +static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg) { ktime_t now; u64 tscl = rdtsc(); + s64 deadline; now = ktime_get(); - apic->lapic_timer.period = (u64)kvm_lapic_get_reg(apic, APIC_TMICT) - * APIC_BUS_CYCLE_NS * apic->divide_count; + apic->lapic_timer.period = + tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT)); if (!apic->lapic_timer.period) { apic->lapic_timer.tscdeadline = 0; @@ -1687,10 +1707,32 @@ static bool set_target_expiration(struct kvm_lapic *apic) } limit_periodic_timer_frequency(apic); + deadline = apic->lapic_timer.period; + + if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) { + if (unlikely(count_reg != APIC_TMICT)) { + deadline = tmict_to_ns(apic, + kvm_lapic_get_reg(apic, count_reg)); + if (unlikely(deadline <= 0)) + deadline = apic->lapic_timer.period; + else if (unlikely(deadline > apic->lapic_timer.period)) { + pr_info_ratelimited( + "kvm: vcpu %i: requested lapic timer restore with " + "starting count register %#x=%u (%lld ns) > initial count (%lld ns). " + "Using initial count to start timer.\n", + apic->vcpu->vcpu_id, + count_reg, + kvm_lapic_get_reg(apic, count_reg), + deadline, apic->lapic_timer.period); + kvm_lapic_set_reg(apic, count_reg, 0); + deadline = apic->lapic_timer.period; + } + } + } apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); - apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period); + nsec_to_cycles(apic->vcpu, deadline); + apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline); return true; } @@ -1723,7 +1765,7 @@ static void start_sw_period(struct kvm_lapic *apic) if (ktime_after(ktime_get(), apic->lapic_timer.target_expiration)) { - apic_timer_expired(apic); + apic_timer_expired(apic, false); if (apic_lvtt_oneshot(apic)) return; @@ -1760,7 +1802,7 @@ static bool start_hv_timer(struct kvm_lapic *apic) bool expired; WARN_ON(preemptible()); - if (!kvm_x86_ops.set_hv_timer) + if (!kvm_can_use_hv_timer(vcpu)) return false; if (!ktimer->tscdeadline) @@ -1785,7 +1827,7 @@ static bool start_hv_timer(struct kvm_lapic *apic) if (atomic_read(&ktimer->pending)) { cancel_hv_timer(apic); } else if (expired) { - apic_timer_expired(apic); + apic_timer_expired(apic, false); cancel_hv_timer(apic); } } @@ -1833,9 +1875,9 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu) /* If the preempt notifier has already run, it also called apic_timer_expired */ if (!apic->lapic_timer.hv_timer_in_use) goto out; - WARN_ON(swait_active(&vcpu->wq)); + WARN_ON(rcuwait_active(&vcpu->wait)); cancel_hv_timer(apic); - apic_timer_expired(apic); + apic_timer_expired(apic, false); if (apic_lvtt_period(apic) && apic->lapic_timer.period) { advance_periodic_target_expiration(apic); @@ -1872,17 +1914,22 @@ void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu) restart_apic_timer(apic); } -static void start_apic_timer(struct kvm_lapic *apic) +static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg) { atomic_set(&apic->lapic_timer.pending, 0); if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) - && !set_target_expiration(apic)) + && !set_target_expiration(apic, count_reg)) return; restart_apic_timer(apic); } +static void start_apic_timer(struct kvm_lapic *apic) +{ + __start_apic_timer(apic, APIC_TMICT); +} + static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val) { bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val); @@ -2336,7 +2383,7 @@ static enum hrtimer_restart apic_timer_fn(struct hrtimer *data) struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer); struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer); - apic_timer_expired(apic); + apic_timer_expired(apic, true); if (lapic_is_periodic(apic)) { advance_periodic_target_expiration(apic); @@ -2493,6 +2540,14 @@ static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu, int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s)); + + /* + * Get calculated timer current count for remaining timer period (if + * any) and store it in the returned register set. + */ + __kvm_lapic_set_reg(s->regs, APIC_TMCCT, + __apic_read(vcpu->arch.apic, APIC_TMCCT)); + return kvm_apic_state_fixup(vcpu, s, false); } @@ -2520,7 +2575,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) apic_update_lvtt(apic); apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0)); update_divide_count(apic); - start_apic_timer(apic); + __start_apic_timer(apic, APIC_TMCCT); kvm_apic_update_apicv(vcpu); apic->highest_isr_cache = -1; if (vcpu->arch.apicv_active) { diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index a0ffb4331418..754f29beb83e 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -161,9 +161,14 @@ static inline u32 kvm_lapic_get_reg(struct kvm_lapic *apic, int reg_off) return *((u32 *) (apic->regs + reg_off)); } +static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val) +{ + *((u32 *) (regs + reg_off)) = val; +} + static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val) { - *((u32 *) (apic->regs + reg_off)) = val; + __kvm_lapic_set_reg(apic->regs, reg_off, val); } extern struct static_key kvm_no_apic_vcpu; @@ -245,7 +250,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu); void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu); bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu); void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu); -bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu); +bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu); static inline enum lapic_mode kvm_apic_mode(u64 apic_base) { diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h index 8a3b1bce722a..0ad06bfe2c2c 100644 --- a/arch/x86/kvm/mmu.h +++ b/arch/x86/kvm/mmu.h @@ -51,13 +51,13 @@ static inline u64 rsvd_bits(int s, int e) return ((1ULL << (e - s + 1)) - 1) << s; } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask); +void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask); void reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context); void kvm_init_mmu(struct kvm_vcpu *vcpu, bool reset_roots); -void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu); +void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer); void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, bool accessed_dirty, gpa_t new_eptp); bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index 8071952e9cf2..fdd05c233308 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -16,6 +16,7 @@ */ #include "irq.h" +#include "ioapic.h" #include "mmu.h" #include "x86.h" #include "kvm_cache_regs.h" @@ -78,6 +79,9 @@ module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, &nx_huge_pages_recovery_ratio, 0644); __MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); +static bool __read_mostly force_flush_and_sync_on_reuse; +module_param_named(flush_on_reuse, force_flush_and_sync_on_reuse, bool, 0644); + /* * When setting this variable to true it enables Two-Dimensional-Paging * where the hardware walks 2 page tables: @@ -244,7 +248,6 @@ static u64 __read_mostly shadow_x_mask; /* mutual exclusive with nx_mask */ static u64 __read_mostly shadow_user_mask; static u64 __read_mostly shadow_accessed_mask; static u64 __read_mostly shadow_dirty_mask; -static u64 __read_mostly shadow_mmio_mask; static u64 __read_mostly shadow_mmio_value; static u64 __read_mostly shadow_mmio_access_mask; static u64 __read_mostly shadow_present_mask; @@ -331,19 +334,19 @@ static void kvm_flush_remote_tlbs_with_address(struct kvm *kvm, kvm_flush_remote_tlbs_with_range(kvm, &range); } -void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value, u64 access_mask) +void kvm_mmu_set_mmio_spte_mask(u64 mmio_value, u64 access_mask) { BUG_ON((u64)(unsigned)access_mask != access_mask); - BUG_ON((mmio_mask & mmio_value) != mmio_value); + WARN_ON(mmio_value & (shadow_nonpresent_or_rsvd_mask << shadow_nonpresent_or_rsvd_mask_len)); + WARN_ON(mmio_value & shadow_nonpresent_or_rsvd_lower_gfn_mask); shadow_mmio_value = mmio_value | SPTE_MMIO_MASK; - shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; shadow_mmio_access_mask = access_mask; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); static bool is_mmio_spte(u64 spte) { - return (spte & shadow_mmio_mask) == shadow_mmio_value; + return (spte & SPTE_SPECIAL_MASK) == SPTE_MMIO_MASK; } static inline bool sp_ad_disabled(struct kvm_mmu_page *sp) @@ -566,7 +569,6 @@ static void kvm_mmu_reset_all_pte_masks(void) shadow_dirty_mask = 0; shadow_nx_mask = 0; shadow_x_mask = 0; - shadow_mmio_mask = 0; shadow_present_mask = 0; shadow_acc_track_mask = 0; @@ -583,16 +585,15 @@ static void kvm_mmu_reset_all_pte_masks(void) * the most significant bits of legal physical address space. */ shadow_nonpresent_or_rsvd_mask = 0; - low_phys_bits = boot_cpu_data.x86_cache_bits; - if (boot_cpu_data.x86_cache_bits < - 52 - shadow_nonpresent_or_rsvd_mask_len) { + low_phys_bits = boot_cpu_data.x86_phys_bits; + if (boot_cpu_has_bug(X86_BUG_L1TF) && + !WARN_ON_ONCE(boot_cpu_data.x86_cache_bits >= + 52 - shadow_nonpresent_or_rsvd_mask_len)) { + low_phys_bits = boot_cpu_data.x86_cache_bits + - shadow_nonpresent_or_rsvd_mask_len; shadow_nonpresent_or_rsvd_mask = - rsvd_bits(boot_cpu_data.x86_cache_bits - - shadow_nonpresent_or_rsvd_mask_len, - boot_cpu_data.x86_cache_bits - 1); - low_phys_bits -= shadow_nonpresent_or_rsvd_mask_len; - } else - WARN_ON_ONCE(boot_cpu_has_bug(X86_BUG_L1TF)); + rsvd_bits(low_phys_bits, boot_cpu_data.x86_cache_bits - 1); + } shadow_nonpresent_or_rsvd_lower_gfn_mask = GENMASK_ULL(low_phys_bits - 1, PAGE_SHIFT); @@ -620,7 +621,7 @@ static int is_large_pte(u64 pte) static int is_last_spte(u64 pte, int level) { - if (level == PT_PAGE_TABLE_LEVEL) + if (level == PG_LEVEL_4K) return 1; if (is_large_pte(pte)) return 1; @@ -1196,7 +1197,7 @@ static void update_gfn_disallow_lpage_count(struct kvm_memory_slot *slot, struct kvm_lpage_info *linfo; int i; - for (i = PT_DIRECTORY_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + for (i = PG_LEVEL_2M; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { linfo = lpage_info_slot(gfn, slot, i); linfo->disallow_lpage += count; WARN_ON(linfo->disallow_lpage < 0); @@ -1225,7 +1226,7 @@ static void account_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) slot = __gfn_to_memslot(slots, gfn); /* the non-leaf shadow pages are keeping readonly. */ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) return kvm_slot_page_track_add_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); @@ -1253,7 +1254,7 @@ static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) gfn = sp->gfn; slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, gfn); - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) return kvm_slot_page_track_remove_page(kvm, slot, gfn, KVM_PAGE_TRACK_WRITE); @@ -1398,7 +1399,7 @@ static struct kvm_rmap_head *__gfn_to_rmap(gfn_t gfn, int level, unsigned long idx; idx = gfn_to_index(gfn, slot->base_gfn, level); - return &slot->arch.rmap[level - PT_PAGE_TABLE_LEVEL][idx]; + return &slot->arch.rmap[level - PG_LEVEL_4K][idx]; } static struct kvm_rmap_head *gfn_to_rmap(struct kvm *kvm, gfn_t gfn, @@ -1529,8 +1530,7 @@ static void drop_spte(struct kvm *kvm, u64 *sptep) static bool __drop_large_spte(struct kvm *kvm, u64 *sptep) { if (is_large_pte(*sptep)) { - WARN_ON(page_header(__pa(sptep))->role.level == - PT_PAGE_TABLE_LEVEL); + WARN_ON(page_header(__pa(sptep))->role.level == PG_LEVEL_4K); drop_spte(kvm, sptep); --kvm->stat.lpages; return true; @@ -1682,7 +1682,7 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm, while (mask) { rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); + PG_LEVEL_4K, slot); __rmap_write_protect(kvm, rmap_head, false); /* clear the first set bit */ @@ -1708,7 +1708,7 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm, while (mask) { rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask), - PT_PAGE_TABLE_LEVEL, slot); + PG_LEVEL_4K, slot); __rmap_clear_dirty(kvm, rmap_head); /* clear the first set bit */ @@ -1760,7 +1760,7 @@ bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, int i; bool write_protected = false; - for (i = PT_PAGE_TABLE_LEVEL; i <= PT_MAX_HUGEPAGE_LEVEL; ++i) { + for (i = PG_LEVEL_4K; i <= KVM_MAX_HUGEPAGE_LEVEL; ++i) { rmap_head = __gfn_to_rmap(gfn, i, slot); write_protected |= __rmap_write_protect(kvm, rmap_head, true); } @@ -1948,8 +1948,8 @@ static int kvm_handle_hva_range(struct kvm *kvm, gfn_start = hva_to_gfn_memslot(hva_start, memslot); gfn_end = hva_to_gfn_memslot(hva_end + PAGE_SIZE - 1, memslot); - for_each_slot_rmap_range(memslot, PT_PAGE_TABLE_LEVEL, - PT_MAX_HUGEPAGE_LEVEL, + for_each_slot_rmap_range(memslot, PG_LEVEL_4K, + KVM_MAX_HUGEPAGE_LEVEL, gfn_start, gfn_end - 1, &iterator) ret |= handler(kvm, iterator.rmap, memslot, @@ -2153,10 +2153,6 @@ static int nonpaging_sync_page(struct kvm_vcpu *vcpu, return 0; } -static void nonpaging_invlpg(struct kvm_vcpu *vcpu, gva_t gva, hpa_t root) -{ -} - static void nonpaging_update_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte) @@ -2313,7 +2309,7 @@ static void kvm_mmu_flush_or_zap(struct kvm_vcpu *vcpu, return; if (local_flush) - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } #ifdef CONFIG_KVM_MMU_AUDIT @@ -2347,7 +2343,7 @@ static bool kvm_sync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, if (!s->unsync) continue; - WARN_ON(s->role.level != PT_PAGE_TABLE_LEVEL); + WARN_ON(s->role.level != PG_LEVEL_4K); ret |= kvm_sync_page(vcpu, s, invalid_list); } @@ -2376,7 +2372,7 @@ static int mmu_pages_next(struct kvm_mmu_pages *pvec, int level = sp->role.level; parents->idx[level-1] = idx; - if (level == PT_PAGE_TABLE_LEVEL) + if (level == PG_LEVEL_4K) break; parents->parent[level-2] = sp; @@ -2398,7 +2394,7 @@ static int mmu_pages_first(struct kvm_mmu_pages *pvec, sp = pvec->page[0].sp; level = sp->role.level; - WARN_ON(level == PT_PAGE_TABLE_LEVEL); + WARN_ON(level == PG_LEVEL_4K); parents->parent[level-2] = sp; @@ -2520,11 +2516,11 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, break; WARN_ON(!list_empty(&invalid_list)); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } if (sp->unsync_children) - kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); __clear_sp_write_flooding_count(sp); trace_kvm_mmu_get_page(sp, false); @@ -2546,11 +2542,10 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, * be inconsistent with guest page table. */ account_shadowed(vcpu->kvm, sp); - if (level == PT_PAGE_TABLE_LEVEL && - rmap_write_protect(vcpu, gfn)) + if (level == PG_LEVEL_4K && rmap_write_protect(vcpu, gfn)) kvm_flush_remote_tlbs_with_address(vcpu->kvm, gfn, 1); - if (level > PT_PAGE_TABLE_LEVEL && need_sync) + if (level > PG_LEVEL_4K && need_sync) flush |= kvm_sync_pages(vcpu, gfn, &invalid_list); } clear_page(sp->spt); @@ -2601,7 +2596,7 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, static bool shadow_walk_okay(struct kvm_shadow_walk_iterator *iterator) { - if (iterator->level < PT_PAGE_TABLE_LEVEL) + if (iterator->level < PG_LEVEL_4K) return false; iterator->index = SHADOW_PT_INDEX(iterator->addr, iterator->level); @@ -2722,7 +2717,7 @@ static int mmu_zap_unsync_children(struct kvm *kvm, struct mmu_page_path parents; struct kvm_mmu_pages pages; - if (parent->role.level == PT_PAGE_TABLE_LEVEL) + if (parent->role.level == PG_LEVEL_4K) return 0; while (mmu_unsync_walk(parent, &pages)) { @@ -2921,7 +2916,7 @@ static bool mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn, if (sp->unsync) continue; - WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); + WARN_ON(sp->role.level != PG_LEVEL_4K); kvm_unsync_page(vcpu, sp); } @@ -3020,7 +3015,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (!speculative) spte |= spte_shadow_accessed_mask(spte); - if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && + if (level > PG_LEVEL_4K && (pte_access & ACC_EXEC_MASK) && is_nx_huge_page_enabled()) { pte_access &= ~ACC_EXEC_MASK; } @@ -3033,7 +3028,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (pte_access & ACC_USER_MASK) spte |= shadow_user_mask; - if (level > PT_PAGE_TABLE_LEVEL) + if (level > PG_LEVEL_4K) spte |= PT_PAGE_SIZE_MASK; if (tdp_enabled) spte |= kvm_x86_ops.get_mt_mask(vcpu, gfn, @@ -3103,8 +3098,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, * If we overwrite a PTE page pointer with a 2MB PMD, unlink * the parent of the now unreachable PTE. */ - if (level > PT_PAGE_TABLE_LEVEL && - !is_large_pte(*sptep)) { + if (level > PG_LEVEL_4K && !is_large_pte(*sptep)) { struct kvm_mmu_page *child; u64 pte = *sptep; @@ -3125,7 +3119,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, if (set_spte_ret & SET_SPTE_WRITE_PROTECTED_PT) { if (write_fault) ret = RET_PF_EMULATE; - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } if (set_spte_ret & SET_SPTE_NEED_REMOTE_TLB_FLUSH || flush) @@ -3228,7 +3222,7 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep) if (sp_ad_disabled(sp)) return; - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) return; __direct_pte_prefetch(vcpu, sp, sptep); @@ -3241,12 +3235,8 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn, pte_t *pte; int level; - BUILD_BUG_ON(PT_PAGE_TABLE_LEVEL != (int)PG_LEVEL_4K || - PT_DIRECTORY_LEVEL != (int)PG_LEVEL_2M || - PT_PDPE_LEVEL != (int)PG_LEVEL_1G); - if (!PageCompound(pfn_to_page(pfn)) && !kvm_is_zone_device_pfn(pfn)) - return PT_PAGE_TABLE_LEVEL; + return PG_LEVEL_4K; /* * Note, using the already-retrieved memslot and __gfn_to_hva_memslot() @@ -3260,7 +3250,7 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn, pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level); if (unlikely(!pte)) - return PT_PAGE_TABLE_LEVEL; + return PG_LEVEL_4K; return level; } @@ -3274,28 +3264,28 @@ static int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t mask; int level; - if (unlikely(max_level == PT_PAGE_TABLE_LEVEL)) - return PT_PAGE_TABLE_LEVEL; + if (unlikely(max_level == PG_LEVEL_4K)) + return PG_LEVEL_4K; if (is_error_noslot_pfn(pfn) || kvm_is_reserved_pfn(pfn)) - return PT_PAGE_TABLE_LEVEL; + return PG_LEVEL_4K; slot = gfn_to_memslot_dirty_bitmap(vcpu, gfn, true); if (!slot) - return PT_PAGE_TABLE_LEVEL; + return PG_LEVEL_4K; max_level = min(max_level, max_page_level); - for ( ; max_level > PT_PAGE_TABLE_LEVEL; max_level--) { + for ( ; max_level > PG_LEVEL_4K; max_level--) { linfo = lpage_info_slot(gfn, slot, max_level); if (!linfo->disallow_lpage) break; } - if (max_level == PT_PAGE_TABLE_LEVEL) - return PT_PAGE_TABLE_LEVEL; + if (max_level == PG_LEVEL_4K) + return PG_LEVEL_4K; level = host_pfn_mapping_level(vcpu, gfn, pfn, slot); - if (level == PT_PAGE_TABLE_LEVEL) + if (level == PG_LEVEL_4K) return level; level = min(level, max_level); @@ -3317,7 +3307,7 @@ static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, int level = *levelp; u64 spte = *it.sptep; - if (it.level == level && level > PT_PAGE_TABLE_LEVEL && + if (it.level == level && level > PG_LEVEL_4K && is_nx_huge_page_enabled() && is_shadow_present_pte(spte) && !is_large_pte(spte)) { @@ -3574,7 +3564,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, * * See the comments in kvm_arch_commit_memory_region(). */ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) break; } @@ -3586,7 +3576,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, /* * Currently, fast page fault only works for direct mapping * since the gfn is not stable for indirect shadow page. See - * Documentation/virt/kvm/locking.txt to get more detail. + * Documentation/virt/kvm/locking.rst to get more detail. */ fault_handled = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte, @@ -3666,7 +3656,7 @@ void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, &invalid_list); mmu->root_hpa = INVALID_PAGE; } - mmu->root_cr3 = 0; + mmu->root_pgd = 0; } kvm_mmu_commit_zap_page(vcpu->kvm, &invalid_list); @@ -3686,58 +3676,64 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) return ret; } -static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) +static hpa_t mmu_alloc_root(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva, + u8 level, bool direct) { struct kvm_mmu_page *sp; + + spin_lock(&vcpu->kvm->mmu_lock); + + if (make_mmu_pages_available(vcpu)) { + spin_unlock(&vcpu->kvm->mmu_lock); + return INVALID_PAGE; + } + sp = kvm_mmu_get_page(vcpu, gfn, gva, level, direct, ACC_ALL); + ++sp->root_count; + + spin_unlock(&vcpu->kvm->mmu_lock); + return __pa(sp->spt); +} + +static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) +{ + u8 shadow_root_level = vcpu->arch.mmu->shadow_root_level; + hpa_t root; unsigned i; - if (vcpu->arch.mmu->shadow_root_level >= PT64_ROOT_4LEVEL) { - spin_lock(&vcpu->kvm->mmu_lock); - if(make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); + if (shadow_root_level >= PT64_ROOT_4LEVEL) { + root = mmu_alloc_root(vcpu, 0, 0, shadow_root_level, true); + if (!VALID_PAGE(root)) return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, 0, 0, - vcpu->arch.mmu->shadow_root_level, 1, ACC_ALL); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); - vcpu->arch.mmu->root_hpa = __pa(sp->spt); - } else if (vcpu->arch.mmu->shadow_root_level == PT32E_ROOT_LEVEL) { + vcpu->arch.mmu->root_hpa = root; + } else if (shadow_root_level == PT32E_ROOT_LEVEL) { for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu->pae_root[i]; + MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i])); - MMU_WARN_ON(VALID_PAGE(root)); - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); + root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT), + i << 30, PT32_ROOT_LEVEL, true); + if (!VALID_PAGE(root)) return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, i << (30 - PAGE_SHIFT), - i << 30, PT32_ROOT_LEVEL, 1, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu->pae_root[i] = root | PT_PRESENT_MASK; } vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); } else BUG(); - /* root_cr3 is ignored for direct MMUs. */ - vcpu->arch.mmu->root_cr3 = 0; + /* root_pgd is ignored for direct MMUs. */ + vcpu->arch.mmu->root_pgd = 0; return 0; } static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) { - struct kvm_mmu_page *sp; u64 pdptr, pm_mask; - gfn_t root_gfn, root_cr3; + gfn_t root_gfn, root_pgd; + hpa_t root; int i; - root_cr3 = vcpu->arch.mmu->get_guest_pgd(vcpu); - root_gfn = root_cr3 >> PAGE_SHIFT; + root_pgd = vcpu->arch.mmu->get_guest_pgd(vcpu); + root_gfn = root_pgd >> PAGE_SHIFT; if (mmu_check_root(vcpu, root_gfn)) return 1; @@ -3747,22 +3743,14 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) * write-protect the guests page table root. */ if (vcpu->arch.mmu->root_level >= PT64_ROOT_4LEVEL) { - hpa_t root = vcpu->arch.mmu->root_hpa; - - MMU_WARN_ON(VALID_PAGE(root)); + MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->root_hpa)); - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); + root = mmu_alloc_root(vcpu, root_gfn, 0, + vcpu->arch.mmu->shadow_root_level, false); + if (!VALID_PAGE(root)) return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - vcpu->arch.mmu->shadow_root_level, 0, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); vcpu->arch.mmu->root_hpa = root; - goto set_root_cr3; + goto set_root_pgd; } /* @@ -3775,9 +3763,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; for (i = 0; i < 4; ++i) { - hpa_t root = vcpu->arch.mmu->pae_root[i]; - - MMU_WARN_ON(VALID_PAGE(root)); + MMU_WARN_ON(VALID_PAGE(vcpu->arch.mmu->pae_root[i])); if (vcpu->arch.mmu->root_level == PT32E_ROOT_LEVEL) { pdptr = vcpu->arch.mmu->get_pdptr(vcpu, i); if (!(pdptr & PT_PRESENT_MASK)) { @@ -3788,17 +3774,11 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) if (mmu_check_root(vcpu, root_gfn)) return 1; } - spin_lock(&vcpu->kvm->mmu_lock); - if (make_mmu_pages_available(vcpu) < 0) { - spin_unlock(&vcpu->kvm->mmu_lock); - return -ENOSPC; - } - sp = kvm_mmu_get_page(vcpu, root_gfn, i << 30, PT32_ROOT_LEVEL, - 0, ACC_ALL); - root = __pa(sp->spt); - ++sp->root_count; - spin_unlock(&vcpu->kvm->mmu_lock); + root = mmu_alloc_root(vcpu, root_gfn, i << 30, + PT32_ROOT_LEVEL, false); + if (!VALID_PAGE(root)) + return -ENOSPC; vcpu->arch.mmu->pae_root[i] = root | pm_mask; } vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->pae_root); @@ -3828,8 +3808,8 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) vcpu->arch.mmu->root_hpa = __pa(vcpu->arch.mmu->lm_root); } -set_root_cr3: - vcpu->arch.mmu->root_cr3 = root_cr3; +set_root_pgd: + vcpu->arch.mmu->root_pgd = root_pgd; return 0; } @@ -4083,18 +4063,16 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write, bool *writable) { - struct kvm_memory_slot *slot; + struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); bool async; - /* - * Don't expose private memslots to L2. - */ - if (is_guest_mode(vcpu) && !kvm_is_visible_gfn(vcpu->kvm, gfn)) { + /* Don't expose private memslots to L2. */ + if (is_guest_mode(vcpu) && !kvm_is_visible_memslot(slot)) { *pfn = KVM_PFN_NOSLOT; + *writable = false; return false; } - slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn); async = false; *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable); if (!async) @@ -4135,7 +4113,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, return r; if (lpage_disallowed) - max_level = PT_PAGE_TABLE_LEVEL; + max_level = PG_LEVEL_4K; if (fast_page_fault(vcpu, gpa, error_code)) return RET_PF_RETRY; @@ -4171,7 +4149,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, /* This path builds a PAE pagetable, we can map 2mb pages at maximum. */ return direct_page_fault(vcpu, gpa & PAGE_MASK, error_code, prefault, - PT_DIRECTORY_LEVEL, false); + PG_LEVEL_2M, false); } int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, @@ -4186,7 +4164,7 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, #endif vcpu->arch.l1tf_flush_l1d = true; - switch (vcpu->arch.apf.host_apf_reason) { + switch (vcpu->arch.apf.host_apf_flags) { default: trace_kvm_page_fault(fault_address, error_code); @@ -4196,13 +4174,13 @@ int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code, insn_len); break; case KVM_PV_REASON_PAGE_NOT_PRESENT: - vcpu->arch.apf.host_apf_reason = 0; + vcpu->arch.apf.host_apf_flags = 0; local_irq_disable(); - kvm_async_pf_task_wait(fault_address, 0); + kvm_async_pf_task_wait_schedule(fault_address); local_irq_enable(); break; case KVM_PV_REASON_PAGE_READY: - vcpu->arch.apf.host_apf_reason = 0; + vcpu->arch.apf.host_apf_flags = 0; local_irq_disable(); kvm_async_pf_task_wake(fault_address); local_irq_enable(); @@ -4217,8 +4195,8 @@ int kvm_tdp_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, { int max_level; - for (max_level = PT_MAX_HUGEPAGE_LEVEL; - max_level > PT_PAGE_TABLE_LEVEL; + for (max_level = KVM_MAX_HUGEPAGE_LEVEL; + max_level > PG_LEVEL_4K; max_level--) { int page_num = KVM_PAGES_PER_HPAGE(max_level); gfn_t base = (gpa >> PAGE_SHIFT) & ~(page_num - 1); @@ -4237,7 +4215,7 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->page_fault = nonpaging_page_fault; context->gva_to_gpa = nonpaging_gva_to_gpa; context->sync_page = nonpaging_sync_page; - context->invlpg = nonpaging_invlpg; + context->invlpg = NULL; context->update_pte = nonpaging_update_pte; context->root_level = 0; context->shadow_root_level = PT32E_ROOT_LEVEL; @@ -4245,51 +4223,50 @@ static void nonpaging_init_context(struct kvm_vcpu *vcpu, context->nx = false; } -static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t cr3, +static inline bool is_root_usable(struct kvm_mmu_root_info *root, gpa_t pgd, union kvm_mmu_page_role role) { - return (role.direct || cr3 == root->cr3) && + return (role.direct || pgd == root->pgd) && VALID_PAGE(root->hpa) && page_header(root->hpa) && role.word == page_header(root->hpa)->role.word; } /* - * Find out if a previously cached root matching the new CR3/role is available. + * Find out if a previously cached root matching the new pgd/role is available. * The current root is also inserted into the cache. * If a matching root was found, it is assigned to kvm_mmu->root_hpa and true is * returned. * Otherwise, the LRU root from the cache is assigned to kvm_mmu->root_hpa and * false is returned. This root should now be freed by the caller. */ -static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_cr3, +static bool cached_root_available(struct kvm_vcpu *vcpu, gpa_t new_pgd, union kvm_mmu_page_role new_role) { uint i; struct kvm_mmu_root_info root; struct kvm_mmu *mmu = vcpu->arch.mmu; - root.cr3 = mmu->root_cr3; + root.pgd = mmu->root_pgd; root.hpa = mmu->root_hpa; - if (is_root_usable(&root, new_cr3, new_role)) + if (is_root_usable(&root, new_pgd, new_role)) return true; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { swap(root, mmu->prev_roots[i]); - if (is_root_usable(&root, new_cr3, new_role)) + if (is_root_usable(&root, new_pgd, new_role)) break; } mmu->root_hpa = root.hpa; - mmu->root_cr3 = root.cr3; + mmu->root_pgd = root.pgd; return i < KVM_MMU_NUM_PREV_ROOTS; } -static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, - union kvm_mmu_page_role new_role, - bool skip_tlb_flush) +static bool fast_pgd_switch(struct kvm_vcpu *vcpu, gpa_t new_pgd, + union kvm_mmu_page_role new_role) { struct kvm_mmu *mmu = vcpu->arch.mmu; @@ -4299,70 +4276,59 @@ static bool fast_cr3_switch(struct kvm_vcpu *vcpu, gpa_t new_cr3, * later if necessary. */ if (mmu->shadow_root_level >= PT64_ROOT_4LEVEL && - mmu->root_level >= PT64_ROOT_4LEVEL) { - if (mmu_check_root(vcpu, new_cr3 >> PAGE_SHIFT)) - return false; - - if (cached_root_available(vcpu, new_cr3, new_role)) { - /* - * It is possible that the cached previous root page is - * obsolete because of a change in the MMU generation - * number. However, changing the generation number is - * accompanied by KVM_REQ_MMU_RELOAD, which will free - * the root set here and allocate a new one. - */ - kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu); - if (!skip_tlb_flush) { - kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - - /* - * The last MMIO access's GVA and GPA are cached in the - * VCPU. When switching to a new CR3, that GVA->GPA - * mapping may no longer be valid. So clear any cached - * MMIO info even when we don't need to sync the shadow - * page tables. - */ - vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); - - __clear_sp_write_flooding_count( - page_header(mmu->root_hpa)); - - return true; - } - } + mmu->root_level >= PT64_ROOT_4LEVEL) + return !mmu_check_root(vcpu, new_pgd >> PAGE_SHIFT) && + cached_root_available(vcpu, new_pgd, new_role); return false; } -static void __kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, +static void __kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, union kvm_mmu_page_role new_role, - bool skip_tlb_flush) + bool skip_tlb_flush, bool skip_mmu_sync) { - if (!fast_cr3_switch(vcpu, new_cr3, new_role, skip_tlb_flush)) - kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, - KVM_MMU_ROOT_CURRENT); + if (!fast_pgd_switch(vcpu, new_pgd, new_role)) { + kvm_mmu_free_roots(vcpu, vcpu->arch.mmu, KVM_MMU_ROOT_CURRENT); + return; + } + + /* + * It's possible that the cached previous root page is obsolete because + * of a change in the MMU generation number. However, changing the + * generation number is accompanied by KVM_REQ_MMU_RELOAD, which will + * free the root set here and allocate a new one. + */ + kvm_make_request(KVM_REQ_LOAD_MMU_PGD, vcpu); + + if (!skip_mmu_sync || force_flush_and_sync_on_reuse) + kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); + if (!skip_tlb_flush || force_flush_and_sync_on_reuse) + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + + /* + * The last MMIO access's GVA and GPA are cached in the VCPU. When + * switching to a new CR3, that GVA->GPA mapping may no longer be + * valid. So clear any cached MMIO info even when we don't need to sync + * the shadow page tables. + */ + vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY); + + __clear_sp_write_flooding_count(page_header(vcpu->arch.mmu->root_hpa)); } -void kvm_mmu_new_cr3(struct kvm_vcpu *vcpu, gpa_t new_cr3, bool skip_tlb_flush) +void kvm_mmu_new_pgd(struct kvm_vcpu *vcpu, gpa_t new_pgd, bool skip_tlb_flush, + bool skip_mmu_sync) { - __kvm_mmu_new_cr3(vcpu, new_cr3, kvm_mmu_calc_root_page_role(vcpu), - skip_tlb_flush); + __kvm_mmu_new_pgd(vcpu, new_pgd, kvm_mmu_calc_root_page_role(vcpu), + skip_tlb_flush, skip_mmu_sync); } -EXPORT_SYMBOL_GPL(kvm_mmu_new_cr3); +EXPORT_SYMBOL_GPL(kvm_mmu_new_pgd); static unsigned long get_cr3(struct kvm_vcpu *vcpu) { return kvm_read_cr3(vcpu); } -static void inject_page_fault(struct kvm_vcpu *vcpu, - struct x86_exception *fault) -{ - vcpu->arch.mmu->inject_page_fault(vcpu, fault); -} - static bool sync_mmio_spte(struct kvm_vcpu *vcpu, u64 *sptep, gfn_t gfn, unsigned int access, int *nr_present) { @@ -4391,11 +4357,11 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu, gpte &= level - mmu->last_nonleaf_level; /* - * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set - * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means - * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then. + * PG_LEVEL_4K always terminates. The RHS has bit 7 set + * iff level <= PG_LEVEL_4K, which for our purpose means + * level == PG_LEVEL_4K; set PT_PAGE_SIZE_MASK in gpte then. */ - gpte |= level - PT_PAGE_TABLE_LEVEL - 1; + gpte |= level - PG_LEVEL_4K - 1; return gpte & PT_PAGE_SIZE_MASK; } @@ -4909,7 +4875,7 @@ kvm_calc_tdp_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) union kvm_mmu_role role = kvm_calc_mmu_role_common(vcpu, base_only); role.base.ad_disabled = (shadow_accessed_mask == 0); - role.base.level = kvm_x86_ops.get_tdp_level(vcpu); + role.base.level = vcpu->arch.tdp_level; role.base.direct = true; role.base.gpte_is_8_bytes = true; @@ -4928,9 +4894,9 @@ static void init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context->mmu_role.as_u64 = new_role.as_u64; context->page_fault = kvm_tdp_page_fault; context->sync_page = nonpaging_sync_page; - context->invlpg = nonpaging_invlpg; + context->invlpg = NULL; context->update_pte = nonpaging_update_pte; - context->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu); + context->shadow_root_level = vcpu->arch.tdp_level; context->direct_map = true; context->get_guest_pgd = get_cr3; context->get_pdptr = kvm_pdptr_read; @@ -4986,7 +4952,7 @@ kvm_calc_shadow_mmu_root_page_role(struct kvm_vcpu *vcpu, bool base_only) return role; } -void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) +void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu, u32 cr0, u32 cr4, u32 efer) { struct kvm_mmu *context = vcpu->arch.mmu; union kvm_mmu_role new_role = @@ -4995,11 +4961,11 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu) if (new_role.as_u64 == context->mmu_role.as_u64) return; - if (!is_paging(vcpu)) + if (!(cr0 & X86_CR0_PG)) nonpaging_init_context(vcpu, context); - else if (is_long_mode(vcpu)) + else if (efer & EFER_LMA) paging64_init_context(vcpu, context); - else if (is_pae(vcpu)) + else if (cr4 & X86_CR4_PAE) paging32E_init_context(vcpu, context); else paging32_init_context(vcpu, context); @@ -5047,7 +5013,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly, kvm_calc_shadow_ept_root_page_role(vcpu, accessed_dirty, execonly, level); - __kvm_mmu_new_cr3(vcpu, new_eptp, new_role.base, false); + __kvm_mmu_new_pgd(vcpu, new_eptp, new_role.base, true, true); if (new_role.as_u64 == context->mmu_role.as_u64) return; @@ -5077,7 +5043,11 @@ static void init_kvm_softmmu(struct kvm_vcpu *vcpu) { struct kvm_mmu *context = vcpu->arch.mmu; - kvm_init_shadow_mmu(vcpu); + kvm_init_shadow_mmu(vcpu, + kvm_read_cr0_bits(vcpu, X86_CR0_PG), + kvm_read_cr4_bits(vcpu, X86_CR4_PAE), + vcpu->arch.efer); + context->get_guest_pgd = get_cr3; context->get_pdptr = kvm_pdptr_read; context->inject_page_fault = kvm_inject_page_fault; @@ -5097,6 +5067,12 @@ static void init_kvm_nested_mmu(struct kvm_vcpu *vcpu) g_context->inject_page_fault = kvm_inject_page_fault; /* + * L2 page tables are never shadowed, so there is no need to sync + * SPTEs. + */ + g_context->invlpg = NULL; + + /* * Note that arch.mmu->gva_to_gpa translates l2_gpa to l1_gpa using * L1's nested page tables (e.g. EPT12). The nested translation * of l2_gva to l1_gpa is done by arch.nested_mmu.gva_to_gpa using @@ -5183,7 +5159,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) if (r) goto out; kvm_mmu_load_pgd(vcpu); - kvm_x86_ops.tlb_flush(vcpu, true); + kvm_x86_ops.tlb_flush_current(vcpu); out: return r; } @@ -5202,7 +5178,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *new) { - if (sp->role.level != PT_PAGE_TABLE_LEVEL) { + if (sp->role.level != PG_LEVEL_4K) { ++vcpu->kvm->stat.mmu_pde_zapped; return; } @@ -5260,7 +5236,7 @@ static bool detect_write_flooding(struct kvm_mmu_page *sp) * Skip write-flooding detected for the sp whose level is 1, because * it can become unsync, then the guest page is not write-protected. */ - if (sp->role.level == PT_PAGE_TABLE_LEVEL) + if (sp->role.level == PG_LEVEL_4K) return false; atomic_inc(&sp->write_flooding_count); @@ -5497,37 +5473,54 @@ emulate: } EXPORT_SYMBOL_GPL(kvm_mmu_page_fault); -void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) +void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t gva, hpa_t root_hpa) { - struct kvm_mmu *mmu = vcpu->arch.mmu; int i; - /* INVLPG on a * non-canonical address is a NOP according to the SDM. */ - if (is_noncanonical_address(gva, vcpu)) + /* It's actually a GPA for vcpu->arch.guest_mmu. */ + if (mmu != &vcpu->arch.guest_mmu) { + /* INVLPG on a non-canonical address is a NOP according to the SDM. */ + if (is_noncanonical_address(gva, vcpu)) + return; + + kvm_x86_ops.tlb_flush_gva(vcpu, gva); + } + + if (!mmu->invlpg) return; - mmu->invlpg(vcpu, gva, mmu->root_hpa); + if (root_hpa == INVALID_PAGE) { + mmu->invlpg(vcpu, gva, mmu->root_hpa); - /* - * INVLPG is required to invalidate any global mappings for the VA, - * irrespective of PCID. Since it would take us roughly similar amount - * of work to determine whether any of the prev_root mappings of the VA - * is marked global, or to just sync it blindly, so we might as well - * just always sync it. - * - * Mappings not reachable via the current cr3 or the prev_roots will be - * synced when switching to that cr3, so nothing needs to be done here - * for them. - */ - for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (VALID_PAGE(mmu->prev_roots[i].hpa)) - mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + /* + * INVLPG is required to invalidate any global mappings for the VA, + * irrespective of PCID. Since it would take us roughly similar amount + * of work to determine whether any of the prev_root mappings of the VA + * is marked global, or to just sync it blindly, so we might as well + * just always sync it. + * + * Mappings not reachable via the current cr3 or the prev_roots will be + * synced when switching to that cr3, so nothing needs to be done here + * for them. + */ + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) + if (VALID_PAGE(mmu->prev_roots[i].hpa)) + mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); + } else { + mmu->invlpg(vcpu, gva, root_hpa); + } +} +EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva); - kvm_x86_ops.tlb_flush_gva(vcpu, gva); +void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva) +{ + kvm_mmu_invalidate_gva(vcpu, vcpu->arch.mmu, gva, INVALID_PAGE); ++vcpu->stat.invlpg; } EXPORT_SYMBOL_GPL(kvm_mmu_invlpg); + void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) { struct kvm_mmu *mmu = vcpu->arch.mmu; @@ -5541,7 +5534,7 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid) for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { if (VALID_PAGE(mmu->prev_roots[i].hpa) && - pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].cr3)) { + pcid == kvm_get_pcid(vcpu, mmu->prev_roots[i].pgd)) { mmu->invlpg(vcpu, gva, mmu->prev_roots[i].hpa); tlb_flush = true; } @@ -5574,9 +5567,9 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_page_level) if (tdp_enabled) max_page_level = tdp_page_level; else if (boot_cpu_has(X86_FEATURE_GBPAGES)) - max_page_level = PT_PDPE_LEVEL; + max_page_level = PG_LEVEL_1G; else - max_page_level = PT_DIRECTORY_LEVEL; + max_page_level = PG_LEVEL_2M; } EXPORT_SYMBOL_GPL(kvm_configure_mmu); @@ -5632,24 +5625,24 @@ static __always_inline bool slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); + return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K, + KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } static __always_inline bool slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL + 1, - PT_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); + return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K + 1, + KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb); } static __always_inline bool slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot, slot_level_handler fn, bool lock_flush_tlb) { - return slot_handle_level(kvm, memslot, fn, PT_PAGE_TABLE_LEVEL, - PT_PAGE_TABLE_LEVEL, lock_flush_tlb); + return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K, + PG_LEVEL_4K, lock_flush_tlb); } static void free_mmu_pages(struct kvm_mmu *mmu) @@ -5672,7 +5665,7 @@ static int alloc_mmu_pages(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu) * SVM's 32-bit NPT support, TDP paging doesn't use PAE paging and can * skip allocating the PDP table. */ - if (tdp_enabled && kvm_x86_ops.get_tdp_level(vcpu) > PT32E_ROOT_LEVEL) + if (tdp_enabled && vcpu->arch.tdp_level > PT32E_ROOT_LEVEL) return 0; page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_DMA32); @@ -5695,13 +5688,13 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu) vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; vcpu->arch.root_mmu.root_hpa = INVALID_PAGE; - vcpu->arch.root_mmu.root_cr3 = 0; + vcpu->arch.root_mmu.root_pgd = 0; vcpu->arch.root_mmu.translate_gpa = translate_gpa; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) vcpu->arch.root_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; vcpu->arch.guest_mmu.root_hpa = INVALID_PAGE; - vcpu->arch.guest_mmu.root_cr3 = 0; + vcpu->arch.guest_mmu.root_pgd = 0; vcpu->arch.guest_mmu.translate_gpa = translate_gpa; for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) vcpu->arch.guest_mmu.prev_roots[i] = KVM_MMU_ROOT_INFO_INVALID; @@ -5859,7 +5852,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end) continue; slot_handle_level_range(kvm, memslot, kvm_zap_rmapp, - PT_PAGE_TABLE_LEVEL, PT_MAX_HUGEPAGE_LEVEL, + PG_LEVEL_4K, + KVM_MAX_HUGEPAGE_LEVEL, start, end - 1, true); } } @@ -5881,7 +5875,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, spin_lock(&kvm->mmu_lock); flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect, - start_level, PT_MAX_HUGEPAGE_LEVEL, false); + start_level, KVM_MAX_HUGEPAGE_LEVEL, false); spin_unlock(&kvm->mmu_lock); /* @@ -6142,27 +6136,18 @@ static void kvm_set_mmio_spte_mask(void) u64 mask; /* - * Set the reserved bits and the present bit of an paging-structure - * entry to generate page fault with PFER.RSV = 1. + * Set a reserved PA bit in MMIO SPTEs to generate page faults with + * PFEC.RSVD=1 on MMIO accesses. 64-bit PTEs (PAE, x86-64, and EPT + * paging) support a maximum of 52 bits of PA, i.e. if the CPU supports + * 52-bit physical addresses then there are no reserved PA bits in the + * PTEs and so the reserved PA approach must be disabled. */ + if (shadow_phys_bits < 52) + mask = BIT_ULL(51) | PT_PRESENT_MASK; + else + mask = 0; - /* - * Mask the uppermost physical address bit, which would be reserved as - * long as the supported physical address width is less than 52. - */ - mask = 1ull << 51; - - /* Set the present bit. */ - mask |= 1ull; - - /* - * If reserved bit is not supported, clear the present bit to disable - * mmio page fault. - */ - if (shadow_phys_bits == 52) - mask &= ~1ull; - - kvm_mmu_set_mmio_spte_mask(mask, mask, ACC_WRITE_MASK | ACC_USER_MASK); + kvm_mmu_set_mmio_spte_mask(mask, ACC_WRITE_MASK | ACC_USER_MASK); } static bool get_nx_auto_mode(void) diff --git a/arch/x86/kvm/mmu/page_track.c b/arch/x86/kvm/mmu/page_track.c index ddc1ec3bdacd..a7bcde34d1f2 100644 --- a/arch/x86/kvm/mmu/page_track.c +++ b/arch/x86/kvm/mmu/page_track.c @@ -61,7 +61,7 @@ static void update_gfn_track(struct kvm_memory_slot *slot, gfn_t gfn, { int index, val; - index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); + index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); val = slot->arch.gfn_track[mode][index]; @@ -151,7 +151,7 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, if (!slot) return false; - index = gfn_to_index(gfn, slot->base_gfn, PT_PAGE_TABLE_LEVEL); + index = gfn_to_index(gfn, slot->base_gfn, PG_LEVEL_4K); return !!READ_ONCE(slot->arch.gfn_track[mode][index]); } diff --git a/arch/x86/kvm/mmu/paging_tmpl.h b/arch/x86/kvm/mmu/paging_tmpl.h index 9bdf9b7d9a96..38c576495048 100644 --- a/arch/x86/kvm/mmu/paging_tmpl.h +++ b/arch/x86/kvm/mmu/paging_tmpl.h @@ -75,7 +75,7 @@ #define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT) #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl) -#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL) +#define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PG_LEVEL_4K) /* * The guest_walker structure emulates the behavior of the hardware page @@ -198,7 +198,7 @@ static bool FNAME(prefetch_invalid_gpte)(struct kvm_vcpu *vcpu, !(gpte & PT_GUEST_ACCESSED_MASK)) goto no_present; - if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PT_PAGE_TABLE_LEVEL)) + if (FNAME(is_rsvd_bits_set)(vcpu->arch.mmu, gpte, PG_LEVEL_4K)) goto no_present; return false; @@ -436,7 +436,7 @@ retry_walk: gfn = gpte_to_gfn_lvl(pte, walker->level); gfn += (addr & PT_LVL_OFFSET_MASK(walker->level)) >> PAGE_SHIFT; - if (PTTYPE == 32 && walker->level == PT_DIRECTORY_LEVEL && is_cpuid_PSE36()) + if (PTTYPE == 32 && walker->level > PG_LEVEL_4K && is_cpuid_PSE36()) gfn += pse36_gfn_delta(pte); real_gpa = mmu->translate_gpa(vcpu, gfn_to_gpa(gfn), access, &walker->fault); @@ -552,7 +552,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * we call mmu_set_spte() with host_writable = true because * pte_prefetch_gfn_to_pfn always gets a writable pfn. */ - mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + mmu_set_spte(vcpu, spte, pte_access, 0, PG_LEVEL_4K, gfn, pfn, true, true); kvm_release_pfn_clean(pfn); @@ -575,7 +575,7 @@ static bool FNAME(gpte_changed)(struct kvm_vcpu *vcpu, u64 mask; int r, index; - if (level == PT_PAGE_TABLE_LEVEL) { + if (level == PG_LEVEL_4K) { mask = PTE_PREFETCH_NUM * sizeof(pt_element_t) - 1; base_gpa = pte_gpa & ~mask; index = (pte_gpa - base_gpa) / sizeof(pt_element_t); @@ -600,7 +600,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, sp = page_header(__pa(sptep)); - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PG_LEVEL_4K) return; if (sp->role.direct) @@ -812,7 +812,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, if (!r) { pgprintk("%s: guest page fault\n", __func__); if (!prefault) - inject_page_fault(vcpu, &walker.fault); + kvm_inject_emulated_page_fault(vcpu, &walker.fault); return RET_PF_RETRY; } @@ -828,7 +828,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code, &walker, user_fault, &vcpu->arch.write_fault_to_shadow_pgtable); if (lpage_disallowed || is_self_change_mapping) - max_level = PT_PAGE_TABLE_LEVEL; + max_level = PG_LEVEL_4K; else max_level = walker.level; @@ -884,7 +884,7 @@ static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) { int offset = 0; - WARN_ON(sp->role.level != PT_PAGE_TABLE_LEVEL); + WARN_ON(sp->role.level != PG_LEVEL_4K); if (PTTYPE == 32) offset = sp->role.quadrant << PT64_LEVEL_BITS; @@ -1070,7 +1070,7 @@ static int FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp) host_writable = sp->spt[i] & SPTE_HOST_WRITEABLE; set_spte_ret |= set_spte(vcpu, &sp->spt[i], - pte_access, PT_PAGE_TABLE_LEVEL, + pte_access, PG_LEVEL_4K, gfn, spte_to_pfn(sp->spt[i]), true, false, host_writable); } diff --git a/arch/x86/kvm/mmu_audit.c b/arch/x86/kvm/mmu_audit.c index ca39f62aabc6..9d2844f87f6d 100644 --- a/arch/x86/kvm/mmu_audit.c +++ b/arch/x86/kvm/mmu_audit.c @@ -100,7 +100,7 @@ static void audit_mappings(struct kvm_vcpu *vcpu, u64 *sptep, int level) sp = page_header(__pa(sptep)); if (sp->unsync) { - if (level != PT_PAGE_TABLE_LEVEL) { + if (level != PG_LEVEL_4K) { audit_printk(vcpu->kvm, "unsync sp: %p " "level = %d\n", sp, level); return; @@ -176,7 +176,7 @@ static void check_mappings_rmap(struct kvm *kvm, struct kvm_mmu_page *sp) { int i; - if (sp->role.level != PT_PAGE_TABLE_LEVEL) + if (sp->role.level != PG_LEVEL_4K) return; for (i = 0; i < PT64_ENT_PER_PAGE; ++i) { @@ -200,7 +200,7 @@ static void audit_write_protection(struct kvm *kvm, struct kvm_mmu_page *sp) slots = kvm_memslots_for_spte_role(kvm, sp->role); slot = __gfn_to_memslot(slots, sp->gfn); - rmap_head = __gfn_to_rmap(sp->gfn, PT_PAGE_TABLE_LEVEL, slot); + rmap_head = __gfn_to_rmap(sp->gfn, PG_LEVEL_4K, slot); for_each_rmap_spte(rmap_head, &iter, sptep) { if (is_writable_pte(*sptep)) diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index a5078841bdac..b86346903f2e 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -397,9 +397,9 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr) __set_bit(pmc->idx, pmu->pmc_in_use); } -int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { - return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr, data); + return kvm_x86_ops.pmu_ops->get_msr(vcpu, msr_info); } int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index a6c78a797cb1..ab85eed8a6cc 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -32,7 +32,7 @@ struct kvm_pmu_ops { struct kvm_pmc *(*msr_idx_to_pmc)(struct kvm_vcpu *vcpu, u32 msr); int (*is_valid_rdpmc_ecx)(struct kvm_vcpu *vcpu, unsigned int idx); bool (*is_valid_msr)(struct kvm_vcpu *vcpu, u32 msr); - int (*get_msr)(struct kvm_vcpu *vcpu, u32 msr, u64 *data); + int (*get_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int (*set_msr)(struct kvm_vcpu *vcpu, struct msr_data *msr_info); void (*refresh)(struct kvm_vcpu *vcpu); void (*init)(struct kvm_vcpu *vcpu); @@ -147,7 +147,7 @@ void kvm_pmu_handle_event(struct kvm_vcpu *vcpu); int kvm_pmu_rdpmc(struct kvm_vcpu *vcpu, unsigned pmc, u64 *data); int kvm_pmu_is_valid_rdpmc_ecx(struct kvm_vcpu *vcpu, unsigned int idx); bool kvm_pmu_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr); -int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data); +int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info); void kvm_pmu_refresh(struct kvm_vcpu *vcpu); void kvm_pmu_reset(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c index 9a2a62e5afeb..8a6db11dcb43 100644 --- a/arch/x86/kvm/svm/nested.c +++ b/arch/x86/kvm/svm/nested.c @@ -25,6 +25,8 @@ #include "trace.h" #include "mmu.h" #include "x86.h" +#include "cpuid.h" +#include "lapic.h" #include "svm.h" static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, @@ -59,7 +61,7 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) { struct vcpu_svm *svm = to_svm(vcpu); - u64 cr3 = svm->nested.nested_cr3; + u64 cr3 = svm->nested.ctl.nested_cr3; u64 pdpte; int ret; @@ -74,19 +76,22 @@ static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); - return svm->nested.nested_cr3; + return svm->nested.ctl.nested_cr3; } static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) { + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *hsave = svm->nested.hsave; + WARN_ON(mmu_is_nested(vcpu)); vcpu->arch.mmu = &vcpu->arch.guest_mmu; - kvm_init_shadow_mmu(vcpu); + kvm_init_shadow_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer); vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3; vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr; vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit; - vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu); + vcpu->arch.mmu->shadow_root_level = vcpu->arch.tdp_level; reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu); vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; } @@ -99,8 +104,7 @@ static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) void recalc_intercepts(struct vcpu_svm *svm) { - struct vmcb_control_area *c, *h; - struct nested_state *g; + struct vmcb_control_area *c, *h, *g; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); @@ -109,14 +113,16 @@ void recalc_intercepts(struct vcpu_svm *svm) c = &svm->vmcb->control; h = &svm->nested.hsave->control; - g = &svm->nested; + g = &svm->nested.ctl; + + svm->nested.host_intercept_exceptions = h->intercept_exceptions; c->intercept_cr = h->intercept_cr; c->intercept_dr = h->intercept_dr; c->intercept_exceptions = h->intercept_exceptions; c->intercept = h->intercept; - if (svm->vcpu.arch.hflags & HF_VINTR_MASK) { + if (g->int_ctl & V_INTR_MASKING_MASK) { /* We only want the cr8 intercept bits of L1 */ c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ); c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE); @@ -138,11 +144,9 @@ void recalc_intercepts(struct vcpu_svm *svm) c->intercept |= g->intercept; } -static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb) +static void copy_vmcb_control_area(struct vmcb_control_area *dst, + struct vmcb_control_area *from) { - struct vmcb_control_area *dst = &dst_vmcb->control; - struct vmcb_control_area *from = &from_vmcb->control; - dst->intercept_cr = from->intercept_cr; dst->intercept_dr = from->intercept_dr; dst->intercept_exceptions = from->intercept_exceptions; @@ -150,7 +154,7 @@ static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb dst->iopm_base_pa = from->iopm_base_pa; dst->msrpm_base_pa = from->msrpm_base_pa; dst->tsc_offset = from->tsc_offset; - dst->asid = from->asid; + /* asid not copied, it is handled manually for svm->vmcb. */ dst->tlb_ctl = from->tlb_ctl; dst->int_ctl = from->int_ctl; dst->int_vector = from->int_vector; @@ -179,7 +183,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) */ int i; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT))) return true; for (i = 0; i < MSRPM_OFFSETS; i++) { @@ -190,7 +194,7 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) break; p = msrpm_offsets[i]; - offset = svm->nested.vmcb_msrpm + (p * 4); + offset = svm->nested.ctl.msrpm_base_pa + (p * 4); if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4)) return false; @@ -203,41 +207,111 @@ static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) return true; } -static bool nested_vmcb_checks(struct vmcb *vmcb) +static bool nested_vmcb_check_controls(struct vmcb_control_area *control) { - if ((vmcb->save.efer & EFER_SVME) == 0) + if ((control->intercept & (1ULL << INTERCEPT_VMRUN)) == 0) return false; - if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0) + if (control->asid == 0) return false; - if (vmcb->control.asid == 0) - return false; - - if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && + if ((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled) return false; return true; } -void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, - struct vmcb *nested_vmcb, struct kvm_host_map *map) +static bool nested_vmcb_checks(struct vmcb *vmcb) { - bool evaluate_pending_interrupts = - is_intercept(svm, INTERCEPT_VINTR) || - is_intercept(svm, INTERCEPT_IRET); + if ((vmcb->save.efer & EFER_SVME) == 0) + return false; - if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF) - svm->vcpu.arch.hflags |= HF_HIF_MASK; - else - svm->vcpu.arch.hflags &= ~HF_HIF_MASK; + if (((vmcb->save.cr0 & X86_CR0_CD) == 0) && + (vmcb->save.cr0 & X86_CR0_NW)) + return false; - if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) { - svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3; - nested_svm_init_mmu_context(&svm->vcpu); + return nested_vmcb_check_controls(&vmcb->control); +} + +static void load_nested_vmcb_control(struct vcpu_svm *svm, + struct vmcb_control_area *control) +{ + copy_vmcb_control_area(&svm->nested.ctl, control); + + /* Copy it here because nested_svm_check_controls will check it. */ + svm->nested.ctl.asid = control->asid; + svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL; + svm->nested.ctl.iopm_base_pa &= ~0x0fffULL; +} + +/* + * Synchronize fields that are written by the processor, so that + * they can be copied back into the nested_vmcb. + */ +void sync_nested_vmcb_control(struct vcpu_svm *svm) +{ + u32 mask; + svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; + svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; + + /* Only a few fields of int_ctl are written by the processor. */ + mask = V_IRQ_MASK | V_TPR_MASK; + if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) && + is_intercept(svm, SVM_EXIT_VINTR)) { + /* + * In order to request an interrupt window, L0 is usurping + * svm->vmcb->control.int_ctl and possibly setting V_IRQ + * even if it was clear in L1's VMCB. Restoring it would be + * wrong. However, in this case V_IRQ will remain true until + * interrupt_window_interception calls svm_clear_vintr and + * restores int_ctl. We can just leave it aside. + */ + mask &= ~V_IRQ_MASK; } + svm->nested.ctl.int_ctl &= ~mask; + svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask; +} + +/* + * Transfer any event that L0 or L1 wanted to inject into L2 to + * EXIT_INT_INFO. + */ +static void nested_vmcb_save_pending_event(struct vcpu_svm *svm, + struct vmcb *nested_vmcb) +{ + struct kvm_vcpu *vcpu = &svm->vcpu; + u32 exit_int_info = 0; + unsigned int nr; + + if (vcpu->arch.exception.injected) { + nr = vcpu->arch.exception.nr; + exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; + + if (vcpu->arch.exception.has_error_code) { + exit_int_info |= SVM_EVTINJ_VALID_ERR; + nested_vmcb->control.exit_int_info_err = + vcpu->arch.exception.error_code; + } + + } else if (vcpu->arch.nmi_injected) { + exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; + } else if (vcpu->arch.interrupt.injected) { + nr = vcpu->arch.interrupt.nr; + exit_int_info = nr | SVM_EVTINJ_VALID; + + if (vcpu->arch.interrupt.soft) + exit_int_info |= SVM_EVTINJ_TYPE_SOFT; + else + exit_int_info |= SVM_EVTINJ_TYPE_INTR; + } + + nested_vmcb->control.exit_int_info = exit_int_info; +} + +static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *nested_vmcb) +{ /* Load the nested guest state */ svm->vmcb->save.es = nested_vmcb->save.es; svm->vmcb->save.cs = nested_vmcb->save.cs; @@ -249,14 +323,7 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm_set_efer(&svm->vcpu, nested_vmcb->save.efer); svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0); svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4); - if (npt_enabled) { - svm->vmcb->save.cr3 = nested_vmcb->save.cr3; - svm->vcpu.arch.cr3 = nested_vmcb->save.cr3; - } else - (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); - - /* Guest paging mode is active - reset mmu */ - kvm_mmu_reset_context(&svm->vcpu); + (void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3); svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2; kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax); @@ -270,38 +337,34 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, svm->vmcb->save.dr7 = nested_vmcb->save.dr7; svm->vcpu.arch.dr6 = nested_vmcb->save.dr6; svm->vmcb->save.cpl = nested_vmcb->save.cpl; +} - svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL; - svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL; +static void nested_prepare_vmcb_control(struct vcpu_svm *svm) +{ + const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK; + if (svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) + nested_svm_init_mmu_context(&svm->vcpu); - /* cache intercepts */ - svm->nested.intercept_cr = nested_vmcb->control.intercept_cr; - svm->nested.intercept_dr = nested_vmcb->control.intercept_dr; - svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions; - svm->nested.intercept = nested_vmcb->control.intercept; + /* Guest paging mode is active - reset mmu */ + kvm_mmu_reset_context(&svm->vcpu); - svm_flush_tlb(&svm->vcpu, true); - svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK; - if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK) - svm->vcpu.arch.hflags |= HF_VINTR_MASK; - else - svm->vcpu.arch.hflags &= ~HF_VINTR_MASK; + svm_flush_tlb(&svm->vcpu); - svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset; - svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = + svm->vcpu.arch.l1_tsc_offset + svm->nested.ctl.tsc_offset; - svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext; - svm->vmcb->control.int_vector = nested_vmcb->control.int_vector; - svm->vmcb->control.int_state = nested_vmcb->control.int_state; - svm->vmcb->control.event_inj = nested_vmcb->control.event_inj; - svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err; + svm->vmcb->control.int_ctl = + (svm->nested.ctl.int_ctl & ~mask) | + (svm->nested.hsave->control.int_ctl & mask); - svm->vmcb->control.pause_filter_count = - nested_vmcb->control.pause_filter_count; - svm->vmcb->control.pause_filter_thresh = - nested_vmcb->control.pause_filter_thresh; + svm->vmcb->control.virt_ext = svm->nested.ctl.virt_ext; + svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; + svm->vmcb->control.int_state = svm->nested.ctl.int_state; + svm->vmcb->control.event_inj = svm->nested.ctl.event_inj; + svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; - kvm_vcpu_unmap(&svm->vcpu, map, true); + svm->vmcb->control.pause_filter_count = svm->nested.ctl.pause_filter_count; + svm->vmcb->control.pause_filter_thresh = svm->nested.ctl.pause_filter_thresh; /* Enter Guest-Mode */ enter_guest_mode(&svm->vcpu); @@ -312,25 +375,18 @@ void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, */ recalc_intercepts(svm); - svm->nested.vmcb = vmcb_gpa; + mark_all_dirty(svm->vmcb); +} - /* - * If L1 had a pending IRQ/NMI before executing VMRUN, - * which wasn't delivered because it was disallowed (e.g. - * interrupts disabled), L0 needs to evaluate if this pending - * event should cause an exit from L2 to L1 or be delivered - * directly to L2. - * - * Usually this would be handled by the processor noticing an - * IRQ/NMI window request. However, VMRUN can unblock interrupts - * by implicitly setting GIF, so force L0 to perform pending event - * evaluation by requesting a KVM_REQ_EVENT. - */ - enable_gif(svm); - if (unlikely(evaluate_pending_interrupts)) - kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); +void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, + struct vmcb *nested_vmcb) +{ + svm->nested.vmcb = vmcb_gpa; + load_nested_vmcb_control(svm, &nested_vmcb->control); + nested_prepare_vmcb_save(svm, nested_vmcb); + nested_prepare_vmcb_control(svm); - mark_all_dirty(svm->vmcb); + svm_set_gif(svm, true); } int nested_svm_vmrun(struct vcpu_svm *svm) @@ -342,8 +398,12 @@ int nested_svm_vmrun(struct vcpu_svm *svm) struct kvm_host_map map; u64 vmcb_gpa; - vmcb_gpa = svm->vmcb->save.rax; + if (is_smm(&svm->vcpu)) { + kvm_queue_exception(&svm->vcpu, UD_VECTOR); + return 1; + } + vmcb_gpa = svm->vmcb->save.rax; ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map); if (ret == -EINVAL) { kvm_inject_gp(&svm->vcpu, 0); @@ -361,10 +421,7 @@ int nested_svm_vmrun(struct vcpu_svm *svm) nested_vmcb->control.exit_code_hi = 0; nested_vmcb->control.exit_info_1 = 0; nested_vmcb->control.exit_info_2 = 0; - - kvm_vcpu_unmap(&svm->vcpu, &map, true); - - return ret; + goto out; } trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa, @@ -404,9 +461,10 @@ int nested_svm_vmrun(struct vcpu_svm *svm) else hsave->save.cr3 = kvm_read_cr3(&svm->vcpu); - copy_vmcb_control_area(hsave, vmcb); + copy_vmcb_control_area(&hsave->control, &vmcb->control); - enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map); + svm->nested.nested_run_pending = 1; + enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb); if (!nested_svm_vmrun_msrpm(svm)) { svm->vmcb->control.exit_code = SVM_EXIT_ERR; @@ -417,6 +475,9 @@ int nested_svm_vmrun(struct vcpu_svm *svm) nested_svm_vmexit(svm); } +out: + kvm_vcpu_unmap(&svm->vcpu, &map, true); + return ret; } @@ -444,13 +505,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm) struct vmcb *vmcb = svm->vmcb; struct kvm_host_map map; - trace_kvm_nested_vmexit_inject(vmcb->control.exit_code, - vmcb->control.exit_info_1, - vmcb->control.exit_info_2, - vmcb->control.exit_int_info, - vmcb->control.exit_int_info_err, - KVM_ISA_SVM); - rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map); if (rc) { if (rc == -EINVAL) @@ -463,9 +517,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm) /* Exit Guest-Mode */ leave_guest_mode(&svm->vcpu); svm->nested.vmcb = 0; + WARN_ON_ONCE(svm->nested.nested_run_pending); + + /* in case we halted in L2 */ + svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; /* Give the current vmcb to the guest */ - disable_gif(svm); + svm_set_gif(svm, false); nested_vmcb->save.es = vmcb->save.es; nested_vmcb->save.cs = vmcb->save.cs; @@ -479,62 +537,42 @@ int nested_svm_vmexit(struct vcpu_svm *svm) nested_vmcb->save.cr2 = vmcb->save.cr2; nested_vmcb->save.cr4 = svm->vcpu.arch.cr4; nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu); - nested_vmcb->save.rip = vmcb->save.rip; - nested_vmcb->save.rsp = vmcb->save.rsp; - nested_vmcb->save.rax = vmcb->save.rax; + nested_vmcb->save.rip = kvm_rip_read(&svm->vcpu); + nested_vmcb->save.rsp = kvm_rsp_read(&svm->vcpu); + nested_vmcb->save.rax = kvm_rax_read(&svm->vcpu); nested_vmcb->save.dr7 = vmcb->save.dr7; nested_vmcb->save.dr6 = svm->vcpu.arch.dr6; nested_vmcb->save.cpl = vmcb->save.cpl; - nested_vmcb->control.int_ctl = vmcb->control.int_ctl; - nested_vmcb->control.int_vector = vmcb->control.int_vector; nested_vmcb->control.int_state = vmcb->control.int_state; nested_vmcb->control.exit_code = vmcb->control.exit_code; nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi; nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1; nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2; - nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info; - nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err; + + if (nested_vmcb->control.exit_code != SVM_EXIT_ERR) + nested_vmcb_save_pending_event(svm, nested_vmcb); if (svm->nrips_enabled) nested_vmcb->control.next_rip = vmcb->control.next_rip; - /* - * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have - * to make sure that we do not lose injected events. So check event_inj - * here and copy it to exit_int_info if it is valid. - * Exit_int_info and event_inj can't be both valid because the case - * below only happens on a VMRUN instruction intercept which has - * no valid exit_int_info set. - */ - if (vmcb->control.event_inj & SVM_EVTINJ_VALID) { - struct vmcb_control_area *nc = &nested_vmcb->control; - - nc->exit_int_info = vmcb->control.event_inj; - nc->exit_int_info_err = vmcb->control.event_inj_err; - } - - nested_vmcb->control.tlb_ctl = 0; - nested_vmcb->control.event_inj = 0; - nested_vmcb->control.event_inj_err = 0; + nested_vmcb->control.int_ctl = svm->nested.ctl.int_ctl; + nested_vmcb->control.tlb_ctl = svm->nested.ctl.tlb_ctl; + nested_vmcb->control.event_inj = svm->nested.ctl.event_inj; + nested_vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; nested_vmcb->control.pause_filter_count = svm->vmcb->control.pause_filter_count; nested_vmcb->control.pause_filter_thresh = svm->vmcb->control.pause_filter_thresh; - /* We always set V_INTR_MASKING and remember the old value in hflags */ - if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK)) - nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK; - /* Restore the original control entries */ - copy_vmcb_control_area(vmcb, hsave); + copy_vmcb_control_area(&vmcb->control, &hsave->control); - svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset; - kvm_clear_exception_queue(&svm->vcpu); - kvm_clear_interrupt_queue(&svm->vcpu); + svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset = + svm->vcpu.arch.l1_tsc_offset; - svm->nested.nested_cr3 = 0; + svm->nested.ctl.nested_cr3 = 0; /* Restore selected save entries */ svm->vmcb->save.es = hsave->save.es; @@ -562,6 +600,13 @@ int nested_svm_vmexit(struct vcpu_svm *svm) mark_all_dirty(svm->vmcb); + trace_kvm_nested_vmexit_inject(nested_vmcb->control.exit_code, + nested_vmcb->control.exit_info_1, + nested_vmcb->control.exit_info_2, + nested_vmcb->control.exit_int_info, + nested_vmcb->control.exit_int_info_err, + KVM_ISA_SVM); + kvm_vcpu_unmap(&svm->vcpu, &map, true); nested_svm_uninit_mmu_context(&svm->vcpu); @@ -579,12 +624,28 @@ int nested_svm_vmexit(struct vcpu_svm *svm) return 0; } +/* + * Forcibly leave nested mode in order to be able to reset the VCPU later on. + */ +void svm_leave_nested(struct vcpu_svm *svm) +{ + if (is_guest_mode(&svm->vcpu)) { + struct vmcb *hsave = svm->nested.hsave; + struct vmcb *vmcb = svm->vmcb; + + svm->nested.nested_run_pending = 0; + leave_guest_mode(&svm->vcpu); + copy_vmcb_control_area(&vmcb->control, &hsave->control); + nested_svm_uninit_mmu_context(&svm->vcpu); + } +} + static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) { u32 offset, msr, value; int write, mask; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_MSR_PROT))) return NESTED_EXIT_HOST; msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; @@ -598,56 +659,12 @@ static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) /* Offset is in 32 bit units but need in 8 bit units */ offset *= 4; - if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4)) + if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4)) return NESTED_EXIT_DONE; return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; } -/* DB exceptions for our internal use must not cause vmexit */ -static int nested_svm_intercept_db(struct vcpu_svm *svm) -{ - unsigned long dr6 = svm->vmcb->save.dr6; - - /* Always catch it and pass it to userspace if debugging. */ - if (svm->vcpu.guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return NESTED_EXIT_HOST; - - /* if we're not singlestepping, it's not ours */ - if (!svm->nmi_singlestep) - goto reflected_db; - - /* if it's not a singlestep exception, it's not ours */ - if (!(dr6 & DR6_BS)) - goto reflected_db; - - /* if the guest is singlestepping, it should get the vmexit */ - if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) { - disable_nmi_singlestep(svm); - goto reflected_db; - } - - /* it's ours, the nested hypervisor must not see this one */ - return NESTED_EXIT_HOST; - -reflected_db: - /* - * Synchronize guest DR6 here just like in kvm_deliver_exception_payload; - * it will be moved into the nested VMCB by nested_svm_vmexit. Once - * exceptions will be moved to svm_check_nested_events, all this stuff - * will just go away and we could just return NESTED_EXIT_HOST - * unconditionally. db_interception will queue the exception, which - * will be processed by svm_check_nested_events if a nested vmexit is - * required, and we will just use kvm_deliver_exception_payload to copy - * the payload to DR6 before vmexit. - */ - WARN_ON(svm->vcpu.arch.switch_db_regs & KVM_DEBUGREG_WONT_EXIT); - svm->vcpu.arch.dr6 &= ~(DR_TRAP_BITS | DR6_RTM); - svm->vcpu.arch.dr6 |= dr6 & ~DR6_FIXED_1; - return NESTED_EXIT_DONE; -} - static int nested_svm_intercept_ioio(struct vcpu_svm *svm) { unsigned port, size, iopm_len; @@ -655,13 +672,13 @@ static int nested_svm_intercept_ioio(struct vcpu_svm *svm) u8 start_bit; u64 gpa; - if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT))) + if (!(svm->nested.ctl.intercept & (1ULL << INTERCEPT_IOIO_PROT))) return NESTED_EXIT_HOST; port = svm->vmcb->control.exit_info_1 >> 16; size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT; - gpa = svm->nested.vmcb_iopm + (port / 8); + gpa = svm->nested.ctl.iopm_base_pa + (port / 8); start_bit = port % 8; iopm_len = (start_bit + size > 8) ? 2 : 1; mask = (0xf >> (4 - size)) << start_bit; @@ -687,31 +704,23 @@ static int nested_svm_intercept(struct vcpu_svm *svm) break; case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0); - if (svm->nested.intercept_cr & bit) + if (svm->nested.ctl.intercept_cr & bit) vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0); - if (svm->nested.intercept_dr & bit) + if (svm->nested.ctl.intercept_dr & bit) vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { - u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); - if (svm->nested.intercept_exceptions & excp_bits) { - if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR) - vmexit = nested_svm_intercept_db(svm); - else if (exit_code == SVM_EXIT_EXCP_BASE + BP_VECTOR && - svm->vcpu.guest_debug & KVM_GUESTDBG_USE_SW_BP) - vmexit = NESTED_EXIT_HOST; - else - vmexit = NESTED_EXIT_DONE; - } - /* async page fault always cause vmexit */ - else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) && - svm->vcpu.arch.exception.nested_apf != 0) - vmexit = NESTED_EXIT_DONE; + /* + * Host-intercepted exceptions have been checked already in + * nested_svm_exit_special. There is nothing to do here, + * the vmexit is injected by svm_check_nested_events. + */ + vmexit = NESTED_EXIT_DONE; break; } case SVM_EXIT_ERR: { @@ -720,7 +729,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm) } default: { u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR); - if (svm->nested.intercept & exit_bits) + if (svm->nested.ctl.intercept & exit_bits) vmexit = NESTED_EXIT_DONE; } } @@ -756,62 +765,140 @@ int nested_svm_check_permissions(struct vcpu_svm *svm) return 0; } -int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, - bool has_error_code, u32 error_code) +static bool nested_exit_on_exception(struct vcpu_svm *svm) { - int vmexit; + unsigned int nr = svm->vcpu.arch.exception.nr; - if (!is_guest_mode(&svm->vcpu)) - return 0; + return (svm->nested.ctl.intercept_exceptions & (1 << nr)); +} - vmexit = nested_svm_intercept(svm); - if (vmexit != NESTED_EXIT_DONE) - return 0; +static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm) +{ + unsigned int nr = svm->vcpu.arch.exception.nr; svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; svm->vmcb->control.exit_code_hi = 0; - svm->vmcb->control.exit_info_1 = error_code; + + if (svm->vcpu.arch.exception.has_error_code) + svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code; /* * EXITINFO2 is undefined for all exception intercepts other * than #PF. */ - if (svm->vcpu.arch.exception.nested_apf) - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; - else if (svm->vcpu.arch.exception.has_payload) - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; - else - svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + if (nr == PF_VECTOR) { + if (svm->vcpu.arch.exception.nested_apf) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; + else if (svm->vcpu.arch.exception.has_payload) + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; + else + svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; + } else if (nr == DB_VECTOR) { + /* See inject_pending_event. */ + kvm_deliver_exception_payload(&svm->vcpu); + if (svm->vcpu.arch.dr7 & DR7_GD) { + svm->vcpu.arch.dr7 &= ~DR7_GD; + kvm_update_dr7(&svm->vcpu); + } + } else + WARN_ON(svm->vcpu.arch.exception.has_payload); - svm->nested.exit_required = true; - return vmexit; + nested_svm_vmexit(svm); +} + +static void nested_svm_smi(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_SMI; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); +} + +static void nested_svm_nmi(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_NMI; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); } static void nested_svm_intr(struct vcpu_svm *svm) { + trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); + svm->vmcb->control.exit_code = SVM_EXIT_INTR; svm->vmcb->control.exit_info_1 = 0; svm->vmcb->control.exit_info_2 = 0; - /* nested_svm_vmexit this gets called afterwards from handle_exit */ - svm->nested.exit_required = true; - trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); + nested_svm_vmexit(svm); } -static bool nested_exit_on_intr(struct vcpu_svm *svm) +static inline bool nested_exit_on_init(struct vcpu_svm *svm) { - return (svm->nested.intercept & 1ULL); + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INIT)); +} + +static void nested_svm_init(struct vcpu_svm *svm) +{ + svm->vmcb->control.exit_code = SVM_EXIT_INIT; + svm->vmcb->control.exit_info_1 = 0; + svm->vmcb->control.exit_info_2 = 0; + + nested_svm_vmexit(svm); } -int svm_check_nested_events(struct kvm_vcpu *vcpu) + +static int svm_check_nested_events(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); bool block_nested_events = - kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required; + kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending; + struct kvm_lapic *apic = vcpu->arch.apic; + + if (lapic_in_kernel(vcpu) && + test_bit(KVM_APIC_INIT, &apic->pending_events)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_init(svm)) + return 0; + nested_svm_init(svm); + return 0; + } + + if (vcpu->arch.exception.pending) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_exception(svm)) + return 0; + nested_svm_inject_exception_vmexit(svm); + return 0; + } + + if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_smi(svm)) + return 0; + nested_svm_smi(svm); + return 0; + } + + if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) { + if (block_nested_events) + return -EBUSY; + if (!nested_exit_on_nmi(svm)) + return 0; + nested_svm_nmi(svm); + return 0; + } - if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) { + if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_exit_on_intr(svm)) + return 0; nested_svm_intr(svm); return 0; } @@ -826,21 +913,170 @@ int nested_svm_exit_special(struct vcpu_svm *svm) switch (exit_code) { case SVM_EXIT_INTR: case SVM_EXIT_NMI: - case SVM_EXIT_EXCP_BASE + MC_VECTOR: - return NESTED_EXIT_HOST; case SVM_EXIT_NPF: - /* For now we are always handling NPFs when using them */ - if (npt_enabled) + return NESTED_EXIT_HOST; + case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { + u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); + + if (get_host_vmcb(svm)->control.intercept_exceptions & excp_bits) return NESTED_EXIT_HOST; - break; - case SVM_EXIT_EXCP_BASE + PF_VECTOR: - /* When we're shadowing, trap PFs, but not async PF */ - if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0) + else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR && + svm->vcpu.arch.apf.host_apf_flags) + /* Trap async PF even if not shadowing */ return NESTED_EXIT_HOST; break; + } default: break; } return NESTED_EXIT_CONTINUE; } + +static int svm_get_nested_state(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + u32 user_data_size) +{ + struct vcpu_svm *svm; + struct kvm_nested_state kvm_state = { + .flags = 0, + .format = KVM_STATE_NESTED_FORMAT_SVM, + .size = sizeof(kvm_state), + }; + struct vmcb __user *user_vmcb = (struct vmcb __user *) + &user_kvm_nested_state->data.svm[0]; + + if (!vcpu) + return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE; + + svm = to_svm(vcpu); + + if (user_data_size < kvm_state.size) + goto out; + + /* First fill in the header and copy it out. */ + if (is_guest_mode(vcpu)) { + kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb; + kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE; + kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; + + if (svm->nested.nested_run_pending) + kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; + } + + if (gif_set(svm)) + kvm_state.flags |= KVM_STATE_NESTED_GIF_SET; + + if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) + return -EFAULT; + + if (!is_guest_mode(vcpu)) + goto out; + + /* + * Copy over the full size of the VMCB rather than just the size + * of the structs. + */ + if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE)) + return -EFAULT; + if (copy_to_user(&user_vmcb->control, &svm->nested.ctl, + sizeof(user_vmcb->control))) + return -EFAULT; + if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save, + sizeof(user_vmcb->save))) + return -EFAULT; + +out: + return kvm_state.size; +} + +static int svm_set_nested_state(struct kvm_vcpu *vcpu, + struct kvm_nested_state __user *user_kvm_nested_state, + struct kvm_nested_state *kvm_state) +{ + struct vcpu_svm *svm = to_svm(vcpu); + struct vmcb *hsave = svm->nested.hsave; + struct vmcb __user *user_vmcb = (struct vmcb __user *) + &user_kvm_nested_state->data.svm[0]; + struct vmcb_control_area ctl; + struct vmcb_save_area save; + u32 cr0; + + if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM) + return -EINVAL; + + if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE | + KVM_STATE_NESTED_RUN_PENDING | + KVM_STATE_NESTED_GIF_SET)) + return -EINVAL; + + /* + * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's + * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed. + */ + if (!(vcpu->arch.efer & EFER_SVME)) { + /* GIF=1 and no guest mode are required if SVME=0. */ + if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET) + return -EINVAL; + } + + /* SMM temporarily disables SVM, so we cannot be in guest mode. */ + if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) + return -EINVAL; + + if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { + svm_leave_nested(svm); + goto out_set_gif; + } + + if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa)) + return -EINVAL; + if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE) + return -EINVAL; + if (copy_from_user(&ctl, &user_vmcb->control, sizeof(ctl))) + return -EFAULT; + if (copy_from_user(&save, &user_vmcb->save, sizeof(save))) + return -EFAULT; + + if (!nested_vmcb_check_controls(&ctl)) + return -EINVAL; + + /* + * Processor state contains L2 state. Check that it is + * valid for guest mode (see nested_vmcb_checks). + */ + cr0 = kvm_read_cr0(vcpu); + if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW)) + return -EINVAL; + + /* + * Validate host state saved from before VMRUN (see + * nested_svm_check_permissions). + * TODO: validate reserved bits for all saved state. + */ + if (!(save.cr0 & X86_CR0_PG)) + return -EINVAL; + + /* + * All checks done, we can enter guest mode. L1 control fields + * come from the nested save state. Guest state is already + * in the registers, the save area of the nested state instead + * contains saved L1 state. + */ + copy_vmcb_control_area(&hsave->control, &svm->vmcb->control); + hsave->save = save; + + svm->nested.vmcb = kvm_state->hdr.svm.vmcb_pa; + load_nested_vmcb_control(svm, &ctl); + nested_prepare_vmcb_control(svm); + +out_set_gif: + svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); + return 0; +} + +struct kvm_x86_nested_ops svm_nested_ops = { + .check_events = svm_check_nested_events, + .get_state = svm_get_nested_state, + .set_state = svm_set_nested_state, +}; diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c index ce0b10fe5e2b..035da07500e8 100644 --- a/arch/x86/kvm/svm/pmu.c +++ b/arch/x86/kvm/svm/pmu.c @@ -215,21 +215,22 @@ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) return pmc; } -static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +static int amd_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; + u32 msr = msr_info->index; /* MSR_PERFCTRn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_COUNTER); if (pmc) { - *data = pmc_read_counter(pmc); + msr_info->data = pmc_read_counter(pmc); return 0; } /* MSR_EVNTSELn */ pmc = get_gp_pmc_amd(pmu, msr, PMU_TYPE_EVNTSEL); if (pmc) { - *data = pmc->eventsel; + msr_info->data = pmc->eventsel; return 0; } diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 89f7f3aebd31..5573a97f1520 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -336,8 +336,7 @@ static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr, /* Avoid using vmalloc for smaller buffers. */ size = npages * sizeof(struct page *); if (size > PAGE_SIZE) - pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO, - PAGE_KERNEL); + pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO); else pages = kmalloc(size, GFP_KERNEL_ACCOUNT); diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index a862c768fd54..9e333b91ff78 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -33,6 +33,7 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> +#include <asm/mce.h> #include <asm/spec-ctrl.h> #include <asm/cpu_device_id.h> @@ -264,6 +265,7 @@ static int get_npt_level(struct kvm_vcpu *vcpu) void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { + struct vcpu_svm *svm = to_svm(vcpu); vcpu->arch.efer = efer; if (!npt_enabled) { @@ -274,8 +276,13 @@ void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) efer &= ~EFER_LME; } - to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; - mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); + if (!(efer & EFER_SVME)) { + svm_leave_nested(svm); + svm_set_gif(svm, true); + } + + svm->vmcb->save.efer = efer | EFER_SVME; + mark_dirty(svm->vmcb, VMCB_CR); } static int is_external_interrupt(u32 info) @@ -318,9 +325,6 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) return 0; } else { - if (svm->next_rip - kvm_rip_read(vcpu) > MAX_INST_SIZE) - pr_err("%s: ip 0x%lx next 0x%llx\n", - __func__, kvm_rip_read(vcpu), svm->next_rip); kvm_rip_write(vcpu, svm->next_rip); } svm_set_interrupt_shadow(vcpu, 0); @@ -333,17 +337,8 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); unsigned nr = vcpu->arch.exception.nr; bool has_error_code = vcpu->arch.exception.has_error_code; - bool reinject = vcpu->arch.exception.injected; u32 error_code = vcpu->arch.exception.error_code; - /* - * If we are within a nested VM we'd better #VMEXIT and let the guest - * handle the exception - */ - if (!reinject && - nested_svm_check_exception(svm, nr, has_error_code, error_code)) - return; - kvm_deliver_exception_payload(&svm->vcpu); if (nr == BP_VECTOR && !nrips) { @@ -780,7 +775,7 @@ static __init void svm_adjust_mmio_mask(void) */ mask = (mask_bit < 52) ? rsvd_bits(mask_bit, 51) | PT_PRESENT_MASK : 0; - kvm_mmu_set_mmio_spte_mask(mask, mask, PT_WRITABLE_MASK | PT_USER_MASK); + kvm_mmu_set_mmio_spte_mask(mask, PT_WRITABLE_MASK | PT_USER_MASK); } static void svm_hardware_teardown(void) @@ -890,7 +885,7 @@ static __init int svm_hardware_setup(void) if (npt_enabled && !npt) npt_enabled = false; - kvm_configure_mmu(npt_enabled, PT_PDPE_LEVEL); + kvm_configure_mmu(npt_enabled, PG_LEVEL_1G); pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis"); if (nrips) { @@ -953,16 +948,6 @@ static void init_sys_seg(struct vmcb_seg *seg, uint32_t type) seg->base = 0; } -static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - if (is_guest_mode(vcpu)) - return svm->nested.hsave->control.tsc_offset; - - return vcpu->arch.tsc_offset; -} - static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); @@ -1208,6 +1193,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) svm->avic_is_running = true; svm->nested.hsave = page_address(hsave_page); + clear_page(svm->nested.hsave); svm->msrpm = page_address(msrpm_pages); svm_vcpu_init_msrpm(svm->msrpm); @@ -1364,12 +1350,13 @@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) } } -static inline void svm_enable_vintr(struct vcpu_svm *svm) +static void svm_set_vintr(struct vcpu_svm *svm) { struct vmcb_control_area *control; /* The following fields are ignored when AVIC is enabled */ WARN_ON(kvm_vcpu_apicv_active(&svm->vcpu)); + set_intercept(svm, INTERCEPT_VINTR); /* * This is just a dummy VINTR to actually cause a vmexit to happen. @@ -1383,18 +1370,19 @@ static inline void svm_enable_vintr(struct vcpu_svm *svm) mark_dirty(svm->vmcb, VMCB_INTR); } -static void svm_set_vintr(struct vcpu_svm *svm) -{ - set_intercept(svm, INTERCEPT_VINTR); - if (is_intercept(svm, INTERCEPT_VINTR)) - svm_enable_vintr(svm); -} - static void svm_clear_vintr(struct vcpu_svm *svm) { + const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK; clr_intercept(svm, INTERCEPT_VINTR); - svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; + /* Drop int_ctl fields related to VINTR injection. */ + svm->vmcb->control.int_ctl &= mask; + if (is_guest_mode(&svm->vcpu)) { + WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) != + (svm->nested.ctl.int_ctl & V_TPR_MASK)); + svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask; + } + mark_dirty(svm->vmcb, VMCB_INTR); } @@ -1533,14 +1521,6 @@ static void svm_set_gdt(struct kvm_vcpu *vcpu, struct desc_ptr *dt) mark_dirty(svm->vmcb, VMCB_DT); } -static void svm_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) -{ -} - -static void svm_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) -{ -} - static void update_cr0_intercept(struct vcpu_svm *svm) { ulong gcr0 = svm->vcpu.arch.cr0; @@ -1603,7 +1583,7 @@ int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; if (npt_enabled && ((old_cr4 ^ cr4) & X86_CR4_PGE)) - svm_flush_tlb(vcpu, true); + svm_flush_tlb(vcpu); vcpu->arch.cr4 = cr4; if (!npt_enabled) @@ -1842,6 +1822,25 @@ static bool is_erratum_383(void) return true; } +/* + * Trigger machine check on the host. We assume all the MSRs are already set up + * by the CPU and that we still run on the same CPU as the MCE occurred on. + * We pass a fake environment to the machine check handler because we want + * the guest to be always treated like user space, no matter what context + * it used internally. + */ +static void kvm_machine_check(void) +{ +#if defined(CONFIG_X86_MCE) + struct pt_regs regs = { + .cs = 3, /* Fake ring 3 no matter what the guest ran on */ + .flags = X86_EFLAGS_IF, + }; + + do_machine_check(®s, 0); +#endif +} + static void svm_handle_mce(struct vcpu_svm *svm) { if (is_erratum_383()) { @@ -1860,11 +1859,7 @@ static void svm_handle_mce(struct vcpu_svm *svm) * On an #MC intercept the MCE handler is not called automatically in * the host. So do it by hand here. */ - asm volatile ( - "int $0x12\n"); - /* not sure if we ever come back to this point */ - - return; + kvm_machine_check(); } static int mc_interception(struct vcpu_svm *svm) @@ -1993,6 +1988,38 @@ static int vmrun_interception(struct vcpu_svm *svm) return nested_svm_vmrun(svm); } +void svm_set_gif(struct vcpu_svm *svm, bool value) +{ + if (value) { + /* + * If VGIF is enabled, the STGI intercept is only added to + * detect the opening of the SMI/NMI window; remove it now. + * Likewise, clear the VINTR intercept, we will set it + * again while processing KVM_REQ_EVENT if needed. + */ + if (vgif_enabled(svm)) + clr_intercept(svm, INTERCEPT_STGI); + if (is_intercept(svm, SVM_EXIT_VINTR)) + svm_clear_vintr(svm); + + enable_gif(svm); + if (svm->vcpu.arch.smi_pending || + svm->vcpu.arch.nmi_pending || + kvm_cpu_has_injectable_intr(&svm->vcpu)) + kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); + } else { + disable_gif(svm); + + /* + * After a CLGI no interrupts should come. But if vGIF is + * in use, we still rely on the VINTR intercept (rather than + * STGI) to detect an open interrupt window. + */ + if (!vgif_enabled(svm)) + svm_clear_vintr(svm); + } +} + static int stgi_interception(struct vcpu_svm *svm) { int ret; @@ -2000,18 +2027,8 @@ static int stgi_interception(struct vcpu_svm *svm) if (nested_svm_check_permissions(svm)) return 1; - /* - * If VGIF is enabled, the STGI intercept is only added to - * detect the opening of the SMI/NMI window; remove it now. - */ - if (vgif_enabled(svm)) - clr_intercept(svm, INTERCEPT_STGI); - ret = kvm_skip_emulated_instruction(&svm->vcpu); - kvm_make_request(KVM_REQ_EVENT, &svm->vcpu); - - enable_gif(svm); - + svm_set_gif(svm, true); return ret; } @@ -2023,13 +2040,7 @@ static int clgi_interception(struct vcpu_svm *svm) return 1; ret = kvm_skip_emulated_instruction(&svm->vcpu); - - disable_gif(svm); - - /* After a CLGI no interrupts should come */ - if (!kvm_vcpu_apicv_active(&svm->vcpu)) - svm_clear_vintr(svm); - + svm_set_gif(svm, false); return ret; } @@ -2193,7 +2204,7 @@ static bool check_selective_cr0_intercepted(struct vcpu_svm *svm, bool ret = false; u64 intercept; - intercept = svm->nested.intercept; + intercept = svm->nested.ctl.intercept; if (!is_guest_mode(&svm->vcpu) || (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0)))) @@ -2671,8 +2682,6 @@ static int interrupt_window_interception(struct vcpu_svm *svm) */ svm_toggle_avic_for_irq_window(&svm->vcpu, true); - svm->vmcb->control.int_ctl &= ~V_IRQ_MASK; - mark_dirty(svm->vmcb, VMCB_INTR); ++svm->vcpu.stat.irq_window_exits; return 1; } @@ -2898,8 +2907,7 @@ static void svm_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) *info2 = control->exit_info_2; } -static int handle_exit(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion exit_fastpath) +static int handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_svm *svm = to_svm(vcpu); struct kvm_run *kvm_run = vcpu->run; @@ -2912,12 +2920,7 @@ static int handle_exit(struct kvm_vcpu *vcpu, if (npt_enabled) vcpu->arch.cr3 = svm->vmcb->save.cr3; - if (unlikely(svm->nested.exit_required)) { - nested_svm_vmexit(svm); - svm->nested.exit_required = false; - - return 1; - } + svm_complete_interrupts(svm); if (is_guest_mode(vcpu)) { int vmexit; @@ -2938,8 +2941,6 @@ static int handle_exit(struct kvm_vcpu *vcpu, return 1; } - svm_complete_interrupts(svm); - if (svm->vmcb->control.exit_code == SVM_EXIT_ERR) { kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY; kvm_run->fail_entry.hardware_entry_failure_reason @@ -2957,10 +2958,10 @@ static int handle_exit(struct kvm_vcpu *vcpu, __func__, svm->vmcb->control.exit_int_info, exit_code); - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { - kvm_skip_emulated_instruction(vcpu); + if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - } else if (exit_code >= ARRAY_SIZE(svm_exit_handlers) + + if (exit_code >= ARRAY_SIZE(svm_exit_handlers) || !svm_exit_handlers[exit_code]) { vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%x\n", exit_code); dump_vmcb(vcpu); @@ -3049,18 +3050,37 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr) set_cr_intercept(svm, INTERCEPT_CR8_WRITE); } -static int svm_nmi_allowed(struct kvm_vcpu *vcpu) +bool svm_nmi_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - int ret; - ret = !(vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) && - !(svm->vcpu.arch.hflags & HF_NMI_MASK); - ret = ret && gif_set(svm) && nested_svm_nmi(svm); + bool ret; + + if (!gif_set(svm)) + return true; + + if (is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) + return false; + + ret = (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK) || + (svm->vcpu.arch.hflags & HF_NMI_MASK); return ret; } +static int svm_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(svm)) + return -EBUSY; + + return !svm_nmi_blocked(vcpu); +} + static bool svm_get_nmi_mask(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3081,19 +3101,46 @@ static void svm_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) } } -static int svm_interrupt_allowed(struct kvm_vcpu *vcpu) +bool svm_interrupt_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); struct vmcb *vmcb = svm->vmcb; - if (!gif_set(svm) || - (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK)) - return 0; + if (!gif_set(svm)) + return true; - if (is_guest_mode(vcpu) && (svm->vcpu.arch.hflags & HF_VINTR_MASK)) - return !!(svm->vcpu.arch.hflags & HF_HIF_MASK); - else - return !!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF); + if (is_guest_mode(vcpu)) { + /* As long as interrupts are being delivered... */ + if ((svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) + ? !(svm->nested.hsave->save.rflags & X86_EFLAGS_IF) + : !(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) + return true; + + /* ... vmexits aren't blocked by the interrupt shadow */ + if (nested_exit_on_intr(svm)) + return false; + } else { + if (!(kvm_get_rflags(vcpu) & X86_EFLAGS_IF)) + return true; + } + + return (vmcb->control.int_state & SVM_INTERRUPT_SHADOW_MASK); +} + +static int svm_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* + * An IRQ must not be injected into L2 if it's supposed to VM-Exit, + * e.g. if the IRQ arrived asynchronously after checking nested events. + */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(svm)) + return -EBUSY; + + return !svm_interrupt_blocked(vcpu); } static void enable_irq_window(struct kvm_vcpu *vcpu) @@ -3134,9 +3181,6 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu) return; /* STGI will cause a vm exit */ } - if (svm->nested.exit_required) - return; /* we're not going to run the guest yet */ - /* * Something prevents NMI from been injected. Single step over possible * problem (IRET or exception injection or interrupt shadow) @@ -3156,10 +3200,17 @@ static int svm_set_identity_map_addr(struct kvm *kvm, u64 ident_addr) return 0; } -void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) +void svm_flush_tlb(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * Flush only the current ASID even if the TLB flush was invoked via + * kvm_flush_remote_tlbs(). Although flushing remote TLBs requires all + * ASIDs to be flushed, KVM uses a single ASID for L1 and L2, and + * unconditionally does a TLB flush on both nested VM-Enter and nested + * VM-Exit (via kvm_mmu_reset_context()). + */ if (static_cpu_has(X86_FEATURE_FLUSHBYASID)) svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID; else @@ -3279,10 +3330,21 @@ static void svm_cancel_injection(struct kvm_vcpu *vcpu) svm_complete_interrupts(svm); } +static fastpath_t svm_exit_handlers_fastpath(struct kvm_vcpu *vcpu) +{ + if (!is_guest_mode(vcpu) && + to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && + to_svm(vcpu)->vmcb->control.exit_info_1) + return handle_fastpath_set_msr_irqoff(vcpu); + + return EXIT_FASTPATH_NONE; +} + void __svm_vcpu_run(unsigned long vmcb_pa, unsigned long *regs); -static void svm_vcpu_run(struct kvm_vcpu *vcpu) +static fastpath_t svm_vcpu_run(struct kvm_vcpu *vcpu) { + fastpath_t exit_fastpath; struct vcpu_svm *svm = to_svm(vcpu); svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; @@ -3290,13 +3352,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm->vmcb->save.rip = vcpu->arch.regs[VCPU_REGS_RIP]; /* - * A vmexit emulation is required before the vcpu can be executed - * again. - */ - if (unlikely(svm->nested.exit_required)) - return; - - /* * Disable singlestep if we're injecting an interrupt/exception. * We don't want our modified rflags to be pushed on the stack where * we might not be able to easily reset them if we disabled NMI @@ -3387,6 +3442,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) stgi(); /* Any pending NMI will happen here */ + exit_fastpath = svm_exit_handlers_fastpath(vcpu); if (unlikely(svm->vmcb->control.exit_code == SVM_EXIT_NMI)) kvm_after_interrupt(&svm->vcpu); @@ -3394,12 +3450,17 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) sync_cr8_to_lapic(vcpu); svm->next_rip = 0; + if (is_guest_mode(&svm->vcpu)) { + sync_nested_vmcb_control(svm); + svm->nested.nested_run_pending = 0; + } svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; /* if exit due to PF check for async PF */ if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) - svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); + svm->vcpu.arch.apf.host_apf_flags = + kvm_read_and_reset_apf_flags(); if (npt_enabled) { vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR); @@ -3415,12 +3476,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) svm_handle_mce(svm); mark_all_clean(svm->vmcb); + return exit_fastpath; } static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - bool update_guest_cr3 = true; unsigned long cr3; cr3 = __sme_set(root); @@ -3429,18 +3490,13 @@ static void svm_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long root) mark_dirty(svm->vmcb, VMCB_NPT); /* Loading L2's CR3 is handled by enter_svm_guest_mode. */ - if (is_guest_mode(vcpu)) - update_guest_cr3 = false; - else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) - cr3 = vcpu->arch.cr3; - else /* CR3 is already up-to-date. */ - update_guest_cr3 = false; + if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) + return; + cr3 = vcpu->arch.cr3; } - if (update_guest_cr3) { - svm->vmcb->save.cr3 = cr3; - mark_dirty(svm->vmcb, VMCB_CR); - } + svm->vmcb->save.cr3 = cr3; + mark_dirty(svm->vmcb, VMCB_CR); } static int is_disabled(void) @@ -3475,7 +3531,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_emulated_msr(int index) +static bool svm_has_emulated_msr(u32 index) { switch (index) { case MSR_IA32_MCG_EXT_CTL: @@ -3628,7 +3684,7 @@ static int svm_check_intercept(struct kvm_vcpu *vcpu, info->intercept == x86_intercept_clts) break; - intercept = svm->nested.intercept; + intercept = svm->nested.ctl.intercept; if (!(intercept & (1ULL << INTERCEPT_SELECTIVE_CR0))) break; @@ -3716,13 +3772,8 @@ out: return ret; } -static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath) +static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(vcpu) && - to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_MSR && - to_svm(vcpu)->vmcb->control.exit_info_1) - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu) @@ -3737,23 +3788,28 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu) vcpu->arch.mcg_cap &= 0x1ff; } -static int svm_smi_allowed(struct kvm_vcpu *vcpu) +bool svm_smi_blocked(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); /* Per APM Vol.2 15.22.2 "Response to SMI" */ if (!gif_set(svm)) - return 0; + return true; - if (is_guest_mode(&svm->vcpu) && - svm->nested.intercept & (1ULL << INTERCEPT_SMI)) { - /* TODO: Might need to set exit_info_1 and exit_info_2 here */ - svm->vmcb->control.exit_code = SVM_EXIT_SMI; - svm->nested.exit_required = true; - return 0; - } + return is_smm(vcpu); +} - return 1; +static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + struct vcpu_svm *svm = to_svm(vcpu); + if (svm->nested.nested_run_pending) + return -EBUSY; + + /* An SMI must not be injected into L2 if it's supposed to VM-Exit. */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_smi(svm)) + return -EBUSY; + + return !svm_smi_blocked(vcpu); } static int svm_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) @@ -3793,12 +3849,13 @@ static int svm_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb), &map) == -EINVAL) return 1; nested_vmcb = map.hva; - enter_svm_guest_mode(svm, vmcb, nested_vmcb, &map); + enter_svm_guest_mode(svm, vmcb, nested_vmcb); + kvm_vcpu_unmap(&svm->vcpu, &map, true); } return 0; } -static int enable_smi_window(struct kvm_vcpu *vcpu) +static void enable_smi_window(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); @@ -3806,9 +3863,9 @@ static int enable_smi_window(struct kvm_vcpu *vcpu) if (vgif_enabled(svm)) set_intercept(svm, INTERCEPT_STGI); /* STGI will cause a vm exit */ - return 1; + } else { + /* We must be in SMM; RSM will cause a vmexit anyway. */ } - return 0; } static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) @@ -3819,6 +3876,13 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) bool is_user = svm_get_cpl(vcpu) == 3; /* + * If RIP is invalid, go ahead with emulation which will cause an + * internal error exit. + */ + if (!kvm_vcpu_gfn_to_memslot(vcpu, kvm_rip_read(vcpu) >> PAGE_SHIFT)) + return true; + + /* * Detect and workaround Errata 1096 Fam_17h_00_0Fh. * * Errata: @@ -3876,9 +3940,9 @@ static bool svm_apic_init_signal_blocked(struct kvm_vcpu *vcpu) /* * TODO: Last condition latch INIT signals on vCPU when * vCPU is in guest-mode and vmcb12 defines intercept on INIT. - * To properly emulate the INIT intercept, SVM should implement - * kvm_x86_ops.check_nested_events() and call nested_svm_vmexit() - * there if an INIT signal is pending. + * To properly emulate the INIT intercept, + * svm_check_nested_events() should call nested_svm_vmexit() + * if an INIT signal is pending. */ return !gif_set(svm) || (svm->vmcb->control.intercept & (1ULL << INTERCEPT_INIT)); @@ -3932,8 +3996,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .set_segment = svm_set_segment, .get_cpl = svm_get_cpl, .get_cs_db_l_bits = kvm_get_cs_db_l_bits, - .decache_cr0_guest_bits = svm_decache_cr0_guest_bits, - .decache_cr4_guest_bits = svm_decache_cr4_guest_bits, .set_cr0 = svm_set_cr0, .set_cr4 = svm_set_cr4, .set_efer = svm_set_efer, @@ -3947,8 +4009,10 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .get_rflags = svm_get_rflags, .set_rflags = svm_set_rflags, - .tlb_flush = svm_flush_tlb, + .tlb_flush_all = svm_flush_tlb, + .tlb_flush_current = svm_flush_tlb, .tlb_flush_gva = svm_flush_tlb_gva, + .tlb_flush_guest = svm_flush_tlb, .run = svm_vcpu_run, .handle_exit = handle_exit, @@ -3989,7 +4053,6 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .has_wbinvd_exit = svm_has_wbinvd_exit, - .read_l1_tsc_offset = svm_read_l1_tsc_offset, .write_l1_tsc_offset = svm_write_l1_tsc_offset, .load_mmu_pgd = svm_load_mmu_pgd, @@ -4002,6 +4065,8 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .sched_in = svm_sched_in, .pmu_ops = &amd_pmu_ops, + .nested_ops = &svm_nested_ops, + .deliver_posted_interrupt = svm_deliver_avic_intr, .dy_apicv_has_pending_interrupt = svm_dy_apicv_has_pending_interrupt, .update_pi_irte = svm_update_pi_irte, @@ -4016,14 +4081,9 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { .mem_enc_reg_region = svm_register_enc_region, .mem_enc_unreg_region = svm_unregister_enc_region, - .nested_enable_evmcs = NULL, - .nested_get_evmcs_version = NULL, - .need_emulation_on_page_fault = svm_need_emulation_on_page_fault, .apic_init_signal_blocked = svm_apic_init_signal_blocked, - - .check_nested_events = svm_check_nested_events, }; static struct kvm_x86_init_ops svm_init_ops __initdata = { diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index df3474f4fb02..6ac4c00a5d82 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -86,25 +86,17 @@ struct nested_state { u64 hsave_msr; u64 vm_cr_msr; u64 vmcb; + u32 host_intercept_exceptions; /* These are the merged vectors */ u32 *msrpm; - /* gpa pointers to the real vectors */ - u64 vmcb_msrpm; - u64 vmcb_iopm; + /* A VMRUN has started but has not yet been performed, so + * we cannot inject a nested vmexit yet. */ + bool nested_run_pending; - /* A VMEXIT is required but not yet emulated */ - bool exit_required; - - /* cache for intercepts of the guest */ - u32 intercept_cr; - u32 intercept_dr; - u32 intercept_exceptions; - u64 intercept; - - /* Nested Paging related state */ - u64 nested_cr3; + /* cache for control fields of the guest */ + struct vmcb_control_area ctl; }; struct vcpu_svm { @@ -360,8 +352,12 @@ u32 svm_msrpm_offset(u32 msr); void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer); void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0); int svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4); -void svm_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa); +void svm_flush_tlb(struct kvm_vcpu *vcpu); void disable_nmi_singlestep(struct vcpu_svm *svm); +bool svm_smi_blocked(struct kvm_vcpu *vcpu); +bool svm_nmi_blocked(struct kvm_vcpu *vcpu); +bool svm_interrupt_blocked(struct kvm_vcpu *vcpu); +void svm_set_gif(struct vcpu_svm *svm, bool value); /* nested.c */ @@ -369,28 +365,31 @@ void disable_nmi_singlestep(struct vcpu_svm *svm); #define NESTED_EXIT_DONE 1 /* Exit caused nested vmexit */ #define NESTED_EXIT_CONTINUE 2 /* Further checks needed */ -/* This function returns true if it is save to enable the nmi window */ -static inline bool nested_svm_nmi(struct vcpu_svm *svm) +static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(&svm->vcpu)) - return true; + struct vcpu_svm *svm = to_svm(vcpu); - if (!(svm->nested.intercept & (1ULL << INTERCEPT_NMI))) - return true; + return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK); +} - svm->vmcb->control.exit_code = SVM_EXIT_NMI; - svm->nested.exit_required = true; +static inline bool nested_exit_on_smi(struct vcpu_svm *svm) +{ + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_SMI)); +} - return false; +static inline bool nested_exit_on_intr(struct vcpu_svm *svm) +{ + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_INTR)); } -static inline bool svm_nested_virtualize_tpr(struct kvm_vcpu *vcpu) +static inline bool nested_exit_on_nmi(struct vcpu_svm *svm) { - return is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK); + return (svm->nested.ctl.intercept & (1ULL << INTERCEPT_NMI)); } void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa, - struct vmcb *nested_vmcb, struct kvm_host_map *map); + struct vmcb *nested_vmcb); +void svm_leave_nested(struct vcpu_svm *svm); int nested_svm_vmrun(struct vcpu_svm *svm); void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb); int nested_svm_vmexit(struct vcpu_svm *svm); @@ -398,8 +397,10 @@ int nested_svm_exit_handled(struct vcpu_svm *svm); int nested_svm_check_permissions(struct vcpu_svm *svm); int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr, bool has_error_code, u32 error_code); -int svm_check_nested_events(struct kvm_vcpu *vcpu); int nested_svm_exit_special(struct vcpu_svm *svm); +void sync_nested_vmcb_control(struct vcpu_svm *svm); + +extern struct kvm_x86_nested_ops svm_nested_ops; /* avic.c */ diff --git a/arch/x86/kvm/trace.h b/arch/x86/kvm/trace.h index 249062f24b94..b66432b015d2 100644 --- a/arch/x86/kvm/trace.h +++ b/arch/x86/kvm/trace.h @@ -225,6 +225,14 @@ TRACE_EVENT(kvm_apic, #define KVM_ISA_VMX 1 #define KVM_ISA_SVM 2 +#define kvm_print_exit_reason(exit_reason, isa) \ + (isa == KVM_ISA_VMX) ? \ + __print_symbolic(exit_reason & 0xffff, VMX_EXIT_REASONS) : \ + __print_symbolic(exit_reason, SVM_EXIT_REASONS), \ + (isa == KVM_ISA_VMX && exit_reason & ~0xffff) ? " " : "", \ + (isa == KVM_ISA_VMX) ? \ + __print_flags(exit_reason & ~0xffff, " ", VMX_EXIT_REASON_FLAGS) : "" + /* * Tracepoint for kvm guest exit: */ @@ -250,12 +258,10 @@ TRACE_EVENT(kvm_exit, &__entry->info2); ), - TP_printk("vcpu %u reason %s rip 0x%lx info %llx %llx", + TP_printk("vcpu %u reason %s%s%s rip 0x%lx info %llx %llx", __entry->vcpu_id, - (__entry->isa == KVM_ISA_VMX) ? - __print_symbolic(__entry->exit_reason, VMX_EXIT_REASONS) : - __print_symbolic(__entry->exit_reason, SVM_EXIT_REASONS), - __entry->guest_rip, __entry->info1, __entry->info2) + kvm_print_exit_reason(__entry->exit_reason, __entry->isa), + __entry->guest_rip, __entry->info1, __entry->info2) ); /* @@ -588,12 +594,10 @@ TRACE_EVENT(kvm_nested_vmexit, __entry->exit_int_info_err = exit_int_info_err; __entry->isa = isa; ), - TP_printk("rip: 0x%016llx reason: %s ext_inf1: 0x%016llx " + TP_printk("rip: 0x%016llx reason: %s%s%s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", __entry->rip, - (__entry->isa == KVM_ISA_VMX) ? - __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : - __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), + kvm_print_exit_reason(__entry->exit_code, __entry->isa), __entry->exit_info1, __entry->exit_info2, __entry->exit_int_info, __entry->exit_int_info_err) ); @@ -626,13 +630,11 @@ TRACE_EVENT(kvm_nested_vmexit_inject, __entry->isa = isa; ), - TP_printk("reason: %s ext_inf1: 0x%016llx " + TP_printk("reason: %s%s%s ext_inf1: 0x%016llx " "ext_inf2: 0x%016llx ext_int: 0x%08x ext_int_err: 0x%08x", - (__entry->isa == KVM_ISA_VMX) ? - __print_symbolic(__entry->exit_code, VMX_EXIT_REASONS) : - __print_symbolic(__entry->exit_code, SVM_EXIT_REASONS), - __entry->exit_info1, __entry->exit_info2, - __entry->exit_int_info, __entry->exit_int_info_err) + kvm_print_exit_reason(__entry->exit_code, __entry->isa), + __entry->exit_info1, __entry->exit_info2, + __entry->exit_int_info, __entry->exit_int_info_err) ); /* @@ -1539,6 +1541,57 @@ TRACE_EVENT(kvm_nested_vmenter_failed, __print_symbolic(__entry->err, VMX_VMENTER_INSTRUCTION_ERRORS)) ); +/* + * Tracepoint for syndbg_set_msr. + */ +TRACE_EVENT(kvm_hv_syndbg_set_msr, + TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data), + TP_ARGS(vcpu_id, vp_index, msr, data), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, vp_index) + __field(u32, msr) + __field(u64, data) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->vp_index = vp_index; + __entry->msr = msr; + __entry->data = data; + ), + + TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx", + __entry->vcpu_id, __entry->vp_index, __entry->msr, + __entry->data) +); + +/* + * Tracepoint for syndbg_get_msr. + */ +TRACE_EVENT(kvm_hv_syndbg_get_msr, + TP_PROTO(int vcpu_id, u32 vp_index, u32 msr, u64 data), + TP_ARGS(vcpu_id, vp_index, msr, data), + + TP_STRUCT__entry( + __field(int, vcpu_id) + __field(u32, vp_index) + __field(u32, msr) + __field(u64, data) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->vp_index = vp_index; + __entry->msr = msr; + __entry->data = data; + ), + + TP_printk("vcpu_id %d vp_index %u msr 0x%x data 0x%llx", + __entry->vcpu_id, __entry->vp_index, __entry->msr, + __entry->data) +); #endif /* _TRACE_KVM_H */ #undef TRACE_INCLUDE_PATH diff --git a/arch/x86/kvm/vmx/capabilities.h b/arch/x86/kvm/vmx/capabilities.h index 8903475f751e..4bbd8b448d22 100644 --- a/arch/x86/kvm/vmx/capabilities.h +++ b/arch/x86/kvm/vmx/capabilities.h @@ -18,6 +18,8 @@ extern int __read_mostly pt_mode; #define PT_MODE_SYSTEM 0 #define PT_MODE_HOST_GUEST 1 +#define PMU_CAP_FW_WRITES (1ULL << 13) + struct nested_vmx_msrs { /* * We only store the "true" versions of the VMX capability MSRs. We @@ -367,4 +369,13 @@ static inline bool vmx_pt_mode_is_host_guest(void) return pt_mode == PT_MODE_HOST_GUEST; } +static inline u64 vmx_get_perf_capabilities(void) +{ + /* + * Since counters are virtualized, KVM would support full + * width counting unconditionally, even if the host lacks it. + */ + return PMU_CAP_FW_WRITES; +} + #endif /* __KVM_X86_VMX_CAPS_H */ diff --git a/arch/x86/kvm/vmx/evmcs.c b/arch/x86/kvm/vmx/evmcs.c index 303813423c3e..e5325bd0f304 100644 --- a/arch/x86/kvm/vmx/evmcs.c +++ b/arch/x86/kvm/vmx/evmcs.c @@ -4,6 +4,7 @@ #include <linux/smp.h> #include "../hyperv.h" +#include "../cpuid.h" #include "evmcs.h" #include "vmcs.h" #include "vmx.h" @@ -160,14 +161,6 @@ const struct evmcs_field vmcs_field_to_evmcs_1[] = { HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), EVMCS1_FIELD(VM_ENTRY_MSR_LOAD_ADDR, vm_entry_msr_load_addr, HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), - EVMCS1_FIELD(CR3_TARGET_VALUE0, cr3_target_value0, - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), - EVMCS1_FIELD(CR3_TARGET_VALUE1, cr3_target_value1, - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), - EVMCS1_FIELD(CR3_TARGET_VALUE2, cr3_target_value2, - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), - EVMCS1_FIELD(CR3_TARGET_VALUE3, cr3_target_value3, - HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL), /* 32 bit rw */ EVMCS1_FIELD(TPR_THRESHOLD, tpr_threshold, @@ -334,17 +327,18 @@ bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa) uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu) { - struct vcpu_vmx *vmx = to_vmx(vcpu); - /* - * vmcs_version represents the range of supported Enlightened VMCS - * versions: lower 8 bits is the minimal version, higher 8 bits is the - * maximum supported version. KVM supports versions from 1 to - * KVM_EVMCS_VERSION. - */ - if (vmx->nested.enlightened_vmcs_enabled) - return (KVM_EVMCS_VERSION << 8) | 1; - - return 0; + struct vcpu_vmx *vmx = to_vmx(vcpu); + /* + * vmcs_version represents the range of supported Enlightened VMCS + * versions: lower 8 bits is the minimal version, higher 8 bits is the + * maximum supported version. KVM supports versions from 1 to + * KVM_EVMCS_VERSION. + */ + if (kvm_cpu_cap_get(X86_FEATURE_VMX) && + vmx->nested.enlightened_vmcs_enabled) + return (KVM_EVMCS_VERSION << 8) | 1; + + return 0; } void nested_evmcs_filter_control_msr(u32 msr_index, u64 *pdata) diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index e44f33c82332..9c74a732b08d 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -303,11 +303,11 @@ static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs) cpu = get_cpu(); prev = vmx->loaded_vmcs; vmx->loaded_vmcs = vmcs; - vmx_vcpu_load_vmcs(vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, prev); vmx_sync_vmcs_host_state(vmx, prev); put_cpu(); - vmx_segment_cache_clear(vmx); + vmx_register_cache_reset(vcpu); } /* @@ -328,19 +328,19 @@ static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu, { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); struct vcpu_vmx *vmx = to_vmx(vcpu); - u32 exit_reason; + u32 vm_exit_reason; unsigned long exit_qualification = vcpu->arch.exit_qualification; if (vmx->nested.pml_full) { - exit_reason = EXIT_REASON_PML_FULL; + vm_exit_reason = EXIT_REASON_PML_FULL; vmx->nested.pml_full = false; exit_qualification &= INTR_INFO_UNBLOCK_NMI; } else if (fault->error_code & PFERR_RSVD_MASK) - exit_reason = EXIT_REASON_EPT_MISCONFIG; + vm_exit_reason = EXIT_REASON_EPT_MISCONFIG; else - exit_reason = EXIT_REASON_EPT_VIOLATION; + vm_exit_reason = EXIT_REASON_EPT_VIOLATION; - nested_vmx_vmexit(vcpu, exit_reason, 0, exit_qualification); + nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification); vmcs12->guest_physical_address = fault->address; } @@ -437,11 +437,6 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, } } -static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa) -{ - return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu)); -} - static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -698,11 +693,6 @@ static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu) VM_EXIT_ACK_INTR_ON_EXIT; } -static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) -{ - return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); -} - static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { @@ -927,6 +917,7 @@ static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count) } return 0; fail: + /* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */ return i + 1; } @@ -1074,34 +1065,81 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) } /* + * Returns true if the MMU needs to be sync'd on nested VM-Enter/VM-Exit. + * tl;dr: the MMU needs a sync if L0 is using shadow paging and L1 didn't + * enable VPID for L2 (implying it expects a TLB flush on VMX transitions). + * Here's why. + * + * If EPT is enabled by L0 a sync is never needed: + * - if it is disabled by L1, then L0 is not shadowing L1 or L2 PTEs, there + * cannot be unsync'd SPTEs for either L1 or L2. + * + * - if it is also enabled by L1, then L0 doesn't need to sync on VM-Enter + * VM-Enter as VM-Enter isn't required to invalidate guest-physical mappings + * (irrespective of VPID), i.e. L1 can't rely on the (virtual) CPU to flush + * stale guest-physical mappings for L2 from the TLB. And as above, L0 isn't + * shadowing L1 PTEs so there are no unsync'd SPTEs to sync on VM-Exit. + * + * If EPT is disabled by L0: + * - if VPID is enabled by L1 (for L2), the situation is similar to when L1 + * enables EPT: L0 doesn't need to sync as VM-Enter and VM-Exit aren't + * required to invalidate linear mappings (EPT is disabled so there are + * no combined or guest-physical mappings), i.e. L1 can't rely on the + * (virtual) CPU to flush stale linear mappings for either L2 or itself (L1). + * + * - however if VPID is disabled by L1, then a sync is needed as L1 expects all + * linear mappings (EPT is disabled so there are no combined or guest-physical + * mappings) to be invalidated on both VM-Enter and VM-Exit. + * + * Note, this logic is subtly different than nested_has_guest_tlb_tag(), which + * additionally checks that L2 has been assigned a VPID (when EPT is disabled). + * Whether or not L2 has been assigned a VPID by L0 is irrelevant with respect + * to L1's expectations, e.g. L0 needs to invalidate hardware TLB entries if L2 + * doesn't have a unique VPID to prevent reusing L1's entries (assuming L1 has + * been assigned a VPID), but L0 doesn't need to do a MMU sync because L1 + * doesn't expect stale (virtual) TLB entries to be flushed, i.e. L1 doesn't + * know that L0 will flush the TLB and so L1 will do INVVPID as needed to flush + * stale TLB entries, at which point L0 will sync L2's MMU. + */ +static bool nested_vmx_transition_mmu_sync(struct kvm_vcpu *vcpu) +{ + return !enable_ept && !nested_cpu_has_vpid(get_vmcs12(vcpu)); +} + +/* * Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are * emulating VM-Entry into a guest with EPT enabled. On failure, the expected * Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to * @entry_failure_code. */ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, - u32 *entry_failure_code) + enum vm_entry_failure_code *entry_failure_code) { - if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { - if (CC(!nested_cr3_valid(vcpu, cr3))) { - *entry_failure_code = ENTRY_FAIL_DEFAULT; - return -EINVAL; - } + if (CC(!nested_cr3_valid(vcpu, cr3))) { + *entry_failure_code = ENTRY_FAIL_DEFAULT; + return -EINVAL; + } - /* - * If PAE paging and EPT are both on, CR3 is not used by the CPU and - * must not be dereferenced. - */ - if (is_pae_paging(vcpu) && !nested_ept) { - if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) { - *entry_failure_code = ENTRY_FAIL_PDPTE; - return -EINVAL; - } + /* + * If PAE paging and EPT are both on, CR3 is not used by the CPU and + * must not be dereferenced. + */ + if (!nested_ept && is_pae_paging(vcpu) && + (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) { + if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) { + *entry_failure_code = ENTRY_FAIL_PDPTE; + return -EINVAL; } } + /* + * Unconditionally skip the TLB flush on fast CR3 switch, all TLB + * flushes are handled by nested_vmx_transition_tlb_flush(). See + * nested_vmx_transition_mmu_sync for details on skipping the MMU sync. + */ if (!nested_ept) - kvm_mmu_new_cr3(vcpu, cr3, false); + kvm_mmu_new_pgd(vcpu, cr3, true, + !nested_vmx_transition_mmu_sync(vcpu)); vcpu->arch.cr3 = cr3; kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); @@ -1132,11 +1170,48 @@ static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu) (nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02); } -static u16 nested_get_vpid02(struct kvm_vcpu *vcpu) +static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12, + bool is_vmenter) { struct vcpu_vmx *vmx = to_vmx(vcpu); - return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid; + /* + * If VPID is disabled, linear and combined mappings are flushed on + * VM-Enter/VM-Exit, and guest-physical mappings are valid only for + * their associated EPTP. + */ + if (!enable_vpid) + return; + + /* + * If vmcs12 doesn't use VPID, L1 expects linear and combined mappings + * for *all* contexts to be flushed on VM-Enter/VM-Exit. + * + * If VPID is enabled and used by vmc12, but L2 does not have a unique + * TLB tag (ASID), i.e. EPT is disabled and KVM was unable to allocate + * a VPID for L2, flush the current context as the effective ASID is + * common to both L1 and L2. + * + * Defer the flush so that it runs after vmcs02.EPTP has been set by + * KVM_REQ_LOAD_MMU_PGD (if nested EPT is enabled) and to avoid + * redundant flushes further down the nested pipeline. + * + * If a TLB flush isn't required due to any of the above, and vpid12 is + * changing then the new "virtual" VPID (vpid12) will reuse the same + * "real" VPID (vpid02), and so needs to be sync'd. There is no direct + * mapping between vpid02 and vpid12, vpid02 is per-vCPU and reused for + * all nested vCPUs. + */ + if (!nested_cpu_has_vpid(vmcs12)) { + kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + } else if (!nested_has_guest_tlb_tag(vcpu)) { + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } else if (is_vmenter && + vmcs12->virtual_processor_id != vmx->nested.last_vpid) { + vmx->nested.last_vpid = vmcs12->virtual_processor_id; + vpid_sync_context(nested_get_vpid02(vcpu)); + } } static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask) @@ -1700,10 +1775,6 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx) * vmcs12->vm_exit_msr_store_addr = evmcs->vm_exit_msr_store_addr; * vmcs12->vm_exit_msr_load_addr = evmcs->vm_exit_msr_load_addr; * vmcs12->vm_entry_msr_load_addr = evmcs->vm_entry_msr_load_addr; - * vmcs12->cr3_target_value0 = evmcs->cr3_target_value0; - * vmcs12->cr3_target_value1 = evmcs->cr3_target_value1; - * vmcs12->cr3_target_value2 = evmcs->cr3_target_value2; - * vmcs12->cr3_target_value3 = evmcs->cr3_target_value3; * vmcs12->page_fault_error_code_mask = * evmcs->page_fault_error_code_mask; * vmcs12->page_fault_error_code_match = @@ -1777,10 +1848,6 @@ static int copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx) * evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr; * evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr; * evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr; - * evmcs->cr3_target_value0 = vmcs12->cr3_target_value0; - * evmcs->cr3_target_value1 = vmcs12->cr3_target_value1; - * evmcs->cr3_target_value2 = vmcs12->cr3_target_value2; - * evmcs->cr3_target_value3 = vmcs12->cr3_target_value3; * evmcs->tpr_threshold = vmcs12->tpr_threshold; * evmcs->virtual_processor_id = vmcs12->virtual_processor_id; * evmcs->exception_bitmap = vmcs12->exception_bitmap; @@ -2020,9 +2087,25 @@ static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer) return HRTIMER_NORESTART; } -static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) +static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >> + VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE; + + if (!vmx->nested.has_preemption_timer_deadline) { + vmx->nested.preemption_timer_deadline = + vmcs12->vmx_preemption_timer_value + l1_scaled_tsc; + vmx->nested.has_preemption_timer_deadline = true; + } + return vmx->nested.preemption_timer_deadline - l1_scaled_tsc; +} + +static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu, + u64 preemption_timeout) { - u64 preemption_timeout = get_vmcs12(vcpu)->vmx_preemption_timer_value; struct vcpu_vmx *vmx = to_vmx(vcpu); /* @@ -2041,7 +2124,8 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) preemption_timeout *= 1000000; do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz); hrtimer_start(&vmx->nested.preemption_timer, - ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL); + ktime_add_ns(ktime_get(), preemption_timeout), + HRTIMER_MODE_ABS_PINNED); } static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) @@ -2398,7 +2482,7 @@ static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12) * is assigned to entry_failure_code on failure. */ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - u32 *entry_failure_code) + enum vm_entry_failure_code *entry_failure_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs; @@ -2447,32 +2531,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); - if (enable_vpid) { - /* - * There is no direct mapping between vpid02 and vpid12, the - * vpid02 is per-vCPU for L0 and reused while the value of - * vpid12 is changed w/ one invvpid during nested vmentry. - * The vpid12 is allocated by L1 for L2, so it will not - * influence global bitmap(for vpid01 and vpid02 allocation) - * even if spawn a lot of nested vCPUs. - */ - if (nested_cpu_has_vpid(vmcs12) && nested_has_guest_tlb_tag(vcpu)) { - if (vmcs12->virtual_processor_id != vmx->nested.last_vpid) { - vmx->nested.last_vpid = vmcs12->virtual_processor_id; - __vmx_flush_tlb(vcpu, nested_get_vpid02(vcpu), false); - } - } else { - /* - * If L1 use EPT, then L0 needs to execute INVEPT on - * EPTP02 instead of EPTP01. Therefore, delay TLB - * flush until vmcs02->eptp is fully updated by - * KVM_REQ_LOAD_MMU_PGD. Note that this assumes - * KVM_REQ_TLB_FLUSH is evaluated after - * KVM_REQ_LOAD_MMU_PGD in vcpu_enter_guest(). - */ - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } - } + nested_vmx_transition_tlb_flush(vcpu, vmcs12, true); if (nested_cpu_has_ept(vmcs12)) nested_ept_init_mmu_context(vcpu); @@ -2883,11 +2942,11 @@ static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12) static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - u32 *exit_qual) + enum vm_entry_failure_code *entry_failure_code) { bool ia32e; - *exit_qual = ENTRY_FAIL_DEFAULT; + *entry_failure_code = ENTRY_FAIL_DEFAULT; if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) || CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4))) @@ -2902,7 +2961,7 @@ static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu, return -EINVAL; if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) { - *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; + *entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR; return -EINVAL; } @@ -3194,9 +3253,12 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + enum vm_entry_failure_code entry_failure_code; bool evaluate_pending_interrupts; - u32 exit_reason = EXIT_REASON_INVALID_STATE; - u32 exit_qual; + u32 exit_reason, failed_index; + + if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) + kvm_vcpu_flush_tlb_current(vcpu); evaluate_pending_interrupts = exec_controls_get(vmx) & (CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING); @@ -3241,24 +3303,33 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, return NVMX_VMENTRY_VMFAIL; } - if (nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) + if (nested_vmx_check_guest_state(vcpu, vmcs12, + &entry_failure_code)) { + exit_reason = EXIT_REASON_INVALID_STATE; + vmcs12->exit_qualification = entry_failure_code; goto vmentry_fail_vmexit; + } } enter_guest_mode(vcpu); if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING) vcpu->arch.tsc_offset += vmcs12->tsc_offset; - if (prepare_vmcs02(vcpu, vmcs12, &exit_qual)) + if (prepare_vmcs02(vcpu, vmcs12, &entry_failure_code)) { + exit_reason = EXIT_REASON_INVALID_STATE; + vmcs12->exit_qualification = entry_failure_code; goto vmentry_fail_vmexit_guest_mode; + } if (from_vmentry) { - exit_reason = EXIT_REASON_MSR_LOAD_FAIL; - exit_qual = nested_vmx_load_msr(vcpu, - vmcs12->vm_entry_msr_load_addr, - vmcs12->vm_entry_msr_load_count); - if (exit_qual) + failed_index = nested_vmx_load_msr(vcpu, + vmcs12->vm_entry_msr_load_addr, + vmcs12->vm_entry_msr_load_count); + if (failed_index) { + exit_reason = EXIT_REASON_MSR_LOAD_FAIL; + vmcs12->exit_qualification = failed_index; goto vmentry_fail_vmexit_guest_mode; + } } else { /* * The MMU is not initialized to point at the right entities yet and @@ -3293,8 +3364,10 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, * the timer. */ vmx->nested.preemption_timer_expired = false; - if (nested_cpu_has_preemption_timer(vmcs12)) - vmx_start_preemption_timer(vcpu); + if (nested_cpu_has_preemption_timer(vmcs12)) { + u64 timer_value = vmx_calc_preemption_timer_value(vcpu); + vmx_start_preemption_timer(vcpu, timer_value); + } /* * Note no nested_vmx_succeed or nested_vmx_fail here. At this point @@ -3322,7 +3395,6 @@ vmentry_fail_vmexit: load_vmcs12_host_state(vcpu, vmcs12); vmcs12->vm_exit_reason = exit_reason | VMX_EXIT_REASONS_FAILED_VMENTRY; - vmcs12->exit_qualification = exit_qual; if (enable_shadow_vmcs || vmx->nested.hv_evmcs) vmx->nested.need_vmcs12_to_shadow_sync = true; return NVMX_VMENTRY_VMEXIT; @@ -3403,6 +3475,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ vmx->nested.nested_run_pending = 1; + vmx->nested.has_preemption_timer_deadline = false; status = nested_vmx_enter_non_root_mode(vcpu, true); if (unlikely(status != NVMX_VMENTRY_SUCCESS)) goto vmentry_failed; @@ -3632,6 +3705,12 @@ static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu) vcpu->arch.exception.payload); } +static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu) +{ + return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && + to_vmx(vcpu)->nested.preemption_timer_expired; +} + static int vmx_check_nested_events(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -3661,11 +3740,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) /* * Process any exceptions that are not debug traps before MTF. */ - if (vcpu->arch.exception.pending && - !vmx_pending_dbg_trap(vcpu) && - nested_vmx_check_exception(vcpu, &exit_qual)) { + if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_vmx_check_exception(vcpu, &exit_qual)) + goto no_vmexit; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); return 0; } @@ -3678,25 +3757,34 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return 0; } - if (vcpu->arch.exception.pending && - nested_vmx_check_exception(vcpu, &exit_qual)) { + if (vcpu->arch.exception.pending) { if (block_nested_events) return -EBUSY; + if (!nested_vmx_check_exception(vcpu, &exit_qual)) + goto no_vmexit; nested_vmx_inject_exception_vmexit(vcpu, exit_qual); return 0; } - if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && - vmx->nested.preemption_timer_expired) { + if (nested_vmx_preemption_timer_pending(vcpu)) { if (block_nested_events) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0); return 0; } - if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { + if (vcpu->arch.smi_pending && !is_smm(vcpu)) { + if (block_nested_events) + return -EBUSY; + goto no_vmexit; + } + + if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_exit_on_nmi(vcpu)) + goto no_vmexit; + nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK, 0); @@ -3709,13 +3797,16 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu) return 0; } - if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(vcpu)) { + if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) { if (block_nested_events) return -EBUSY; + if (!nested_exit_on_intr(vcpu)) + goto no_vmexit; nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0); return 0; } +no_vmexit: vmx_complete_nested_posted_interrupt(vcpu); return 0; } @@ -3842,12 +3933,12 @@ static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu, cpu = get_cpu(); vmx->loaded_vmcs = &vmx->nested.vmcs02; - vmx_vcpu_load(&vmx->vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01); sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12); vmx->loaded_vmcs = &vmx->vmcs01; - vmx_vcpu_load(&vmx->vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02); put_cpu(); } @@ -3876,10 +3967,6 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES); vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES); - vmcs12->guest_sysenter_cs = vmcs_read32(GUEST_SYSENTER_CS); - vmcs12->guest_sysenter_esp = vmcs_readl(GUEST_SYSENTER_ESP); - vmcs12->guest_sysenter_eip = vmcs_readl(GUEST_SYSENTER_EIP); - vmcs12->guest_interruptibility_info = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO); @@ -3889,9 +3976,10 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE; if (nested_cpu_has_preemption_timer(vmcs12) && - vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER) - vmcs12->vmx_preemption_timer_value = - vmx_get_preemption_timer_value(vcpu); + vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER && + !vmx->nested.nested_run_pending) + vmcs12->vmx_preemption_timer_value = + vmx_get_preemption_timer_value(vcpu); /* * In some cases (usually, nested EPT), L2 is allowed to change its @@ -3939,11 +4027,11 @@ static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) * which already writes to vmcs12 directly. */ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - u32 exit_reason, u32 exit_intr_info, + u32 vm_exit_reason, u32 exit_intr_info, unsigned long exit_qualification) { /* update exit information fields: */ - vmcs12->vm_exit_reason = exit_reason; + vmcs12->vm_exit_reason = vm_exit_reason; vmcs12->exit_qualification = exit_qualification; vmcs12->vm_exit_intr_info = exit_intr_info; @@ -3998,8 +4086,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { + enum vm_entry_failure_code ignored; struct kvm_segment seg; - u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; @@ -4034,30 +4122,13 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, * Only PDPTE load can fail as the value of cr3 was checked on entry and * couldn't have changed. */ - if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &entry_failure_code)) + if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored)) nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL); if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault; - /* - * If vmcs01 doesn't use VPID, CPU flushes TLB on every - * VMEntry/VMExit. Thus, no need to flush TLB. - * - * If vmcs12 doesn't use VPID, L1 expects TLB to be - * flushed on every VMEntry/VMExit. - * - * Otherwise, we can preserve TLB entries as long as we are - * able to tag L1 TLB entries differently than L2 TLB entries. - * - * If vmcs12 uses EPT, we need to execute this flush on EPTP01 - * and therefore we request the TLB flush to happen only after VMCS EPTP - * has been set by KVM_REQ_LOAD_MMU_PGD. - */ - if (enable_vpid && - (!nested_cpu_has_vpid(vmcs12) || !nested_has_guest_tlb_tag(vcpu))) { - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); - } + nested_vmx_transition_tlb_flush(vcpu, vmcs12, false); vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs); vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp); @@ -4204,7 +4275,7 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) * VMFail, like everything else we just need to ensure our * software model is up-to-date. */ - if (enable_ept) + if (enable_ept && is_pae_paging(vcpu)) ept_save_pdptrs(vcpu); kvm_mmu_reset_context(vcpu); @@ -4272,7 +4343,7 @@ vmabort: * and modify vmcs12 to make it see what it would expect to see there if * L2 was its real guest. Must only be called when in L2 (is_guest_mode()) */ -void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, +void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, u32 exit_intr_info, unsigned long exit_qualification) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -4281,6 +4352,10 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, /* trying to cancel vmlaunch/vmresume is a bug */ WARN_ON_ONCE(vmx->nested.nested_run_pending); + /* Service the TLB flush request for L2 before switching to L1. */ + if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) + kvm_vcpu_flush_tlb_current(vcpu); + leave_guest_mode(vcpu); if (nested_cpu_has_preemption_timer(vmcs12)) @@ -4292,9 +4367,9 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, if (likely(!vmx->fail)) { sync_vmcs02_to_vmcs12(vcpu, vmcs12); - if (exit_reason != -1) - prepare_vmcs12(vcpu, vmcs12, exit_reason, exit_intr_info, - exit_qualification); + if (vm_exit_reason != -1) + prepare_vmcs12(vcpu, vmcs12, vm_exit_reason, + exit_intr_info, exit_qualification); /* * Must happen outside of sync_vmcs02_to_vmcs12() as it will @@ -4344,20 +4419,20 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true); vmx->nested.pi_desc = NULL; - /* - * We are now running in L2, mmu_notifier will force to reload the - * page's hpa for L2 vmcs. Need to reload it for L1 before entering L1. - */ - kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); + if (vmx->nested.reload_vmcs01_apic_access_page) { + vmx->nested.reload_vmcs01_apic_access_page = false; + kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); + } - if ((exit_reason != -1) && (enable_shadow_vmcs || vmx->nested.hv_evmcs)) + if ((vm_exit_reason != -1) && + (enable_shadow_vmcs || vmx->nested.hv_evmcs)) vmx->nested.need_vmcs12_to_shadow_sync = true; /* in case we halted in L2 */ vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; if (likely(!vmx->fail)) { - if (exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && + if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT && nested_exit_intr_ack_set(vcpu)) { int irq = kvm_cpu_get_interrupt(vcpu); WARN_ON(irq < 0); @@ -4365,7 +4440,7 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR; } - if (exit_reason != -1) + if (vm_exit_reason != -1) trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason, vmcs12->exit_qualification, vmcs12->idt_vectoring_info_field, @@ -4554,13 +4629,13 @@ static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer) gva_t gva; struct x86_exception e; - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmcs_read32(VMX_INSTRUCTION_INFO), false, sizeof(*vmpointer), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, vmpointer, sizeof(*vmpointer), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } @@ -4614,7 +4689,7 @@ static int enter_vmx_operation(struct kvm_vcpu *vcpu) goto out_shadow_vmcs; hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_PINNED); + HRTIMER_MODE_ABS_PINNED); vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; vmx->nested.vpid02 = allocate_vpid(); @@ -4819,7 +4894,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) : get_vmcs12(vcpu); - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct x86_exception e; @@ -4869,7 +4944,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu) return 1; /* _system ok, nested_vmx_check_permission has verified cpl=0 */ if (kvm_write_guest_virt_system(vcpu, gva, &value, len, &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } } @@ -4905,7 +4980,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) { struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu) : get_vmcs12(vcpu); - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); struct vcpu_vmx *vmx = to_vmx(vcpu); struct x86_exception e; @@ -4943,7 +5018,7 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) instr_info, false, len, &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &value, len, &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } } @@ -5090,7 +5165,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) /* Emulate the VMPTRST instruction */ static int handle_vmptrst(struct kvm_vcpu *vcpu) { - unsigned long exit_qual = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qual = vmx_get_exit_qual(vcpu); u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO); gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr; struct x86_exception e; @@ -5108,23 +5183,33 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu) /* *_system ok, nested_vmx_check_permission has verified cpl=0 */ if (kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr, sizeof(gpa_t), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } return nested_vmx_succeed(vcpu); } +#define EPTP_PA_MASK GENMASK_ULL(51, 12) + +static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp) +{ + return VALID_PAGE(root_hpa) && + ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK)); +} + /* Emulate the INVEPT instruction */ static int handle_invept(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 vmx_instruction_info, types; - unsigned long type; + unsigned long type, roots_to_free; + struct kvm_mmu *mmu; gva_t gva; struct x86_exception e; struct { u64 eptp, gpa; } operand; + int i; if (!(vmx->nested.msrs.secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) || @@ -5148,27 +5233,49 @@ static int handle_invept(struct kvm_vcpu *vcpu) /* According to the Intel VMX instruction reference, the memory * operand is read even if it isn't needed (e.g., for type==global) */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmx_instruction_info, false, sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } - switch (type) { - case VMX_EPT_EXTENT_GLOBAL: - case VMX_EPT_EXTENT_CONTEXT: /* - * TODO: Sync the necessary shadow EPT roots here, rather than - * at the next emulated VM-entry. + * Nested EPT roots are always held through guest_mmu, + * not root_mmu. */ + mmu = &vcpu->arch.guest_mmu; + + switch (type) { + case VMX_EPT_EXTENT_CONTEXT: + if (!nested_vmx_check_eptp(vcpu, operand.eptp)) + return nested_vmx_failValid(vcpu, + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); + + roots_to_free = 0; + if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd, + operand.eptp)) + roots_to_free |= KVM_MMU_ROOT_CURRENT; + + for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) { + if (nested_ept_root_matches(mmu->prev_roots[i].hpa, + mmu->prev_roots[i].pgd, + operand.eptp)) + roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); + } + break; + case VMX_EPT_EXTENT_GLOBAL: + roots_to_free = KVM_MMU_ROOTS_ALL; break; default: BUG(); break; } + if (roots_to_free) + kvm_mmu_free_roots(vcpu, mmu, roots_to_free); + return nested_vmx_succeed(vcpu); } @@ -5208,11 +5315,11 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) /* according to the intel vmx instruction reference, the memory * operand is read even if it isn't needed (e.g., for type==global) */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmx_instruction_info, false, sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } if (operand.vpid >> 16) @@ -5226,27 +5333,37 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) is_noncanonical_address(operand.gla, vcpu)) return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - if (cpu_has_vmx_invvpid_individual_addr()) { - __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, - vpid02, operand.gla); - } else - __vmx_flush_tlb(vcpu, vpid02, false); + vpid_sync_vcpu_addr(vpid02, operand.gla); break; case VMX_VPID_EXTENT_SINGLE_CONTEXT: case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL: if (!operand.vpid) return nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); - __vmx_flush_tlb(vcpu, vpid02, false); + vpid_sync_context(vpid02); break; case VMX_VPID_EXTENT_ALL_CONTEXT: - __vmx_flush_tlb(vcpu, vpid02, false); + vpid_sync_context(vpid02); break; default: WARN_ON_ONCE(1); return kvm_skip_emulated_instruction(vcpu); } + /* + * Sync the shadow page tables if EPT is disabled, L1 is invalidating + * linear mappings for L2 (tagged with L2's VPID). Free all roots as + * VPIDs are not tracked in the MMU role. + * + * Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share + * an MMU when EPT is disabled. + * + * TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR. + */ + if (!enable_ept) + kvm_mmu_free_roots(vcpu, &vcpu->arch.root_mmu, + KVM_MMU_ROOTS_ALL); + return nested_vmx_succeed(vcpu); } @@ -5327,8 +5444,8 @@ static int handle_vmfunc(struct kvm_vcpu *vcpu) fail: nested_vmx_vmexit(vcpu, vmx->exit_reason, - vmcs_read32(VM_EXIT_INTR_INFO), - vmcs_readl(EXIT_QUALIFICATION)); + vmx_get_intr_info(vcpu), + vmx_get_exit_qual(vcpu)); return 1; } @@ -5379,7 +5496,7 @@ static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu, if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS)) return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING); - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); port = exit_qualification >> 16; size = (exit_qualification & 7) + 1; @@ -5433,7 +5550,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu, static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); int cr = exit_qualification & 15; int reg; unsigned long val; @@ -5449,15 +5566,6 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu, return true; break; case 3: - if ((vmcs12->cr3_target_count >= 1 && - vmcs12->cr3_target_value0 == val) || - (vmcs12->cr3_target_count >= 2 && - vmcs12->cr3_target_value1 == val) || - (vmcs12->cr3_target_count >= 3 && - vmcs12->cr3_target_value2 == val) || - (vmcs12->cr3_target_count >= 4 && - vmcs12->cr3_target_value3 == val)) - return false; if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING)) return true; break; @@ -5551,49 +5659,85 @@ static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12) } /* - * Return true if we should exit from L2 to L1 to handle an exit, or false if we - * should handle it ourselves in L0 (and then continue L2). Only call this - * when in is_guest_mode (L2). + * Return true if L0 wants to handle an exit from L2 regardless of whether or not + * L1 wants the exit. Only call this when in is_guest_mode (L2). */ -bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) +static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) { - u32 intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - WARN_ON_ONCE(vmx->nested.nested_run_pending); - - if (unlikely(vmx->fail)) { - trace_kvm_nested_vmenter_failed( - "hardware VM-instruction error: ", - vmcs_read32(VM_INSTRUCTION_ERROR)); - return true; - } - - trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, - vmcs_readl(EXIT_QUALIFICATION), - vmx->idt_vectoring_info, - intr_info, - vmcs_read32(VM_EXIT_INTR_ERROR_CODE), - KVM_ISA_VMX); + u32 intr_info; switch (exit_reason) { case EXIT_REASON_EXCEPTION_NMI: + intr_info = vmx_get_intr_info(vcpu); if (is_nmi(intr_info)) - return false; + return true; else if (is_page_fault(intr_info)) - return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept; + return vcpu->arch.apf.host_apf_flags || !enable_ept; else if (is_debug(intr_info) && vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return false; + return true; else if (is_breakpoint(intr_info) && vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) - return false; + return true; + return false; + case EXIT_REASON_EXTERNAL_INTERRUPT: + return true; + case EXIT_REASON_MCE_DURING_VMENTRY: + return true; + case EXIT_REASON_EPT_VIOLATION: + /* + * L0 always deals with the EPT violation. If nested EPT is + * used, and the nested mmu code discovers that the address is + * missing in the guest EPT table (EPT12), the EPT violation + * will be injected with nested_ept_inject_page_fault() + */ + return true; + case EXIT_REASON_EPT_MISCONFIG: + /* + * L2 never uses directly L1's EPT, but rather L0's own EPT + * table (shadow on EPT) or a merged EPT table that L0 built + * (EPT on EPT). So any problems with the structure of the + * table is L0's fault. + */ + return true; + case EXIT_REASON_PREEMPTION_TIMER: + return true; + case EXIT_REASON_PML_FULL: + /* We emulate PML support to L1. */ + return true; + case EXIT_REASON_VMFUNC: + /* VM functions are emulated through L2->L0 vmexits. */ + return true; + case EXIT_REASON_ENCLS: + /* SGX is never exposed to L1 */ + return true; + default: + break; + } + return false; +} + +/* + * Return 1 if L1 wants to intercept an exit from L2. Only call this when in + * is_guest_mode (L2). + */ +static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu, u32 exit_reason) +{ + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + u32 intr_info; + + switch (exit_reason) { + case EXIT_REASON_EXCEPTION_NMI: + intr_info = vmx_get_intr_info(vcpu); + if (is_nmi(intr_info)) + return true; + else if (is_page_fault(intr_info)) + return true; return vmcs12->exception_bitmap & (1u << (intr_info & INTR_INFO_VECTOR_MASK)); case EXIT_REASON_EXTERNAL_INTERRUPT: - return false; + return nested_exit_on_intr(vcpu); case EXIT_REASON_TRIPLE_FAULT: return true; case EXIT_REASON_INTERRUPT_WINDOW: @@ -5658,7 +5802,7 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) nested_cpu_has2(vmcs12, SECONDARY_EXEC_PAUSE_LOOP_EXITING); case EXIT_REASON_MCE_DURING_VMENTRY: - return false; + return true; case EXIT_REASON_TPR_BELOW_THRESHOLD: return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW); case EXIT_REASON_APIC_ACCESS: @@ -5670,22 +5814,6 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) * delivery" only come from vmcs12. */ return true; - case EXIT_REASON_EPT_VIOLATION: - /* - * L0 always deals with the EPT violation. If nested EPT is - * used, and the nested mmu code discovers that the address is - * missing in the guest EPT table (EPT12), the EPT violation - * will be injected with nested_ept_inject_page_fault() - */ - return false; - case EXIT_REASON_EPT_MISCONFIG: - /* - * L2 never uses directly L1's EPT, but rather L0's own EPT - * table (shadow on EPT) or a merged EPT table that L0 built - * (EPT on EPT). So any problems with the structure of the - * table is L0's fault. - */ - return false; case EXIT_REASON_INVPCID: return nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) && @@ -5702,17 +5830,6 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) * the XSS exit bitmap in vmcs12. */ return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES); - case EXIT_REASON_PREEMPTION_TIMER: - return false; - case EXIT_REASON_PML_FULL: - /* We emulate PML support to L1. */ - return false; - case EXIT_REASON_VMFUNC: - /* VM functions are emulated through L2->L0 vmexits. */ - return false; - case EXIT_REASON_ENCLS: - /* SGX is never exposed to L1 */ - return false; case EXIT_REASON_UMWAIT: case EXIT_REASON_TPAUSE: return nested_cpu_has2(vmcs12, @@ -5722,6 +5839,67 @@ bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason) } } +/* + * Conditionally reflect a VM-Exit into L1. Returns %true if the VM-Exit was + * reflected into L1. + */ +bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 exit_reason = vmx->exit_reason; + unsigned long exit_qual; + u32 exit_intr_info; + + WARN_ON_ONCE(vmx->nested.nested_run_pending); + + /* + * Late nested VM-Fail shares the same flow as nested VM-Exit since KVM + * has already loaded L2's state. + */ + if (unlikely(vmx->fail)) { + trace_kvm_nested_vmenter_failed( + "hardware VM-instruction error: ", + vmcs_read32(VM_INSTRUCTION_ERROR)); + exit_intr_info = 0; + exit_qual = 0; + goto reflect_vmexit; + } + + exit_intr_info = vmx_get_intr_info(vcpu); + exit_qual = vmx_get_exit_qual(vcpu); + + trace_kvm_nested_vmexit(kvm_rip_read(vcpu), exit_reason, exit_qual, + vmx->idt_vectoring_info, exit_intr_info, + vmcs_read32(VM_EXIT_INTR_ERROR_CODE), + KVM_ISA_VMX); + + /* If L0 (KVM) wants the exit, it trumps L1's desires. */ + if (nested_vmx_l0_wants_exit(vcpu, exit_reason)) + return false; + + /* If L1 doesn't want the exit, handle it in L0. */ + if (!nested_vmx_l1_wants_exit(vcpu, exit_reason)) + return false; + + /* + * vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits. For + * EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would + * need to be synthesized by querying the in-kernel LAPIC, but external + * interrupts are never reflected to L1 so it's a non-issue. + */ + if ((exit_intr_info & + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == + (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + + vmcs12->vm_exit_intr_error_code = + vmcs_read32(VM_EXIT_INTR_ERROR_CODE); + } + +reflect_vmexit: + nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, exit_qual); + return true; +} static int vmx_get_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state __user *user_kvm_nested_state, @@ -5733,8 +5911,10 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, .flags = 0, .format = KVM_STATE_NESTED_FORMAT_VMX, .size = sizeof(kvm_state), + .hdr.vmx.flags = 0, .hdr.vmx.vmxon_pa = -1ull, .hdr.vmx.vmcs12_pa = -1ull, + .hdr.vmx.preemption_timer_deadline = 0, }; struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = &user_kvm_nested_state->data.vmx[0]; @@ -5776,6 +5956,14 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, if (vmx->nested.mtf_pending) kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING; + + if (nested_cpu_has_preemption_timer(vmcs12) && + vmx->nested.has_preemption_timer_deadline) { + kvm_state.hdr.vmx.flags |= + KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE; + kvm_state.hdr.vmx.preemption_timer_deadline = + vmx->nested.preemption_timer_deadline; + } } } @@ -5821,7 +6009,6 @@ static int vmx_get_nested_state(struct kvm_vcpu *vcpu, get_shadow_vmcs12(vcpu), VMCS12_SIZE)) return -EFAULT; } - out: return kvm_state.size; } @@ -5844,7 +6031,7 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); struct vmcs12 *vmcs12; - u32 exit_qual; + enum vm_entry_failure_code ignored; struct kvm_vmx_nested_state_data __user *user_vmx_nested_state = &user_kvm_nested_state->data.vmx[0]; int ret; @@ -5983,9 +6170,15 @@ static int vmx_set_nested_state(struct kvm_vcpu *vcpu, goto error_guest_mode; } + if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) { + vmx->nested.has_preemption_timer_deadline = true; + vmx->nested.preemption_timer_deadline = + kvm_state->hdr.vmx.preemption_timer_deadline; + } + if (nested_vmx_check_controls(vcpu, vmcs12) || nested_vmx_check_host_state(vcpu, vmcs12) || - nested_vmx_check_guest_state(vcpu, vmcs12, &exit_qual)) + nested_vmx_check_guest_state(vcpu, vmcs12, &ignored)) goto error_guest_mode; vmx->nested.dirty_vmcs12 = true; @@ -6031,7 +6224,7 @@ void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps) * reason is that if one of these bits is necessary, it will appear * in vmcs01 and prepare_vmcs02, when it bitwise-or's the control * fields of vmcs01 and vmcs02, will turn these bits off - and - * nested_vmx_exit_reflected() will not pass related exits to L1. + * nested_vmx_l1_wants_exit() will not pass related exits to L1. * These rules have exceptions below. */ @@ -6259,8 +6452,7 @@ void nested_vmx_hardware_unsetup(void) } } -__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops, - int (*exit_handlers[])(struct kvm_vcpu *)) +__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) { int i; @@ -6296,12 +6488,15 @@ __init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops, exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid; exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc; - ops->check_nested_events = vmx_check_nested_events; - ops->get_nested_state = vmx_get_nested_state; - ops->set_nested_state = vmx_set_nested_state; - ops->get_vmcs12_pages = nested_get_vmcs12_pages; - ops->nested_enable_evmcs = nested_enable_evmcs; - ops->nested_get_evmcs_version = nested_get_evmcs_version; - return 0; } + +struct kvm_x86_nested_ops vmx_nested_ops = { + .check_events = vmx_check_nested_events, + .hv_timer_pending = nested_vmx_preemption_timer_pending, + .get_state = vmx_get_nested_state, + .set_state = vmx_set_nested_state, + .get_vmcs12_pages = nested_get_vmcs12_pages, + .enable_evmcs = nested_enable_evmcs, + .get_evmcs_version = nested_get_evmcs_version, +}; diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h index ac56aefa49e3..758bccc26cf9 100644 --- a/arch/x86/kvm/vmx/nested.h +++ b/arch/x86/kvm/vmx/nested.h @@ -19,14 +19,13 @@ enum nvmx_vmentry_status { void vmx_leave_nested(struct kvm_vcpu *vcpu); void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps); void nested_vmx_hardware_unsetup(void); -__init int nested_vmx_hardware_setup(struct kvm_x86_ops *ops, - int (*exit_handlers[])(struct kvm_vcpu *)); +__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)); void nested_vmx_set_vmcs_shadowing_bitmap(void); void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu); enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry); -bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason); -void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, +bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu); +void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason, u32 exit_intr_info, unsigned long exit_qualification); void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu); int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data); @@ -62,6 +61,13 @@ static inline int vmx_has_valid_vmcs12(struct kvm_vcpu *vcpu) vmx->nested.hv_evmcs; } +static inline u16 nested_get_vpid02(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + return vmx->nested.vpid02 ? vmx->nested.vpid02 : vmx->vpid; +} + static inline unsigned long nested_ept_get_eptp(struct kvm_vcpu *vcpu) { /* return the page table to be shadowed - in our case, EPT12 */ @@ -74,34 +80,6 @@ static inline bool nested_ept_ad_enabled(struct kvm_vcpu *vcpu) } /* - * Reflect a VM Exit into L1. - */ -static inline int nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu, - u32 exit_reason) -{ - u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); - - /* - * At this point, the exit interruption info in exit_intr_info - * is only valid for EXCEPTION_NMI exits. For EXTERNAL_INTERRUPT - * we need to query the in-kernel LAPIC. - */ - WARN_ON(exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT); - if ((exit_intr_info & - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) == - (INTR_INFO_VALID_MASK | INTR_INFO_DELIVER_CODE_MASK)) { - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - vmcs12->vm_exit_intr_error_code = - vmcs_read32(VM_EXIT_INTR_ERROR_CODE); - } - - nested_vmx_vmexit(vcpu, exit_reason, exit_intr_info, - vmcs_readl(EXIT_QUALIFICATION)); - return 1; -} - -/* * Return the cr0 value that a nested guest would read. This is a combination * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by * its hypervisor (cr0_read_shadow). @@ -246,6 +224,11 @@ static inline bool nested_cpu_has_save_preemption_timer(struct vmcs12 *vmcs12) VM_EXIT_SAVE_VMX_PREEMPTION_TIMER; } +static inline bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) +{ + return nested_cpu_has_nmi_exiting(get_vmcs12(vcpu)); +} + /* * In nested virtualization, check if L1 asked to exit on external interrupts. * For most existing hypervisors, this will always return true. @@ -299,4 +282,6 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val) #define nested_guest_cr4_valid nested_cr4_valid #define nested_host_cr4_valid nested_cr4_valid +extern struct kvm_x86_nested_ops vmx_nested_ops; + #endif /* __KVM_X86_VMX_NESTED_H */ diff --git a/arch/x86/kvm/vmx/ops.h b/arch/x86/kvm/vmx/ops.h index 19717d0a1100..5f1ac002b4b6 100644 --- a/arch/x86/kvm/vmx/ops.h +++ b/arch/x86/kvm/vmx/ops.h @@ -268,42 +268,38 @@ static inline void __invept(unsigned long ext, u64 eptp, gpa_t gpa) vmx_asm2(invept, "r"(ext), "m"(operand), ext, eptp, gpa); } -static inline bool vpid_sync_vcpu_addr(int vpid, gva_t addr) -{ - if (vpid == 0) - return true; - - if (cpu_has_vmx_invvpid_individual_addr()) { - __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr); - return true; - } - - return false; -} - static inline void vpid_sync_vcpu_single(int vpid) { if (vpid == 0) return; - if (cpu_has_vmx_invvpid_single()) - __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); + __invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vpid, 0); } static inline void vpid_sync_vcpu_global(void) { - if (cpu_has_vmx_invvpid_global()) - __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); + __invvpid(VMX_VPID_EXTENT_ALL_CONTEXT, 0, 0); } static inline void vpid_sync_context(int vpid) { if (cpu_has_vmx_invvpid_single()) vpid_sync_vcpu_single(vpid); - else + else if (vpid != 0) vpid_sync_vcpu_global(); } +static inline void vpid_sync_vcpu_addr(int vpid, gva_t addr) +{ + if (vpid == 0) + return; + + if (cpu_has_vmx_invvpid_individual_addr()) + __invvpid(VMX_VPID_EXTENT_INDIVIDUAL_ADDR, vpid, addr); + else + vpid_sync_context(vpid); +} + static inline void ept_sync_global(void) { __invept(VMX_EPT_EXTENT_GLOBAL, 0, 0); diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 7c857737b438..d33d890b605f 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -18,6 +18,8 @@ #include "nested.h" #include "pmu.h" +#define MSR_PMC_FULL_WIDTH_BIT (MSR_IA32_PMC0 - MSR_IA32_PERFCTR0) + static struct kvm_event_hw_type_mapping intel_arch_events[] = { /* Index must match CPUID 0x0A.EBX bit vector */ [0] = { 0x3c, 0x00, PERF_COUNT_HW_CPU_CYCLES }, @@ -150,6 +152,22 @@ static struct kvm_pmc *intel_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu, return &counters[array_index_nospec(idx, num_counters)]; } +static inline bool fw_writes_is_enabled(struct kvm_vcpu *vcpu) +{ + if (!guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + return false; + + return vcpu->arch.perf_capabilities & PMU_CAP_FW_WRITES; +} + +static inline struct kvm_pmc *get_fw_gp_pmc(struct kvm_pmu *pmu, u32 msr) +{ + if (!fw_writes_is_enabled(pmu_to_vcpu(pmu))) + return NULL; + + return get_gp_pmc(pmu, msr, MSR_IA32_PMC0); +} + static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); @@ -162,10 +180,13 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr) case MSR_CORE_PERF_GLOBAL_OVF_CTRL: ret = pmu->version > 1; break; + case MSR_IA32_PERF_CAPABILITIES: + ret = guest_cpuid_has(vcpu, X86_FEATURE_PDCM); + break; default: ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) || get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0) || - get_fixed_pmc(pmu, msr); + get_fixed_pmc(pmu, msr) || get_fw_gp_pmc(pmu, msr); break; } @@ -184,35 +205,45 @@ static struct kvm_pmc *intel_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr) return pmc; } -static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *data) +static int intel_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct kvm_pmu *pmu = vcpu_to_pmu(vcpu); struct kvm_pmc *pmc; + u32 msr = msr_info->index; switch (msr) { case MSR_CORE_PERF_FIXED_CTR_CTRL: - *data = pmu->fixed_ctr_ctrl; + msr_info->data = pmu->fixed_ctr_ctrl; return 0; case MSR_CORE_PERF_GLOBAL_STATUS: - *data = pmu->global_status; + msr_info->data = pmu->global_status; return 0; case MSR_CORE_PERF_GLOBAL_CTRL: - *data = pmu->global_ctrl; + msr_info->data = pmu->global_ctrl; return 0; case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - *data = pmu->global_ovf_ctrl; + msr_info->data = pmu->global_ovf_ctrl; + return 0; + case MSR_IA32_PERF_CAPABILITIES: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + return 1; + msr_info->data = vcpu->arch.perf_capabilities; return 0; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || + (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { u64 val = pmc_read_counter(pmc); - *data = val & pmu->counter_bitmask[KVM_PMC_GP]; + msr_info->data = + val & pmu->counter_bitmask[KVM_PMC_GP]; return 0; } else if ((pmc = get_fixed_pmc(pmu, msr))) { u64 val = pmc_read_counter(pmc); - *data = val & pmu->counter_bitmask[KVM_PMC_FIXED]; + msr_info->data = + val & pmu->counter_bitmask[KVM_PMC_FIXED]; return 0; } else if ((pmc = get_gp_pmc(pmu, msr, MSR_P6_EVNTSEL0))) { - *data = pmc->eventsel; + msr_info->data = pmc->eventsel; return 0; } } @@ -258,9 +289,22 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } break; + case MSR_IA32_PERF_CAPABILITIES: + if (!msr_info->host_initiated) + return 1; + if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM) ? + (data & ~vmx_get_perf_capabilities()) : data) + return 1; + vcpu->arch.perf_capabilities = data; + return 0; default: - if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0))) { - if (!msr_info->host_initiated) + if ((pmc = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0)) || + (pmc = get_gp_pmc(pmu, msr, MSR_IA32_PMC0))) { + if ((msr & MSR_PMC_FULL_WIDTH_BIT) && + (data & ~pmu->counter_bitmask[KVM_PMC_GP])) + return 1; + if (!msr_info->host_initiated && + !(msr & MSR_PMC_FULL_WIDTH_BIT)) data = (s64)(s32)data; pmc->counter += data - pmc_read_counter(pmc); if (pmc->perf_event) @@ -300,6 +344,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) pmu->counter_bitmask[KVM_PMC_FIXED] = 0; pmu->version = 0; pmu->reserved_bits = 0xffffffff00200000ull; + vcpu->arch.perf_capabilities = 0; entry = kvm_find_cpuid_entry(vcpu, 0xa, 0); if (!entry) @@ -312,6 +357,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu) return; perf_get_x86_pmu_capability(&x86_pmu); + if (guest_cpuid_has(vcpu, X86_FEATURE_PDCM)) + vcpu->arch.perf_capabilities = vmx_get_perf_capabilities(); pmu->nr_arch_gp_counters = min_t(int, eax.split.num_counters, x86_pmu.num_counters_gp); diff --git a/arch/x86/kvm/vmx/vmcs.h b/arch/x86/kvm/vmx/vmcs.h index 481ad879197b..5c0ff80b85c0 100644 --- a/arch/x86/kvm/vmx/vmcs.h +++ b/arch/x86/kvm/vmx/vmcs.h @@ -19,7 +19,7 @@ struct vmcs_hdr { struct vmcs { struct vmcs_hdr hdr; u32 abort; - char data[0]; + char data[]; }; DECLARE_PER_CPU(struct vmcs *, current_vmcs); diff --git a/arch/x86/kvm/vmx/vmcs12.c b/arch/x86/kvm/vmx/vmcs12.c index 53dfb401316d..c8e51c004f78 100644 --- a/arch/x86/kvm/vmx/vmcs12.c +++ b/arch/x86/kvm/vmx/vmcs12.c @@ -115,10 +115,6 @@ const unsigned short vmcs_field_to_offset_table[] = { FIELD(CR4_GUEST_HOST_MASK, cr4_guest_host_mask), FIELD(CR0_READ_SHADOW, cr0_read_shadow), FIELD(CR4_READ_SHADOW, cr4_read_shadow), - FIELD(CR3_TARGET_VALUE0, cr3_target_value0), - FIELD(CR3_TARGET_VALUE1, cr3_target_value1), - FIELD(CR3_TARGET_VALUE2, cr3_target_value2), - FIELD(CR3_TARGET_VALUE3, cr3_target_value3), FIELD(EXIT_QUALIFICATION, exit_qualification), FIELD(GUEST_LINEAR_ADDRESS, guest_linear_address), FIELD(GUEST_CR0, guest_cr0), diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h index d0c6df373f67..80232daf00ff 100644 --- a/arch/x86/kvm/vmx/vmcs12.h +++ b/arch/x86/kvm/vmx/vmcs12.h @@ -80,10 +80,7 @@ struct __packed vmcs12 { natural_width cr4_guest_host_mask; natural_width cr0_read_shadow; natural_width cr4_read_shadow; - natural_width cr3_target_value0; - natural_width cr3_target_value1; - natural_width cr3_target_value2; - natural_width cr3_target_value3; + natural_width dead_space[4]; /* Last remnants of cr3_target_value[0-3]. */ natural_width exit_qualification; natural_width guest_linear_address; natural_width guest_cr0; @@ -263,10 +260,7 @@ static inline void vmx_check_vmcs12_offsets(void) CHECK_OFFSET(cr4_guest_host_mask, 352); CHECK_OFFSET(cr0_read_shadow, 360); CHECK_OFFSET(cr4_read_shadow, 368); - CHECK_OFFSET(cr3_target_value0, 376); - CHECK_OFFSET(cr3_target_value1, 384); - CHECK_OFFSET(cr3_target_value2, 392); - CHECK_OFFSET(cr3_target_value3, 400); + CHECK_OFFSET(dead_space, 376); CHECK_OFFSET(exit_qualification, 408); CHECK_OFFSET(guest_linear_address, 416); CHECK_OFFSET(guest_cr0, 424); diff --git a/arch/x86/kvm/vmx/vmenter.S b/arch/x86/kvm/vmx/vmenter.S index 51d1a82742fd..e0a182cb3cdd 100644 --- a/arch/x86/kvm/vmx/vmenter.S +++ b/arch/x86/kvm/vmx/vmenter.S @@ -166,13 +166,13 @@ SYM_FUNC_START(__vmx_vcpu_run) mov WORD_SIZE(%_ASM_SP), %_ASM_AX /* Save all guest registers, including RAX from the stack */ - __ASM_SIZE(pop) VCPU_RAX(%_ASM_AX) - mov %_ASM_CX, VCPU_RCX(%_ASM_AX) - mov %_ASM_DX, VCPU_RDX(%_ASM_AX) - mov %_ASM_BX, VCPU_RBX(%_ASM_AX) - mov %_ASM_BP, VCPU_RBP(%_ASM_AX) - mov %_ASM_SI, VCPU_RSI(%_ASM_AX) - mov %_ASM_DI, VCPU_RDI(%_ASM_AX) + pop VCPU_RAX(%_ASM_AX) + mov %_ASM_CX, VCPU_RCX(%_ASM_AX) + mov %_ASM_DX, VCPU_RDX(%_ASM_AX) + mov %_ASM_BX, VCPU_RBX(%_ASM_AX) + mov %_ASM_BP, VCPU_RBP(%_ASM_AX) + mov %_ASM_SI, VCPU_RSI(%_ASM_AX) + mov %_ASM_DI, VCPU_RDI(%_ASM_AX) #ifdef CONFIG_X86_64 mov %r8, VCPU_R8 (%_ASM_AX) mov %r9, VCPU_R9 (%_ASM_AX) diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c index 89c766fad889..170cc76a581f 100644 --- a/arch/x86/kvm/vmx/vmx.c +++ b/arch/x86/kvm/vmx/vmx.c @@ -437,6 +437,11 @@ static const struct kvm_vmx_segment_field { VMX_SEGMENT_FIELD(LDTR), }; +static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +{ + vmx->segment_cache.bitmask = 0; +} + static unsigned long host_idt_base; /* @@ -1306,10 +1311,12 @@ after_clear_sn: pi_set_on(pi_desc); } -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, + struct loaded_vmcs *buddy) { struct vcpu_vmx *vmx = to_vmx(vcpu); bool already_loaded = vmx->loaded_vmcs->cpu == cpu; + struct vmcs *prev; if (!already_loaded) { loaded_vmcs_clear(vmx->loaded_vmcs); @@ -1328,16 +1335,28 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) local_irq_enable(); } - if (per_cpu(current_vmcs, cpu) != vmx->loaded_vmcs->vmcs) { + prev = per_cpu(current_vmcs, cpu); + if (prev != vmx->loaded_vmcs->vmcs) { per_cpu(current_vmcs, cpu) = vmx->loaded_vmcs->vmcs; vmcs_load(vmx->loaded_vmcs->vmcs); - indirect_branch_prediction_barrier(); + + /* + * No indirect branch prediction barrier needed when switching + * the active VMCS within a guest, e.g. on nested VM-Enter. + * The L1 VMM can protect itself with retpolines, IBPB or IBRS. + */ + if (!buddy || WARN_ON_ONCE(buddy->vmcs != prev)) + indirect_branch_prediction_barrier(); } if (!already_loaded) { void *gdt = get_current_gdt_ro(); unsigned long sysenter_esp; + /* + * Flush all EPTP/VPID contexts, the new pCPU may have stale + * TLB entries from its previous association with the vCPU. + */ kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); /* @@ -1364,11 +1383,11 @@ void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu) * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. */ -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) +static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - vmx_vcpu_load_vmcs(vcpu, cpu); + vmx_vcpu_load_vmcs(vcpu, cpu, NULL); vmx_vcpu_pi_load(vcpu, cpu); @@ -1546,7 +1565,7 @@ static int vmx_rtit_ctl_check(struct kvm_vcpu *vcpu, u64 data) static int skip_emulated_instruction(struct kvm_vcpu *vcpu) { - unsigned long rip; + unsigned long rip, orig_rip; /* * Using VMCS.VM_EXIT_INSTRUCTION_LEN on EPT misconfig depends on @@ -1558,8 +1577,17 @@ static int skip_emulated_instruction(struct kvm_vcpu *vcpu) */ if (!static_cpu_has(X86_FEATURE_HYPERVISOR) || to_vmx(vcpu)->exit_reason != EXIT_REASON_EPT_MISCONFIG) { - rip = kvm_rip_read(vcpu); - rip += vmcs_read32(VM_EXIT_INSTRUCTION_LEN); + orig_rip = kvm_rip_read(vcpu); + rip = orig_rip + vmcs_read32(VM_EXIT_INSTRUCTION_LEN); +#ifdef CONFIG_X86_64 + /* + * We need to mask out the high 32 bits of RIP if not in 64-bit + * mode, but just finding out that we are in 64-bit mode is + * quite expensive. Only do it if there was a carry. + */ + if (unlikely(((rip ^ orig_rip) >> 31) == 3) && !is_64_bit_mode(vcpu)) + rip = (u32)rip; +#endif kvm_rip_write(vcpu, rip); } else { if (!kvm_emulate_instruction(vcpu, EMULTYPE_SKIP)) @@ -1712,17 +1740,6 @@ static void setup_msrs(struct vcpu_vmx *vmx) vmx_update_msr_bitmap(&vmx->vcpu); } -static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) -{ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - - if (is_guest_mode(vcpu) && - (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)) - return vcpu->arch.tsc_offset - vmcs12->tsc_offset; - - return vcpu->arch.tsc_offset; -} - static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vmcs12 *vmcs12 = get_vmcs12(vcpu); @@ -1771,6 +1788,9 @@ static int vmx_get_msr_feature(struct kvm_msr_entry *msr) if (!nested) return 1; return vmx_get_vmx_msr(&vmcs_config.nested, msr->index, &msr->data); + case MSR_IA32_PERF_CAPABILITIES: + msr->data = vmx_get_perf_capabilities(); + return 0; default: return 1; } @@ -1926,6 +1946,16 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return 0; } +static u64 nested_vmx_truncate_sysenter_addr(struct kvm_vcpu *vcpu, + u64 data) +{ +#ifdef CONFIG_X86_64 + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) + return (u32)data; +#endif + return (unsigned long)data; +} + /* * Writes msr value into the appropriate "register". * Returns 0 on success, non-0 otherwise. @@ -1963,13 +1993,17 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) vmcs_write32(GUEST_SYSENTER_CS, data); break; case MSR_IA32_SYSENTER_EIP: - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + data = nested_vmx_truncate_sysenter_addr(vcpu, data); get_vmcs12(vcpu)->guest_sysenter_eip = data; + } vmcs_writel(GUEST_SYSENTER_EIP, data); break; case MSR_IA32_SYSENTER_ESP: - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + data = nested_vmx_truncate_sysenter_addr(vcpu, data); get_vmcs12(vcpu)->guest_sysenter_esp = data; + } vmcs_writel(GUEST_SYSENTER_ESP, data); break; case MSR_IA32_DEBUGCTLMSR: @@ -2187,6 +2221,8 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) { + unsigned long guest_owned_bits; + kvm_register_mark_available(vcpu, reg); switch (reg) { @@ -2200,10 +2236,22 @@ static void vmx_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) if (enable_ept) ept_save_pdptrs(vcpu); break; + case VCPU_EXREG_CR0: + guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; + + vcpu->arch.cr0 &= ~guest_owned_bits; + vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & guest_owned_bits; + break; case VCPU_EXREG_CR3: if (enable_unrestricted_guest || (enable_ept && is_paging(vcpu))) vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); break; + case VCPU_EXREG_CR4: + guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; + + vcpu->arch.cr4 &= ~guest_owned_bits; + vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & guest_owned_bits; + break; default: WARN_ON_ONCE(1); break; @@ -2837,34 +2885,64 @@ static void exit_lmode(struct kvm_vcpu *vcpu) #endif -static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) +static void vmx_flush_tlb_all(struct kvm_vcpu *vcpu) { - int vpid = to_vmx(vcpu)->vpid; - - if (!vpid_sync_vcpu_addr(vpid, addr)) - vpid_sync_context(vpid); + struct vcpu_vmx *vmx = to_vmx(vcpu); /* - * If VPIDs are not supported or enabled, then the above is a no-op. - * But we don't really need a TLB flush in that case anyway, because - * each VM entry/exit includes an implicit flush when VPID is 0. + * INVEPT must be issued when EPT is enabled, irrespective of VPID, as + * the CPU is not required to invalidate guest-physical mappings on + * VM-Entry, even if VPID is disabled. Guest-physical mappings are + * associated with the root EPT structure and not any particular VPID + * (INVVPID also isn't required to invalidate guest-physical mappings). */ + if (enable_ept) { + ept_sync_global(); + } else if (enable_vpid) { + if (cpu_has_vmx_invvpid_global()) { + vpid_sync_vcpu_global(); + } else { + vpid_sync_vcpu_single(vmx->vpid); + vpid_sync_vcpu_single(vmx->nested.vpid02); + } + } } -static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) +static void vmx_flush_tlb_current(struct kvm_vcpu *vcpu) { - ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; + u64 root_hpa = vcpu->arch.mmu->root_hpa; - vcpu->arch.cr0 &= ~cr0_guest_owned_bits; - vcpu->arch.cr0 |= vmcs_readl(GUEST_CR0) & cr0_guest_owned_bits; + /* No flush required if the current context is invalid. */ + if (!VALID_PAGE(root_hpa)) + return; + + if (enable_ept) + ept_sync_context(construct_eptp(vcpu, root_hpa)); + else if (!is_guest_mode(vcpu)) + vpid_sync_context(to_vmx(vcpu)->vpid); + else + vpid_sync_context(nested_get_vpid02(vcpu)); } -static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu) +static void vmx_flush_tlb_gva(struct kvm_vcpu *vcpu, gva_t addr) { - ulong cr4_guest_owned_bits = vcpu->arch.cr4_guest_owned_bits; + /* + * vpid_sync_vcpu_addr() is a nop if vmx->vpid==0, see the comment in + * vmx_flush_tlb_guest() for an explanation of why this is ok. + */ + vpid_sync_vcpu_addr(to_vmx(vcpu)->vpid, addr); +} - vcpu->arch.cr4 &= ~cr4_guest_owned_bits; - vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & cr4_guest_owned_bits; +static void vmx_flush_tlb_guest(struct kvm_vcpu *vcpu) +{ + /* + * vpid_sync_context() is a nop if vmx->vpid==0, e.g. if enable_vpid==0 + * or a vpid couldn't be allocated for this vCPU. VM-Enter and VM-Exit + * are required to flush GVA->{G,H}PA mappings from the TLB if vpid is + * disabled (VM-Enter with vpid enabled and vpid==0 is disallowed), + * i.e. no explicit INVVPID is necessary. + */ + vpid_sync_context(to_vmx(vcpu)->vpid); } static void ept_load_pdptrs(struct kvm_vcpu *vcpu) @@ -2886,12 +2964,13 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu->arch.walk_mmu; - if (is_pae_paging(vcpu)) { - mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); - mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); - mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); - mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); - } + if (WARN_ON_ONCE(!is_pae_paging(vcpu))) + return; + + mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0); + mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1); + mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2); + mmu->pdptrs[3] = vmcs_read64(GUEST_PDPTR3); kvm_register_mark_dirty(vcpu, VCPU_EXREG_PDPTR); } @@ -2955,20 +3034,27 @@ void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; + kvm_register_mark_available(vcpu, VCPU_EXREG_CR0); /* depends on vcpu->arch.cr0 to be set to a new value */ vmx->emulation_required = emulation_required(vcpu); } -static int get_ept_level(struct kvm_vcpu *vcpu) +static int vmx_get_tdp_level(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) - return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu)); if (cpu_has_vmx_ept_5levels() && (cpuid_maxphyaddr(vcpu) > 48)) return 5; return 4; } +static int get_ept_level(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu) && nested_cpu_has_ept(get_vmcs12(vcpu))) + return vmx_eptp_page_walk_level(nested_ept_get_eptp(vcpu)); + + return vmx_get_tdp_level(vcpu); +} + u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) { u64 eptp = VMX_EPTP_MT_WB; @@ -2983,16 +3069,15 @@ u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa) return eptp; } -void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3) +void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long pgd) { struct kvm *kvm = vcpu->kvm; bool update_guest_cr3 = true; unsigned long guest_cr3; u64 eptp; - guest_cr3 = cr3; if (enable_ept) { - eptp = construct_eptp(vcpu, cr3); + eptp = construct_eptp(vcpu, pgd); vmcs_write64(EPT_POINTER, eptp); if (kvm_x86_ops.tlb_remote_flush) { @@ -3003,16 +3088,15 @@ void vmx_load_mmu_pgd(struct kvm_vcpu *vcpu, unsigned long cr3) spin_unlock(&to_kvm_vmx(kvm)->ept_pointer_lock); } - /* Loading vmcs02.GUEST_CR3 is handled by nested VM-Enter. */ - if (is_guest_mode(vcpu)) - update_guest_cr3 = false; - else if (!enable_unrestricted_guest && !is_paging(vcpu)) + if (!enable_unrestricted_guest && !is_paging(vcpu)) guest_cr3 = to_kvm_vmx(kvm)->ept_identity_map_addr; else if (test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail)) guest_cr3 = vcpu->arch.cr3; else /* vmcs01.GUEST_CR3 is already up-to-date. */ update_guest_cr3 = false; ept_load_pdptrs(vcpu); + } else { + guest_cr3 = pgd; } if (update_guest_cr3) @@ -3063,6 +3147,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) return 1; vcpu->arch.cr4 = cr4; + kvm_register_mark_available(vcpu, VCPU_EXREG_CR4); if (!enable_unrestricted_guest) { if (enable_ept) { @@ -3851,7 +3936,8 @@ static int vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_on(&vmx->pi_desc)) return 0; - if (!kvm_vcpu_trigger_posted_interrupt(vcpu, false)) + if (vcpu != kvm_get_running_vcpu() && + !kvm_vcpu_trigger_posted_interrupt(vcpu, false)) kvm_vcpu_kick(vcpu); return 0; @@ -4147,8 +4233,7 @@ static void ept_set_mmio_spte_mask(void) * EPT Misconfigurations can be generated if the value of bits 2:0 * of an EPT paging-structure entry is 110b (write/execute). */ - kvm_mmu_set_mmio_spte_mask(VMX_EPT_RWX_MASK, - VMX_EPT_MISCONFIG_WX_VALUE, 0); + kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE, 0); } #define VMX_XSS_EXIT_BITMAP 0 @@ -4453,31 +4538,54 @@ void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) } } -static int vmx_nmi_allowed(struct kvm_vcpu *vcpu) +bool vmx_nmi_blocked(struct kvm_vcpu *vcpu) { - if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; + if (is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu)) + return false; - if (!enable_vnmi && - to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) - return 0; + if (!enable_vnmi && to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked) + return true; - return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI - | GUEST_INTR_STATE_NMI)); + return (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI | + GUEST_INTR_STATE_NMI)); } -static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu) +static int vmx_nmi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { if (to_vmx(vcpu)->nested.nested_run_pending) - return false; + return -EBUSY; + + /* An NMI must not be injected into L2 if it's supposed to VM-Exit. */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_nmi(vcpu)) + return -EBUSY; + + return !vmx_nmi_blocked(vcpu); +} +bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu) +{ if (is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) - return true; + return false; - return (vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) && - !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & - (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); + return !(vmx_get_rflags(vcpu) & X86_EFLAGS_IF) || + (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & + (GUEST_INTR_STATE_STI | GUEST_INTR_STATE_MOV_SS)); +} + +static int vmx_interrupt_allowed(struct kvm_vcpu *vcpu, bool for_injection) +{ + if (to_vmx(vcpu)->nested.nested_run_pending) + return -EBUSY; + + /* + * An IRQ must not be injected into L2 if it's supposed to VM-Exit, + * e.g. if the IRQ arrived asynchronously after checking nested events. + */ + if (for_injection && is_guest_mode(vcpu) && nested_exit_on_intr(vcpu)) + return -EBUSY; + + return !vmx_interrupt_blocked(vcpu); } static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr) @@ -4518,10 +4626,8 @@ static bool rmode_exception(struct kvm_vcpu *vcpu, int vec) return false; /* fall through */ case DB_VECTOR: - if (vcpu->guest_debug & - (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)) - return false; - /* fall through */ + return !(vcpu->guest_debug & + (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP)); case DE_VECTOR: case OF_VECTOR: case BR_VECTOR: @@ -4616,7 +4722,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) u32 vect_info; vect_info = vmx->idt_vectoring_info; - intr_info = vmx->exit_intr_info; + intr_info = vmx_get_intr_info(vcpu); if (is_machine_check(intr_info) || is_nmi(intr_info)) return 1; /* handled by handle_exception_nmi_irqoff() */ @@ -4660,9 +4766,9 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) } if (is_page_fault(intr_info)) { - cr2 = vmcs_readl(EXIT_QUALIFICATION); + cr2 = vmx_get_exit_qual(vcpu); /* EPT won't cause page fault directly */ - WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept); + WARN_ON_ONCE(!vcpu->arch.apf.host_apf_flags && enable_ept); return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0); } @@ -4673,7 +4779,7 @@ static int handle_exception_nmi(struct kvm_vcpu *vcpu) switch (ex_no) { case DB_VECTOR: - dr6 = vmcs_readl(EXIT_QUALIFICATION); + dr6 = vmx_get_exit_qual(vcpu); if (!(vcpu->guest_debug & (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) { if (is_icebp(intr_info)) @@ -4740,7 +4846,7 @@ static int handle_io(struct kvm_vcpu *vcpu) int size, in, string; unsigned port; - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); string = (exit_qualification & 16) != 0; ++vcpu->stat.io_exits; @@ -4831,7 +4937,7 @@ static int handle_cr(struct kvm_vcpu *vcpu) int err; int ret; - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); cr = exit_qualification & 15; reg = (exit_qualification >> 8) & 15; switch ((exit_qualification >> 4) & 3) { @@ -4908,7 +5014,7 @@ static int handle_dr(struct kvm_vcpu *vcpu) unsigned long exit_qualification; int dr, dr7, reg; - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); dr = exit_qualification & DEBUG_REG_ACCESS_NUM; /* First, if DR does not exist, trigger UD */ @@ -5010,7 +5116,7 @@ static int handle_invd(struct kvm_vcpu *vcpu) static int handle_invlpg(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); kvm_mmu_invlpg(vcpu, exit_qualification); return kvm_skip_emulated_instruction(vcpu); @@ -5042,7 +5148,7 @@ static int handle_xsetbv(struct kvm_vcpu *vcpu) static int handle_apic_access(struct kvm_vcpu *vcpu) { if (likely(fasteoi)) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); int access_type, offset; access_type = exit_qualification & APIC_ACCESS_TYPE; @@ -5063,7 +5169,7 @@ static int handle_apic_access(struct kvm_vcpu *vcpu) static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); int vector = exit_qualification & 0xff; /* EOI-induced VM exit is trap-like and thus no need to adjust IP */ @@ -5073,7 +5179,7 @@ static int handle_apic_eoi_induced(struct kvm_vcpu *vcpu) static int handle_apic_write(struct kvm_vcpu *vcpu) { - unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + unsigned long exit_qualification = vmx_get_exit_qual(vcpu); u32 offset = exit_qualification & 0xfff; /* APIC-write VM exit is trap-like and thus no need to adjust IP */ @@ -5094,7 +5200,7 @@ static int handle_task_switch(struct kvm_vcpu *vcpu) idt_index = (vmx->idt_vectoring_info & VECTORING_INFO_VECTOR_MASK); type = (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK); - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); reason = (u32)exit_qualification >> 30; if (reason == TASK_SWITCH_GATE && idt_v) { @@ -5144,7 +5250,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) gpa_t gpa; u64 error_code; - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); /* * EPT violation happened while executing iret from NMI, @@ -5216,18 +5322,11 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) bool intr_window_requested; unsigned count = 130; - /* - * We should never reach the point where we are emulating L2 - * due to invalid guest state as that means we incorrectly - * allowed a nested VMEntry with an invalid vmcs12. - */ - WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending); - intr_window_requested = exec_controls_get(vmx) & CPU_BASED_INTR_WINDOW_EXITING; while (vmx->emulation_required && count-- != 0) { - if (intr_window_requested && vmx_interrupt_allowed(vcpu)) + if (intr_window_requested && !vmx_interrupt_blocked(vcpu)) return handle_interrupt_window(&vmx->vcpu); if (kvm_test_request(KVM_REQ_EVENT, vcpu)) @@ -5404,13 +5503,13 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) /* According to the Intel instruction reference, the memory operand * is read even if it isn't needed (e.g., for type==all) */ - if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION), + if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu), vmx_instruction_info, false, sizeof(operand), &gva)) return 1; if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) { - kvm_inject_page_fault(vcpu, &e); + kvm_inject_emulated_page_fault(vcpu, &e); return 1; } @@ -5439,11 +5538,11 @@ static int handle_invpcid(struct kvm_vcpu *vcpu) if (kvm_get_active_pcid(vcpu) == operand.pcid) { kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) - if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].cr3) + if (kvm_get_pcid(vcpu, vcpu->arch.mmu->prev_roots[i].pgd) == operand.pcid) roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i); @@ -5480,7 +5579,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) trace_kvm_pml_full(vcpu->vcpu_id); - exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + exit_qualification = vmx_get_exit_qual(vcpu); /* * PML buffer FULL happened while executing iret from NMI, @@ -5499,14 +5598,22 @@ static int handle_pml_full(struct kvm_vcpu *vcpu) return 1; } -static int handle_preemption_timer(struct kvm_vcpu *vcpu) +static fastpath_t handle_fastpath_preemption_timer(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); if (!vmx->req_immediate_exit && - !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) + !unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled)) { kvm_lapic_expired_hv_timer(vcpu); + return EXIT_FASTPATH_REENTER_GUEST; + } + + return EXIT_FASTPATH_NONE; +} +static int handle_preemption_timer(struct kvm_vcpu *vcpu) +{ + handle_fastpath_preemption_timer(vcpu); return 1; } @@ -5594,8 +5701,8 @@ static const int kvm_vmx_max_exit_handlers = static void vmx_get_exit_info(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2) { - *info1 = vmcs_readl(EXIT_QUALIFICATION); - *info2 = vmcs_read32(VM_EXIT_INTR_INFO); + *info1 = vmx_get_exit_qual(vcpu); + *info2 = vmx_get_intr_info(vcpu); } static void vmx_destroy_pml_buffer(struct vcpu_vmx *vmx) @@ -5677,7 +5784,6 @@ void dump_vmcs(void) u32 cpu_based_exec_ctrl, pin_based_exec_ctrl, secondary_exec_control; unsigned long cr4; u64 efer; - int i, n; if (!dump_invalid_vmcs) { pr_warn_ratelimited("set kvm_intel.dump_invalid_vmcs=1 to dump internal KVM state.\n"); @@ -5814,14 +5920,6 @@ void dump_vmcs(void) pr_err("PostedIntrVec = 0x%02x\n", vmcs_read16(POSTED_INTR_NV)); if ((secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)) pr_err("EPT pointer = 0x%016llx\n", vmcs_read64(EPT_POINTER)); - n = vmcs_read32(CR3_TARGET_COUNT); - for (i = 0; i + 1 < n; i += 4) - pr_err("CR3 target%u=%016lx target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2), - i + 1, vmcs_readl(CR3_TARGET_VALUE0 + i * 2 + 2)); - if (i < n) - pr_err("CR3 target%u=%016lx\n", - i, vmcs_readl(CR3_TARGET_VALUE0 + i * 2)); if (secondary_exec_control & SECONDARY_EXEC_PAUSE_LOOP_EXITING) pr_err("PLE Gap=%08x Window=%08x\n", vmcs_read32(PLE_GAP), vmcs_read32(PLE_WINDOW)); @@ -5834,15 +5932,12 @@ void dump_vmcs(void) * The guest has exited. See if we can fix it or if we need userspace * assistance. */ -static int vmx_handle_exit(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion exit_fastpath) +static int vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exit_reason = vmx->exit_reason; u32 vectoring_info = vmx->idt_vectoring_info; - trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); - /* * Flush logged GPAs PML buffer, this will make dirty_bitmap more * updated. Another good is, in kvm_vm_ioctl_get_dirty_log, before @@ -5853,6 +5948,14 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, if (enable_pml) vmx_flush_pml_buffer(vcpu); + /* + * We should never reach this point with a pending nested VM-Enter, and + * more specifically emulation of L2 due to invalid guest state (see + * below) should never happen as that means we incorrectly allowed a + * nested VM-Enter with an invalid vmcs12. + */ + WARN_ON_ONCE(vmx->nested.nested_run_pending); + /* If guest state is invalid, start emulating */ if (vmx->emulation_required) return handle_invalid_guest_state(vcpu); @@ -5871,8 +5974,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, */ nested_mark_vmcs12_pages_dirty(vcpu); - if (nested_vmx_exit_reflected(vcpu, exit_reason)) - return nested_vmx_reflect_vmexit(vcpu, exit_reason); + if (nested_vmx_reflect_vmexit(vcpu)) + return 1; } if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) { @@ -5919,7 +6022,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, if (unlikely(!enable_vnmi && vmx->loaded_vmcs->soft_vnmi_blocked)) { - if (vmx_interrupt_allowed(vcpu)) { + if (!vmx_interrupt_blocked(vcpu)) { vmx->loaded_vmcs->soft_vnmi_blocked = 0; } else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL && vcpu->arch.nmi_pending) { @@ -5936,10 +6039,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu, } } - if (exit_fastpath == EXIT_FASTPATH_SKIP_EMUL_INS) { - kvm_skip_emulated_instruction(vcpu); + if (exit_fastpath != EXIT_FASTPATH_NONE) return 1; - } if (exit_reason >= kvm_vmx_max_exit_handlers) goto unexpected_vmexit; @@ -6093,7 +6194,15 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) if (flexpriority_enabled) { sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - vmx_flush_tlb(vcpu, true); + kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu); + + /* + * Flush the TLB, reloading the APIC access page will + * only do so if its physical address has changed, but + * the guest may have inserted a non-APIC mapping into + * the TLB while the APIC access page was disabled. + */ + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } break; case LAPIC_MODE_X2APIC: @@ -6107,12 +6216,32 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu) vmx_update_msr_bitmap(vcpu); } -static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) +static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu) { - if (!is_guest_mode(vcpu)) { - vmcs_write64(APIC_ACCESS_ADDR, hpa); - vmx_flush_tlb(vcpu, true); + struct page *page; + + /* Defer reload until vmcs01 is the current VMCS. */ + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.reload_vmcs01_apic_access_page = true; + return; } + + if (!(secondary_exec_controls_get(to_vmx(vcpu)) & + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + return; + + page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); + if (is_error_page(page)) + return; + + vmcs_write64(APIC_ACCESS_ADDR, page_to_phys(page)); + vmx_flush_tlb_current(vcpu); + + /* + * Do not pin apic access page in memory, the MMU notifier + * will call us again if it is migrated or swapped out. + */ + put_page(page); } static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) @@ -6230,16 +6359,16 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx) { - vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + u32 intr_info = vmx_get_intr_info(&vmx->vcpu); /* if exit due to PF check for async PF */ - if (is_page_fault(vmx->exit_intr_info)) { - vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason(); + if (is_page_fault(intr_info)) { + vmx->vcpu.arch.apf.host_apf_flags = kvm_read_and_reset_apf_flags(); /* Handle machine checks before interrupts are enabled */ - } else if (is_machine_check(vmx->exit_intr_info)) { + } else if (is_machine_check(intr_info)) { kvm_machine_check(); /* We need to handle NMIs before interrupts are enabled */ - } else if (is_nmi(vmx->exit_intr_info)) { + } else if (is_nmi(intr_info)) { kvm_before_interrupt(&vmx->vcpu); asm("int $2"); kvm_after_interrupt(&vmx->vcpu); @@ -6254,9 +6383,8 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) unsigned long tmp; #endif gate_desc *desc; - u32 intr_info; + u32 intr_info = vmx_get_intr_info(vcpu); - intr_info = vmcs_read32(VM_EXIT_INTR_INFO); if (WARN_ONCE(!is_external_intr(intr_info), "KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info)) return; @@ -6269,13 +6397,13 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) asm volatile( #ifdef CONFIG_X86_64 - "mov %%" _ASM_SP ", %[sp]\n\t" - "and $0xfffffffffffffff0, %%" _ASM_SP "\n\t" - "push $%c[ss]\n\t" + "mov %%rsp, %[sp]\n\t" + "and $-16, %%rsp\n\t" + "push %[ss]\n\t" "push %[sp]\n\t" #endif "pushf\n\t" - __ASM_SIZE(push) " $%c[cs]\n\t" + "push %[cs]\n\t" CALL_NOSPEC : #ifdef CONFIG_X86_64 @@ -6284,7 +6412,9 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) ASM_CALL_CONSTRAINT : [thunk_target]"r"(entry), +#ifdef CONFIG_X86_64 [ss]"i"(__KERNEL_DS), +#endif [cs]"i"(__KERNEL_CS) ); @@ -6292,8 +6422,7 @@ static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff); -static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, - enum exit_fastpath_completion *exit_fastpath) +static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -6301,12 +6430,9 @@ static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu, handle_external_interrupt_irqoff(vcpu); else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI) handle_exception_nmi_irqoff(vmx); - else if (!is_guest_mode(vcpu) && - vmx->exit_reason == EXIT_REASON_MSR_WRITE) - *exit_fastpath = handle_fastpath_set_msr_irqoff(vcpu); } -static bool vmx_has_emulated_msr(int index) +static bool vmx_has_emulated_msr(u32 index) { switch (index) { case MSR_IA32_SMBASE: @@ -6337,11 +6463,8 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) if (enable_vnmi) { if (vmx->loaded_vmcs->nmi_known_unmasked) return; - /* - * Can't use vmx->exit_intr_info since we're not sure what - * the exit reason is. - */ - exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + + exit_intr_info = vmx_get_intr_info(&vmx->vcpu); unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0; vector = exit_intr_info & INTR_INFO_VECTOR_MASK; /* @@ -6508,13 +6631,27 @@ void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) } } +static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu) +{ + switch (to_vmx(vcpu)->exit_reason) { + case EXIT_REASON_MSR_WRITE: + return handle_fastpath_set_msr_irqoff(vcpu); + case EXIT_REASON_PREEMPTION_TIMER: + return handle_fastpath_preemption_timer(vcpu); + default: + return EXIT_FASTPATH_NONE; + } +} + bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched); -static void vmx_vcpu_run(struct kvm_vcpu *vcpu) +static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu) { + fastpath_t exit_fastpath; struct vcpu_vmx *vmx = to_vmx(vcpu); unsigned long cr3, cr4; +reenter_guest: /* Record the guest's net vcpu time for enforced NMI injections. */ if (unlikely(!enable_vnmi && vmx->loaded_vmcs->soft_vnmi_blocked)) @@ -6523,7 +6660,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) /* Don't enter VMX if guest state is invalid, let the exit handler start emulation until we arrive back to a valid state */ if (vmx->emulation_required) - return; + return EXIT_FASTPATH_NONE; if (vmx->ple_window_dirty) { vmx->ple_window_dirty = false; @@ -6643,12 +6780,7 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) loadsegment(es, __USER_DS); #endif - vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) - | (1 << VCPU_EXREG_RFLAGS) - | (1 << VCPU_EXREG_PDPTR) - | (1 << VCPU_EXREG_SEGMENTS) - | (1 << VCPU_EXREG_CR3)); - vcpu->arch.regs_dirty = 0; + vmx_register_cache_reset(vcpu); pt_guest_exit(vmx); @@ -6657,18 +6789,45 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu) vmx->nested.nested_run_pending = 0; vmx->idt_vectoring_info = 0; - vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON); - if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY) + if (unlikely(vmx->fail)) { + vmx->exit_reason = 0xdead; + return EXIT_FASTPATH_NONE; + } + + vmx->exit_reason = vmcs_read32(VM_EXIT_REASON); + if (unlikely((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)) kvm_machine_check(); - if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) - return; + trace_kvm_exit(vmx->exit_reason, vcpu, KVM_ISA_VMX); + + if (unlikely(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) + return EXIT_FASTPATH_NONE; vmx->loaded_vmcs->launched = 1; vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD); vmx_recover_nmi_blocking(vmx); vmx_complete_interrupts(vmx); + + if (is_guest_mode(vcpu)) + return EXIT_FASTPATH_NONE; + + exit_fastpath = vmx_exit_handlers_fastpath(vcpu); + if (exit_fastpath == EXIT_FASTPATH_REENTER_GUEST) { + if (!kvm_vcpu_exit_request(vcpu)) { + /* + * FIXME: this goto should be a loop in vcpu_enter_guest, + * but it would incur the cost of a retpoline for now. + * Revisit once static calls are available. + */ + if (vcpu->arch.apicv_active) + vmx_sync_pir_to_irr(vcpu); + goto reenter_guest; + } + exit_fastpath = EXIT_FASTPATH_EXIT_HANDLED; + } + + return exit_fastpath; } static void vmx_free_vcpu(struct kvm_vcpu *vcpu) @@ -7138,6 +7297,9 @@ static __init void vmx_set_cpu_caps(void) /* CPUID 0x80000001 */ if (!cpu_has_vmx_rdtscp()) kvm_cpu_cap_clear(X86_FEATURE_RDTSCP); + + if (vmx_waitpkg_supported()) + kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG); } static void vmx_request_immediate_exit(struct kvm_vcpu *vcpu) @@ -7253,10 +7415,6 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc, u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles; struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer; - if (kvm_mwait_in_guest(vcpu->kvm) || - kvm_can_post_timer_interrupt(vcpu)) - return -EOPNOTSUPP; - vmx = to_vmx(vcpu); tscl = rdtsc(); guest_tscl = kvm_read_l1_tsc(vcpu, tscl); @@ -7599,12 +7757,12 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) ~FEAT_CTL_LMCE_ENABLED; } -static int vmx_smi_allowed(struct kvm_vcpu *vcpu) +static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) { /* we need a nested vmexit to enter SMM, postpone if run is pending */ if (to_vmx(vcpu)->nested.nested_run_pending) - return 0; - return 1; + return -EBUSY; + return !is_smm(vcpu); } static int vmx_pre_enter_smm(struct kvm_vcpu *vcpu, char *smstate) @@ -7641,9 +7799,9 @@ static int vmx_pre_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) return 0; } -static int enable_smi_window(struct kvm_vcpu *vcpu) +static void enable_smi_window(struct kvm_vcpu *vcpu) { - return 0; + /* RSM will cause a vmexit anyway. */ } static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu) @@ -7656,6 +7814,16 @@ static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) return to_vmx(vcpu)->nested.vmxon; } +static void vmx_migrate_timers(struct kvm_vcpu *vcpu) +{ + if (is_guest_mode(vcpu)) { + struct hrtimer *timer = &to_vmx(vcpu)->nested.preemption_timer; + + if (hrtimer_try_to_cancel(timer) == 1) + hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED); + } +} + static void hardware_unsetup(void) { if (nested) @@ -7700,8 +7868,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_segment = vmx_set_segment, .get_cpl = vmx_get_cpl, .get_cs_db_l_bits = vmx_get_cs_db_l_bits, - .decache_cr0_guest_bits = vmx_decache_cr0_guest_bits, - .decache_cr4_guest_bits = vmx_decache_cr4_guest_bits, .set_cr0 = vmx_set_cr0, .set_cr4 = vmx_set_cr4, .set_efer = vmx_set_efer, @@ -7715,8 +7881,10 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .get_rflags = vmx_get_rflags, .set_rflags = vmx_set_rflags, - .tlb_flush = vmx_flush_tlb, + .tlb_flush_all = vmx_flush_tlb_all, + .tlb_flush_current = vmx_flush_tlb_current, .tlb_flush_gva = vmx_flush_tlb_gva, + .tlb_flush_guest = vmx_flush_tlb_guest, .run = vmx_vcpu_run, .handle_exit = vmx_handle_exit, @@ -7751,7 +7919,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .set_tss_addr = vmx_set_tss_addr, .set_identity_map_addr = vmx_set_identity_map_addr, - .get_tdp_level = get_ept_level, + .get_tdp_level = vmx_get_tdp_level, .get_mt_mask = vmx_get_mt_mask, .get_exit_info = vmx_get_exit_info, @@ -7760,7 +7928,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, - .read_l1_tsc_offset = vmx_read_l1_tsc_offset, .write_l1_tsc_offset = vmx_write_l1_tsc_offset, .load_mmu_pgd = vmx_load_mmu_pgd, @@ -7782,6 +7949,7 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .post_block = vmx_post_block, .pmu_ops = &intel_pmu_ops, + .nested_ops = &vmx_nested_ops, .update_pi_irte = vmx_update_pi_irte, @@ -7797,14 +7965,9 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { .pre_leave_smm = vmx_pre_leave_smm, .enable_smi_window = enable_smi_window, - .check_nested_events = NULL, - .get_nested_state = NULL, - .set_nested_state = NULL, - .get_vmcs12_pages = NULL, - .nested_enable_evmcs = NULL, - .nested_get_evmcs_version = NULL, .need_emulation_on_page_fault = vmx_need_emulation_on_page_fault, .apic_init_signal_blocked = vmx_apic_init_signal_blocked, + .migrate_timers = vmx_migrate_timers, }; static __init int hardware_setup(void) @@ -7903,11 +8066,11 @@ static __init int hardware_setup(void) if (!enable_ept) ept_lpage_level = 0; else if (cpu_has_vmx_ept_1g_page()) - ept_lpage_level = PT_PDPE_LEVEL; + ept_lpage_level = PG_LEVEL_1G; else if (cpu_has_vmx_ept_2m_page()) - ept_lpage_level = PT_DIRECTORY_LEVEL; + ept_lpage_level = PG_LEVEL_2M; else - ept_lpage_level = PT_PAGE_TABLE_LEVEL; + ept_lpage_level = PG_LEVEL_4K; kvm_configure_mmu(enable_ept, ept_lpage_level); /* @@ -7967,8 +8130,7 @@ static __init int hardware_setup(void) nested_vmx_setup_ctls_msrs(&vmcs_config.nested, vmx_capability.ept); - r = nested_vmx_hardware_setup(&vmx_x86_ops, - kvm_vmx_exit_handlers); + r = nested_vmx_hardware_setup(kvm_vmx_exit_handlers); if (r) return r; } diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h index aab9df55336e..672c28f17e49 100644 --- a/arch/x86/kvm/vmx/vmx.h +++ b/arch/x86/kvm/vmx/vmx.h @@ -8,6 +8,7 @@ #include <asm/intel_pt.h> #include "capabilities.h" +#include "kvm_cache_regs.h" #include "ops.h" #include "vmcs.h" @@ -136,6 +137,7 @@ struct nested_vmx { bool vmcs02_initialized; bool change_vmcs01_virtual_apic_mode; + bool reload_vmcs01_apic_access_page; /* * Enlightened VMCS has been enabled. It does not mean that L1 has to @@ -167,6 +169,8 @@ struct nested_vmx { u16 posted_intr_nv; struct hrtimer preemption_timer; + u64 preemption_timer_deadline; + bool has_preemption_timer_deadline; bool preemption_timer_expired; /* to migrate it to L2 if VM_ENTRY_LOAD_DEBUG_CONTROLS is off */ @@ -208,6 +212,7 @@ struct vcpu_vmx { */ bool guest_state_loaded; + unsigned long exit_qualification; u32 exit_intr_info; u32 idt_vectoring_info; ulong rflags; @@ -317,8 +322,8 @@ struct kvm_vmx { }; bool nested_vmx_allowed(struct kvm_vcpu *vcpu); -void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu); -void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu); +void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu, + struct loaded_vmcs *buddy); int allocate_vpid(void); void free_vpid(int vpid); void vmx_set_constant_host_state(struct vcpu_vmx *vmx); @@ -341,6 +346,8 @@ void vmx_set_segment(struct kvm_vcpu *vcpu, struct kvm_segment *var, int seg); u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); void update_exception_bitmap(struct kvm_vcpu *vcpu); void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); +bool vmx_nmi_blocked(struct kvm_vcpu *vcpu); +bool vmx_interrupt_blocked(struct kvm_vcpu *vcpu); bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu); void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu); @@ -441,9 +448,18 @@ BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL) BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL) BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL) -static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx) +static inline void vmx_register_cache_reset(struct kvm_vcpu *vcpu) { - vmx->segment_cache.bitmask = 0; + vcpu->arch.regs_avail = ~((1 << VCPU_REGS_RIP) | (1 << VCPU_REGS_RSP) + | (1 << VCPU_EXREG_RFLAGS) + | (1 << VCPU_EXREG_PDPTR) + | (1 << VCPU_EXREG_SEGMENTS) + | (1 << VCPU_EXREG_CR0) + | (1 << VCPU_EXREG_CR3) + | (1 << VCPU_EXREG_CR4) + | (1 << VCPU_EXREG_EXIT_INFO_1) + | (1 << VCPU_EXREG_EXIT_INFO_2)); + vcpu->arch.regs_dirty = 0; } static inline u32 vmx_vmentry_ctrl(void) @@ -486,6 +502,28 @@ static inline struct pi_desc *vcpu_to_pi_desc(struct kvm_vcpu *vcpu) return &(to_vmx(vcpu)->pi_desc); } +static inline unsigned long vmx_get_exit_qual(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_1)) { + kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_1); + vmx->exit_qualification = vmcs_readl(EXIT_QUALIFICATION); + } + return vmx->exit_qualification; +} + +static inline u32 vmx_get_intr_info(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!kvm_register_is_available(vcpu, VCPU_EXREG_EXIT_INFO_2)) { + kvm_register_mark_available(vcpu, VCPU_EXREG_EXIT_INFO_2); + vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO); + } + return vmx->exit_intr_info; +} + struct vmcs *alloc_vmcs_cpu(bool shadow, int cpu, gfp_t flags); void free_vmcs(struct vmcs *vmcs); int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs); @@ -500,24 +538,6 @@ static inline struct vmcs *alloc_vmcs(bool shadow) u64 construct_eptp(struct kvm_vcpu *vcpu, unsigned long root_hpa); -static inline void __vmx_flush_tlb(struct kvm_vcpu *vcpu, int vpid, - bool invalidate_gpa) -{ - if (enable_ept && (invalidate_gpa || !enable_vpid)) { - if (!VALID_PAGE(vcpu->arch.mmu->root_hpa)) - return; - ept_sync_context(construct_eptp(vcpu, - vcpu->arch.mmu->root_hpa)); - } else { - vpid_sync_context(vpid); - } -} - -static inline void vmx_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) -{ - __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid, invalidate_gpa); -} - static inline void decache_tsc_multiplier(struct vcpu_vmx *vmx) { vmx->current_tsc_ratio = vmx->vcpu.arch.tsc_scaling_ratio; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c17e6eb9ad43..9e41b5135340 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -18,6 +18,7 @@ #include <linux/kvm_host.h> #include "irq.h" +#include "ioapic.h" #include "mmu.h" #include "i8254.h" #include "tss.h" @@ -97,9 +98,6 @@ static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); static u64 __read_mostly cr4_reserved_bits = CR4_RESERVED_BITS; -#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ -#define VCPU_STAT(x, ...) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ - #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) @@ -194,45 +192,46 @@ u64 __read_mostly supported_xss; EXPORT_SYMBOL_GPL(supported_xss); struct kvm_stats_debugfs_item debugfs_entries[] = { - { "pf_fixed", VCPU_STAT(pf_fixed) }, - { "pf_guest", VCPU_STAT(pf_guest) }, - { "tlb_flush", VCPU_STAT(tlb_flush) }, - { "invlpg", VCPU_STAT(invlpg) }, - { "exits", VCPU_STAT(exits) }, - { "io_exits", VCPU_STAT(io_exits) }, - { "mmio_exits", VCPU_STAT(mmio_exits) }, - { "signal_exits", VCPU_STAT(signal_exits) }, - { "irq_window", VCPU_STAT(irq_window_exits) }, - { "nmi_window", VCPU_STAT(nmi_window_exits) }, - { "halt_exits", VCPU_STAT(halt_exits) }, - { "halt_successful_poll", VCPU_STAT(halt_successful_poll) }, - { "halt_attempted_poll", VCPU_STAT(halt_attempted_poll) }, - { "halt_poll_invalid", VCPU_STAT(halt_poll_invalid) }, - { "halt_wakeup", VCPU_STAT(halt_wakeup) }, - { "hypercalls", VCPU_STAT(hypercalls) }, - { "request_irq", VCPU_STAT(request_irq_exits) }, - { "irq_exits", VCPU_STAT(irq_exits) }, - { "host_state_reload", VCPU_STAT(host_state_reload) }, - { "fpu_reload", VCPU_STAT(fpu_reload) }, - { "insn_emulation", VCPU_STAT(insn_emulation) }, - { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, - { "irq_injections", VCPU_STAT(irq_injections) }, - { "nmi_injections", VCPU_STAT(nmi_injections) }, - { "req_event", VCPU_STAT(req_event) }, - { "l1d_flush", VCPU_STAT(l1d_flush) }, - { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, - { "mmu_pte_write", VM_STAT(mmu_pte_write) }, - { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, - { "mmu_pde_zapped", VM_STAT(mmu_pde_zapped) }, - { "mmu_flooded", VM_STAT(mmu_flooded) }, - { "mmu_recycled", VM_STAT(mmu_recycled) }, - { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, - { "mmu_unsync", VM_STAT(mmu_unsync) }, - { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, - { "largepages", VM_STAT(lpages, .mode = 0444) }, - { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, - { "max_mmu_page_hash_collisions", - VM_STAT(max_mmu_page_hash_collisions) }, + VCPU_STAT("pf_fixed", pf_fixed), + VCPU_STAT("pf_guest", pf_guest), + VCPU_STAT("tlb_flush", tlb_flush), + VCPU_STAT("invlpg", invlpg), + VCPU_STAT("exits", exits), + VCPU_STAT("io_exits", io_exits), + VCPU_STAT("mmio_exits", mmio_exits), + VCPU_STAT("signal_exits", signal_exits), + VCPU_STAT("irq_window", irq_window_exits), + VCPU_STAT("nmi_window", nmi_window_exits), + VCPU_STAT("halt_exits", halt_exits), + VCPU_STAT("halt_successful_poll", halt_successful_poll), + VCPU_STAT("halt_attempted_poll", halt_attempted_poll), + VCPU_STAT("halt_poll_invalid", halt_poll_invalid), + VCPU_STAT("halt_wakeup", halt_wakeup), + VCPU_STAT("hypercalls", hypercalls), + VCPU_STAT("request_irq", request_irq_exits), + VCPU_STAT("irq_exits", irq_exits), + VCPU_STAT("host_state_reload", host_state_reload), + VCPU_STAT("fpu_reload", fpu_reload), + VCPU_STAT("insn_emulation", insn_emulation), + VCPU_STAT("insn_emulation_fail", insn_emulation_fail), + VCPU_STAT("irq_injections", irq_injections), + VCPU_STAT("nmi_injections", nmi_injections), + VCPU_STAT("req_event", req_event), + VCPU_STAT("l1d_flush", l1d_flush), + VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns), + VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns), + VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped), + VM_STAT("mmu_pte_write", mmu_pte_write), + VM_STAT("mmu_pte_updated", mmu_pte_updated), + VM_STAT("mmu_pde_zapped", mmu_pde_zapped), + VM_STAT("mmu_flooded", mmu_flooded), + VM_STAT("mmu_recycled", mmu_recycled), + VM_STAT("mmu_cache_miss", mmu_cache_miss), + VM_STAT("mmu_unsync", mmu_unsync), + VM_STAT("remote_tlb_flush", remote_tlb_flush), + VM_STAT("largepages", lpages, .mode = 0444), + VM_STAT("nx_largepages_splitted", nx_lpage_splits, .mode = 0444), + VM_STAT("max_mmu_page_hash_collisions", max_mmu_page_hash_collisions), { NULL } }; @@ -261,7 +260,7 @@ static int emulator_fix_hypercall(struct x86_emulate_ctxt *ctxt); static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu) { int i; - for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++) + for (i = 0; i < ASYNC_PF_PER_VCPU; i++) vcpu->arch.apf.gfns[i] = ~0; } @@ -612,15 +611,28 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) } EXPORT_SYMBOL_GPL(kvm_inject_page_fault); -static bool kvm_propagate_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault) +bool kvm_inject_emulated_page_fault(struct kvm_vcpu *vcpu, + struct x86_exception *fault) { - if (mmu_is_nested(vcpu) && !fault->nested_page_fault) - vcpu->arch.nested_mmu.inject_page_fault(vcpu, fault); - else - vcpu->arch.mmu->inject_page_fault(vcpu, fault); + struct kvm_mmu *fault_mmu; + WARN_ON_ONCE(fault->vector != PF_VECTOR); + + fault_mmu = fault->nested_page_fault ? vcpu->arch.mmu : + vcpu->arch.walk_mmu; + + /* + * Invalidate the TLB entry for the faulting address, if it exists, + * else the access will fault indefinitely (and to emulate hardware). + */ + if ((fault->error_code & PFERR_PRESENT_MASK) && + !(fault->error_code & PFERR_RSVD_MASK)) + kvm_mmu_invalidate_gva(vcpu, fault_mmu, fault->address, + fault_mmu->root_hpa); + fault_mmu->inject_page_fault(vcpu, fault); return fault->nested_page_fault; } +EXPORT_SYMBOL_GPL(kvm_inject_emulated_page_fault); void kvm_inject_nmi(struct kvm_vcpu *vcpu) { @@ -1008,7 +1020,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { if (!skip_tlb_flush) { kvm_mmu_sync_roots(vcpu); - kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); + kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); } return 0; } @@ -1020,7 +1032,7 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) !load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)) return 1; - kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush); + kvm_mmu_new_pgd(vcpu, cr3, skip_tlb_flush, skip_tlb_flush); vcpu->arch.cr3 = cr3; kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); @@ -1060,7 +1072,7 @@ static void kvm_update_dr0123(struct kvm_vcpu *vcpu) } } -static void kvm_update_dr7(struct kvm_vcpu *vcpu) +void kvm_update_dr7(struct kvm_vcpu *vcpu) { unsigned long dr7; @@ -1073,6 +1085,7 @@ static void kvm_update_dr7(struct kvm_vcpu *vcpu) if (dr7 & DR7_BP_EN_MASK) vcpu->arch.switch_db_regs |= KVM_DEBUGREG_BP_ENABLED; } +EXPORT_SYMBOL_GPL(kvm_update_dr7); static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu) { @@ -1233,13 +1246,18 @@ static const u32 emulated_msrs_all[] = { HV_X64_MSR_VP_ASSIST_PAGE, HV_X64_MSR_REENLIGHTENMENT_CONTROL, HV_X64_MSR_TSC_EMULATION_CONTROL, HV_X64_MSR_TSC_EMULATION_STATUS, + HV_X64_MSR_SYNDBG_OPTIONS, + HV_X64_MSR_SYNDBG_CONTROL, HV_X64_MSR_SYNDBG_STATUS, + HV_X64_MSR_SYNDBG_SEND_BUFFER, HV_X64_MSR_SYNDBG_RECV_BUFFER, + HV_X64_MSR_SYNDBG_PENDING_BUFFER, MSR_KVM_ASYNC_PF_EN, MSR_KVM_STEAL_TIME, - MSR_KVM_PV_EOI_EN, + MSR_KVM_PV_EOI_EN, MSR_KVM_ASYNC_PF_INT, MSR_KVM_ASYNC_PF_ACK, MSR_IA32_TSC_ADJUST, MSR_IA32_TSCDEADLINE, MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, MSR_IA32_MISC_ENABLE, MSR_IA32_MCG_STATUS, MSR_IA32_MCG_CTL, @@ -1306,6 +1324,7 @@ static const u32 msr_based_features_all[] = { MSR_F10H_DECFG, MSR_IA32_UCODE_REV, MSR_IA32_ARCH_CAPABILITIES, + MSR_IA32_PERF_CAPABILITIES, }; static u32 msr_based_features[ARRAY_SIZE(msr_based_features_all)]; @@ -1564,6 +1583,13 @@ int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr); +bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu) +{ + return vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || + need_resched() || signal_pending(current); +} +EXPORT_SYMBOL_GPL(kvm_vcpu_exit_request); + /* * The fast path for frequent and performance sensitive wrmsr emulation, * i.e. the sending of IPI, sending IPI early in the VM-Exit flow reduces @@ -1592,27 +1618,44 @@ static int handle_fastpath_set_x2apic_icr_irqoff(struct kvm_vcpu *vcpu, u64 data return 1; } -enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) +static int handle_fastpath_set_tscdeadline(struct kvm_vcpu *vcpu, u64 data) +{ + if (!kvm_can_use_hv_timer(vcpu)) + return 1; + + kvm_set_lapic_tscdeadline_msr(vcpu, data); + return 0; +} + +fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu) { u32 msr = kvm_rcx_read(vcpu); u64 data; - int ret = 0; + fastpath_t ret = EXIT_FASTPATH_NONE; switch (msr) { case APIC_BASE_MSR + (APIC_ICR >> 4): data = kvm_read_edx_eax(vcpu); - ret = handle_fastpath_set_x2apic_icr_irqoff(vcpu, data); + if (!handle_fastpath_set_x2apic_icr_irqoff(vcpu, data)) { + kvm_skip_emulated_instruction(vcpu); + ret = EXIT_FASTPATH_EXIT_HANDLED; + } + break; + case MSR_IA32_TSCDEADLINE: + data = kvm_read_edx_eax(vcpu); + if (!handle_fastpath_set_tscdeadline(vcpu, data)) { + kvm_skip_emulated_instruction(vcpu); + ret = EXIT_FASTPATH_REENTER_GUEST; + } break; default: - return EXIT_FASTPATH_NONE; + break; } - if (!ret) { + if (ret != EXIT_FASTPATH_NONE) trace_kvm_msr_write(msr, data); - return EXIT_FASTPATH_SKIP_EMUL_INS; - } - return EXIT_FASTPATH_NONE; + return ret; } EXPORT_SYMBOL_GPL(handle_fastpath_set_msr_irqoff); @@ -1901,7 +1944,7 @@ static void kvm_track_tsc_matching(struct kvm_vcpu *vcpu) static void update_ia32_tsc_adjust_msr(struct kvm_vcpu *vcpu, s64 offset) { - u64 curr_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu); + u64 curr_offset = vcpu->arch.l1_tsc_offset; vcpu->arch.ia32_tsc_adjust_msr += offset - curr_offset; } @@ -1943,14 +1986,13 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { - u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu); - - return tsc_offset + kvm_scale_tsc(vcpu, host_tsc); + return vcpu->arch.l1_tsc_offset + kvm_scale_tsc(vcpu, host_tsc); } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { + vcpu->arch.l1_tsc_offset = offset; vcpu->arch.tsc_offset = kvm_x86_ops.write_l1_tsc_offset(vcpu, offset); } @@ -2075,7 +2117,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { - u64 tsc_offset = kvm_x86_ops.read_l1_tsc_offset(vcpu); + u64 tsc_offset = vcpu->arch.l1_tsc_offset; kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment); } @@ -2637,29 +2679,54 @@ out: return r; } +static inline bool kvm_pv_async_pf_enabled(struct kvm_vcpu *vcpu) +{ + u64 mask = KVM_ASYNC_PF_ENABLED | KVM_ASYNC_PF_DELIVERY_AS_INT; + + return (vcpu->arch.apf.msr_en_val & mask) == mask; +} + static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; - /* Bits 3:5 are reserved, Should be zero */ - if (data & 0x38) + /* Bits 4:5 are reserved, Should be zero */ + if (data & 0x30) return 1; - vcpu->arch.apf.msr_val = data; + vcpu->arch.apf.msr_en_val = data; - if (!(data & KVM_ASYNC_PF_ENABLED)) { + if (!kvm_pv_async_pf_enabled(vcpu)) { kvm_clear_async_pf_completion_queue(vcpu); kvm_async_pf_hash_reset(vcpu); return 0; } if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, - sizeof(u32))) + sizeof(u64))) return 1; vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT; + kvm_async_pf_wakeup_all(vcpu); + + return 0; +} + +static int kvm_pv_enable_async_pf_int(struct kvm_vcpu *vcpu, u64 data) +{ + /* Bits 8-63 are reserved */ + if (data >> 8) + return 1; + + if (!lapic_in_kernel(vcpu)) + return 1; + + vcpu->arch.apf.msr_int_val = data; + + vcpu->arch.apf.vec = data & KVM_ASYNC_PF_VEC_MASK; + return 0; } @@ -2669,10 +2736,16 @@ static void kvmclock_reset(struct kvm_vcpu *vcpu) vcpu->arch.time = 0; } -static void kvm_vcpu_flush_tlb(struct kvm_vcpu *vcpu, bool invalidate_gpa) +static void kvm_vcpu_flush_tlb_all(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.tlb_flush; + kvm_x86_ops.tlb_flush_all(vcpu); +} + +static void kvm_vcpu_flush_tlb_guest(struct kvm_vcpu *vcpu) { ++vcpu->stat.tlb_flush; - kvm_x86_ops.tlb_flush(vcpu, invalidate_gpa); + kvm_x86_ops.tlb_flush_guest(vcpu); } static void record_steal_time(struct kvm_vcpu *vcpu) @@ -2698,7 +2771,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) trace_kvm_pv_tlb_flush(vcpu->vcpu_id, st->preempted & KVM_VCPU_FLUSH_TLB); if (xchg(&st->preempted, 0) & KVM_VCPU_FLUSH_TLB) - kvm_vcpu_flush_tlb(vcpu, false); + kvm_vcpu_flush_tlb_guest(vcpu); vcpu->arch.st.preempted = 0; @@ -2875,6 +2948,16 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pv_enable_async_pf(vcpu, data)) return 1; break; + case MSR_KVM_ASYNC_PF_INT: + if (kvm_pv_enable_async_pf_int(vcpu, data)) + return 1; + break; + case MSR_KVM_ASYNC_PF_ACK: + if (data & 0x1) { + vcpu->arch.apf.pageready_pending = false; + kvm_check_async_pf_completion(vcpu); + } + break; case MSR_KVM_STEAL_TIME: if (unlikely(!sched_info_on())) @@ -2932,6 +3015,8 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) */ break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: + case HV_X64_MSR_SYNDBG_OPTIONS: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: @@ -3071,7 +3156,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1: case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL1: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) - return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); + return kvm_pmu_get_msr(vcpu, msr_info); msr_info->data = 0; break; case MSR_IA32_UCODE_REV: @@ -3149,7 +3234,13 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = vcpu->arch.time; break; case MSR_KVM_ASYNC_PF_EN: - msr_info->data = vcpu->arch.apf.msr_val; + msr_info->data = vcpu->arch.apf.msr_en_val; + break; + case MSR_KVM_ASYNC_PF_INT: + msr_info->data = vcpu->arch.apf.msr_int_val; + break; + case MSR_KVM_ASYNC_PF_ACK: + msr_info->data = 0; break; case MSR_KVM_STEAL_TIME: msr_info->data = vcpu->arch.st.msr_val; @@ -3187,6 +3278,8 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) msr_info->data = 0x20000000; break; case HV_X64_MSR_GUEST_OS_ID ... HV_X64_MSR_SINT15: + case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER: + case HV_X64_MSR_SYNDBG_OPTIONS: case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4: case HV_X64_MSR_CRASH_CTL: case HV_X64_MSR_STIMER0_CONFIG ... HV_X64_MSR_STIMER3_COUNT: @@ -3233,7 +3326,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; default: if (kvm_pmu_is_valid_msr(vcpu, msr_info->index)) - return kvm_pmu_get_msr(vcpu, msr_info->index, &msr_info->data); + return kvm_pmu_get_msr(vcpu, msr_info); if (!ignore_msrs) { vcpu_debug_ratelimited(vcpu, "unhandled rdmsr: 0x%x\n", msr_info->index); @@ -3363,6 +3456,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_X86_ROBUST_SINGLESTEP: case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: + case KVM_CAP_ASYNC_PF_INT: case KVM_CAP_GET_TSC_KHZ: case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_READONLY_MEM: @@ -3431,14 +3525,14 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = KVM_X2APIC_API_VALID_FLAGS; break; case KVM_CAP_NESTED_STATE: - r = kvm_x86_ops.get_nested_state ? - kvm_x86_ops.get_nested_state(NULL, NULL, 0) : 0; + r = kvm_x86_ops.nested_ops->get_state ? + kvm_x86_ops.nested_ops->get_state(NULL, NULL, 0) : 0; break; case KVM_CAP_HYPERV_DIRECT_TLBFLUSH: r = kvm_x86_ops.enable_direct_tlbflush != NULL; break; case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: - r = kvm_x86_ops.nested_enable_evmcs != NULL; + r = kvm_x86_ops.nested_ops->enable_evmcs != NULL; break; default: break; @@ -4226,9 +4320,9 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return kvm_hv_activate_synic(vcpu, cap->cap == KVM_CAP_HYPERV_SYNIC2); case KVM_CAP_HYPERV_ENLIGHTENED_VMCS: - if (!kvm_x86_ops.nested_enable_evmcs) + if (!kvm_x86_ops.nested_ops->enable_evmcs) return -ENOTTY; - r = kvm_x86_ops.nested_enable_evmcs(vcpu, &vmcs_version); + r = kvm_x86_ops.nested_ops->enable_evmcs(vcpu, &vmcs_version); if (!r) { user_ptr = (void __user *)(uintptr_t)cap->args[0]; if (copy_to_user(user_ptr, &vmcs_version, @@ -4543,7 +4637,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, u32 user_data_size; r = -EINVAL; - if (!kvm_x86_ops.get_nested_state) + if (!kvm_x86_ops.nested_ops->get_state) break; BUILD_BUG_ON(sizeof(user_data_size) != sizeof(user_kvm_nested_state->size)); @@ -4551,8 +4645,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (get_user(user_data_size, &user_kvm_nested_state->size)) break; - r = kvm_x86_ops.get_nested_state(vcpu, user_kvm_nested_state, - user_data_size); + r = kvm_x86_ops.nested_ops->get_state(vcpu, user_kvm_nested_state, + user_data_size); if (r < 0) break; @@ -4573,7 +4667,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, int idx; r = -EINVAL; - if (!kvm_x86_ops.set_nested_state) + if (!kvm_x86_ops.nested_ops->set_state) break; r = -EFAULT; @@ -4586,7 +4680,8 @@ long kvm_arch_vcpu_ioctl(struct file *filp, if (kvm_state.flags & ~(KVM_STATE_NESTED_RUN_PENDING | KVM_STATE_NESTED_GUEST_MODE - | KVM_STATE_NESTED_EVMCS)) + | KVM_STATE_NESTED_EVMCS | KVM_STATE_NESTED_MTF_PENDING + | KVM_STATE_NESTED_GIF_SET)) break; /* nested_run_pending implies guest_mode. */ @@ -4595,7 +4690,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp, break; idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvm_x86_ops.set_nested_state(vcpu, user_kvm_nested_state, &kvm_state); + r = kvm_x86_ops.nested_ops->set_state(vcpu, user_kvm_nested_state, &kvm_state); srcu_read_unlock(&vcpu->kvm->srcu, idx); break; } @@ -5242,6 +5337,10 @@ static void kvm_init_msr_list(void) if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) continue; break; + case MSR_IA32_UMWAIT_CONTROL: + if (!kvm_cpu_cap_has(X86_FEATURE_WAITPKG)) + continue; + break; case MSR_IA32_RTIT_CTL: case MSR_IA32_RTIT_STATUS: if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT)) @@ -5259,7 +5358,7 @@ static void kvm_init_msr_list(void) !intel_pt_validate_hw_cap(PT_CAP_single_range_output))) continue; break; - case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: { + case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: if (!kvm_cpu_cap_has(X86_FEATURE_INTEL_PT) || msrs_to_save_all[i] - MSR_IA32_RTIT_ADDR0_A >= intel_pt_validate_hw_cap(PT_CAP_num_address_ranges) * 2) @@ -5274,7 +5373,7 @@ static void kvm_init_msr_list(void) if (msrs_to_save_all[i] - MSR_ARCH_PERFMON_EVENTSEL0 >= min(INTEL_PMC_MAX_GENERIC, x86_pmu.num_counters_gp)) continue; - } + break; default: break; } @@ -6403,7 +6502,7 @@ static bool inject_emulated_exception(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = vcpu->arch.emulate_ctxt; if (ctxt->exception.vector == PF_VECTOR) - return kvm_propagate_fault(vcpu, &ctxt->exception); + return kvm_inject_emulated_page_fault(vcpu, &ctxt->exception); if (ctxt->exception.error_code_valid) kvm_queue_exception_e(vcpu, ctxt->exception.vector, @@ -7669,14 +7768,17 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu) kvm_x86_ops.update_cr8_intercept(vcpu, tpr, max_irr); } -static int inject_pending_event(struct kvm_vcpu *vcpu) +static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) { int r; + bool can_inject = true; /* try to reinject previous events if any */ - if (vcpu->arch.exception.injected) + if (vcpu->arch.exception.injected) { kvm_x86_ops.queue_exception(vcpu); + can_inject = false; + } /* * Do not inject an NMI or interrupt if there is a pending * exception. Exceptions and interrupts are recognized at @@ -7692,22 +7794,28 @@ static int inject_pending_event(struct kvm_vcpu *vcpu) * fully complete the previous instruction. */ else if (!vcpu->arch.exception.pending) { - if (vcpu->arch.nmi_injected) + if (vcpu->arch.nmi_injected) { kvm_x86_ops.set_nmi(vcpu); - else if (vcpu->arch.interrupt.injected) + can_inject = false; + } else if (vcpu->arch.interrupt.injected) { kvm_x86_ops.set_irq(vcpu); + can_inject = false; + } } + WARN_ON_ONCE(vcpu->arch.exception.injected && + vcpu->arch.exception.pending); + /* * Call check_nested_events() even if we reinjected a previous event * in order for caller to determine if it should require immediate-exit * from L2 to L1 due to pending L1 events which require exit * from L2 to L1. */ - if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) { - r = kvm_x86_ops.check_nested_events(vcpu); - if (r != 0) - return r; + if (is_guest_mode(vcpu)) { + r = kvm_x86_ops.nested_ops->check_events(vcpu); + if (r < 0) + goto busy; } /* try to inject new event if pending */ @@ -7716,7 +7824,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu) vcpu->arch.exception.has_error_code, vcpu->arch.exception.error_code); - WARN_ON_ONCE(vcpu->arch.exception.injected); vcpu->arch.exception.pending = false; vcpu->arch.exception.injected = true; @@ -7725,16 +7832,6 @@ static int inject_pending_event(struct kvm_vcpu *vcpu) X86_EFLAGS_RF); if (vcpu->arch.exception.nr == DB_VECTOR) { - /* - * This code assumes that nSVM doesn't use - * check_nested_events(). If it does, the - * DR6/DR7 changes should happen before L1 - * gets a #VMEXIT for an intercepted #DB in - * L2. (Under VMX, on the other hand, the - * DR6/DR7 changes should not happen in the - * event of a VM-exit to L1 for an intercepted - * #DB in L2.) - */ kvm_deliver_exception_payload(vcpu); if (vcpu->arch.dr7 & DR7_GD) { vcpu->arch.dr7 &= ~DR7_GD; @@ -7743,42 +7840,72 @@ static int inject_pending_event(struct kvm_vcpu *vcpu) } kvm_x86_ops.queue_exception(vcpu); + can_inject = false; } - /* Don't consider new event if we re-injected an event */ - if (kvm_event_needs_reinjection(vcpu)) - return 0; + /* + * Finally, inject interrupt events. If an event cannot be injected + * due to architectural conditions (e.g. IF=0) a window-open exit + * will re-request KVM_REQ_EVENT. Sometimes however an event is pending + * and can architecturally be injected, but we cannot do it right now: + * an interrupt could have arrived just now and we have to inject it + * as a vmexit, or there could already an event in the queue, which is + * indicated by can_inject. In that case we request an immediate exit + * in order to make progress and get back here for another iteration. + * The kvm_x86_ops hooks communicate this by returning -EBUSY. + */ + if (vcpu->arch.smi_pending) { + r = can_inject ? kvm_x86_ops.smi_allowed(vcpu, true) : -EBUSY; + if (r < 0) + goto busy; + if (r) { + vcpu->arch.smi_pending = false; + ++vcpu->arch.smi_count; + enter_smm(vcpu); + can_inject = false; + } else + kvm_x86_ops.enable_smi_window(vcpu); + } - if (vcpu->arch.smi_pending && !is_smm(vcpu) && - kvm_x86_ops.smi_allowed(vcpu)) { - vcpu->arch.smi_pending = false; - ++vcpu->arch.smi_count; - enter_smm(vcpu); - } else if (vcpu->arch.nmi_pending && kvm_x86_ops.nmi_allowed(vcpu)) { - --vcpu->arch.nmi_pending; - vcpu->arch.nmi_injected = true; - kvm_x86_ops.set_nmi(vcpu); - } else if (kvm_cpu_has_injectable_intr(vcpu)) { - /* - * Because interrupts can be injected asynchronously, we are - * calling check_nested_events again here to avoid a race condition. - * See https://lkml.org/lkml/2014/7/2/60 for discussion about this - * proposal and current concerns. Perhaps we should be setting - * KVM_REQ_EVENT only on certain events and not unconditionally? - */ - if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) { - r = kvm_x86_ops.check_nested_events(vcpu); - if (r != 0) - return r; + if (vcpu->arch.nmi_pending) { + r = can_inject ? kvm_x86_ops.nmi_allowed(vcpu, true) : -EBUSY; + if (r < 0) + goto busy; + if (r) { + --vcpu->arch.nmi_pending; + vcpu->arch.nmi_injected = true; + kvm_x86_ops.set_nmi(vcpu); + can_inject = false; + WARN_ON(kvm_x86_ops.nmi_allowed(vcpu, true) < 0); } - if (kvm_x86_ops.interrupt_allowed(vcpu)) { - kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), - false); + if (vcpu->arch.nmi_pending) + kvm_x86_ops.enable_nmi_window(vcpu); + } + + if (kvm_cpu_has_injectable_intr(vcpu)) { + r = can_inject ? kvm_x86_ops.interrupt_allowed(vcpu, true) : -EBUSY; + if (r < 0) + goto busy; + if (r) { + kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false); kvm_x86_ops.set_irq(vcpu); + WARN_ON(kvm_x86_ops.interrupt_allowed(vcpu, true) < 0); } + if (kvm_cpu_has_injectable_intr(vcpu)) + kvm_x86_ops.enable_irq_window(vcpu); } - return 0; + if (is_guest_mode(vcpu) && + kvm_x86_ops.nested_ops->hv_timer_pending && + kvm_x86_ops.nested_ops->hv_timer_pending(vcpu)) + *req_immediate_exit = true; + + WARN_ON(vcpu->arch.exception.pending); + return; + +busy: + *req_immediate_exit = true; + return; } static void process_nmi(struct kvm_vcpu *vcpu) @@ -8169,24 +8296,13 @@ int kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, void kvm_vcpu_reload_apic_access_page(struct kvm_vcpu *vcpu) { - struct page *page = NULL; - if (!lapic_in_kernel(vcpu)) return; if (!kvm_x86_ops.set_apic_access_page_addr) return; - page = gfn_to_page(vcpu->kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - if (is_error_page(page)) - return; - kvm_x86_ops.set_apic_access_page_addr(vcpu, page_to_phys(page)); - - /* - * Do not pin apic access page in memory, the MMU notifier - * will call us again if it is migrated or swapped out. - */ - put_page(page); + kvm_x86_ops.set_apic_access_page_addr(vcpu); } void __kvm_request_immediate_exit(struct kvm_vcpu *vcpu) @@ -8206,13 +8322,13 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_int_win = dm_request_for_irq_injection(vcpu) && kvm_cpu_accept_dm_intr(vcpu); - enum exit_fastpath_completion exit_fastpath = EXIT_FASTPATH_NONE; + fastpath_t exit_fastpath; bool req_immediate_exit = false; if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_GET_VMCS12_PAGES, vcpu)) { - if (unlikely(!kvm_x86_ops.get_vmcs12_pages(vcpu))) { + if (unlikely(!kvm_x86_ops.nested_ops->get_vmcs12_pages(vcpu))) { r = 0; goto out; } @@ -8234,8 +8350,17 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_mmu_sync_roots(vcpu); if (kvm_check_request(KVM_REQ_LOAD_MMU_PGD, vcpu)) kvm_mmu_load_pgd(vcpu); - if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) - kvm_vcpu_flush_tlb(vcpu, true); + if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { + kvm_vcpu_flush_tlb_all(vcpu); + + /* Flushing all ASIDs flushes the current ASID... */ + kvm_clear_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); + } + if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu)) + kvm_vcpu_flush_tlb_current(vcpu); + if (kvm_check_request(KVM_REQ_HV_TLB_FLUSH, vcpu)) + kvm_vcpu_flush_tlb_guest(vcpu); + if (kvm_check_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu)) { vcpu->run->exit_reason = KVM_EXIT_TPR_ACCESS; r = 0; @@ -8308,6 +8433,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_hv_process_stimers(vcpu); if (kvm_check_request(KVM_REQ_APICV_UPDATE, vcpu)) kvm_vcpu_update_apicv(vcpu); + if (kvm_check_request(KVM_REQ_APF_READY, vcpu)) + kvm_check_async_pf_completion(vcpu); } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { @@ -8318,32 +8445,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) goto out; } - if (inject_pending_event(vcpu) != 0) - req_immediate_exit = true; - else { - /* Enable SMI/NMI/IRQ window open exits if needed. - * - * SMIs have three cases: - * 1) They can be nested, and then there is nothing to - * do here because RSM will cause a vmexit anyway. - * 2) There is an ISA-specific reason why SMI cannot be - * injected, and the moment when this changes can be - * intercepted. - * 3) Or the SMI can be pending because - * inject_pending_event has completed the injection - * of an IRQ or NMI from the previous vmexit, and - * then we request an immediate exit to inject the - * SMI. - */ - if (vcpu->arch.smi_pending && !is_smm(vcpu)) - if (!kvm_x86_ops.enable_smi_window(vcpu)) - req_immediate_exit = true; - if (vcpu->arch.nmi_pending) - kvm_x86_ops.enable_nmi_window(vcpu); - if (kvm_cpu_has_injectable_intr(vcpu) || req_int_win) - kvm_x86_ops.enable_irq_window(vcpu); - WARN_ON(vcpu->arch.exception.pending); - } + inject_pending_event(vcpu, &req_immediate_exit); + if (req_int_win) + kvm_x86_ops.enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { update_cr8_intercept(vcpu); @@ -8391,8 +8495,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) if (kvm_lapic_enabled(vcpu) && vcpu->arch.apicv_active) kvm_x86_ops.sync_pir_to_irr(vcpu); - if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) - || need_resched() || signal_pending(current)) { + if (kvm_vcpu_exit_request(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); local_irq_enable(); @@ -8424,7 +8527,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_RELOAD; } - kvm_x86_ops.run(vcpu); + exit_fastpath = kvm_x86_ops.run(vcpu); /* * Do this here before restoring debug registers on the host. And @@ -8455,7 +8558,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); - kvm_x86_ops.handle_exit_irqoff(vcpu, &exit_fastpath); + kvm_x86_ops.handle_exit_irqoff(vcpu); /* * Consume any pending interrupts, including the possible source of @@ -8502,6 +8605,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) return r; cancel_injection: + if (req_immediate_exit) + kvm_make_request(KVM_REQ_EVENT, vcpu); kvm_x86_ops.cancel_injection(vcpu); if (unlikely(vcpu->arch.apic_attention)) kvm_lapic_sync_from_vapic(vcpu); @@ -8544,8 +8649,8 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu) && kvm_x86_ops.check_nested_events) - kvm_x86_ops.check_nested_events(vcpu); + if (is_guest_mode(vcpu)) + kvm_x86_ops.nested_ops->check_events(vcpu); return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted); @@ -8581,8 +8686,6 @@ static int vcpu_run(struct kvm_vcpu *vcpu) break; } - kvm_check_async_pf_completion(vcpu); - if (signal_pending(current)) { r = -EINTR; vcpu->run->exit_reason = KVM_EXIT_INTR; @@ -8727,8 +8830,9 @@ static void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) trace_kvm_fpu(0); } -int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) +int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu) { + struct kvm_run *kvm_run = vcpu->run; int r; vcpu_load(vcpu); @@ -8746,18 +8850,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) r = -EAGAIN; if (signal_pending(current)) { r = -EINTR; - vcpu->run->exit_reason = KVM_EXIT_INTR; + kvm_run->exit_reason = KVM_EXIT_INTR; ++vcpu->stat.signal_exits; } goto out; } - if (vcpu->run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { + if (kvm_run->kvm_valid_regs & ~KVM_SYNC_X86_VALID_FIELDS) { r = -EINVAL; goto out; } - if (vcpu->run->kvm_dirty_regs) { + if (kvm_run->kvm_dirty_regs) { r = sync_regs(vcpu); if (r != 0) goto out; @@ -8787,7 +8891,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) out: kvm_put_guest_fpu(vcpu); - if (vcpu->run->kvm_valid_regs) + if (kvm_run->kvm_valid_regs) store_regs(vcpu); post_kvm_run_save(vcpu); kvm_sigset_deactivate(vcpu); @@ -9379,9 +9483,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) } fx_init(vcpu); - vcpu->arch.guest_xstate_size = XSAVE_HDR_SIZE + XSAVE_HDR_OFFSET; - vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu); + vcpu->arch.tdp_level = kvm_x86_ops.get_tdp_level(vcpu); vcpu->arch.pat = MSR_IA32_CR_PAT_DEFAULT; @@ -9502,7 +9605,8 @@ void kvm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.cr2 = 0; kvm_make_request(KVM_REQ_EVENT, vcpu); - vcpu->arch.apf.msr_val = 0; + vcpu->arch.apf.msr_en_val = 0; + vcpu->arch.apf.msr_int_val = 0; vcpu->arch.st.msr_val = 0; kvmclock_reset(vcpu); @@ -10040,7 +10144,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, { /* Still write protect RO slot */ if (new->flags & KVM_MEM_READONLY) { - kvm_mmu_slot_remove_write_access(kvm, new, PT_PAGE_TABLE_LEVEL); + kvm_mmu_slot_remove_write_access(kvm, new, PG_LEVEL_4K); return; } @@ -10080,7 +10184,7 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm, } else { int level = kvm_dirty_log_manual_protect_and_init_set(kvm) ? - PT_DIRECTORY_LEVEL : PT_PAGE_TABLE_LEVEL; + PG_LEVEL_2M : PG_LEVEL_4K; /* * If we're with initial-all-set, we don't need @@ -10182,11 +10286,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (kvm_test_request(KVM_REQ_NMI, vcpu) || (vcpu->arch.nmi_pending && - kvm_x86_ops.nmi_allowed(vcpu))) + kvm_x86_ops.nmi_allowed(vcpu, false))) return true; if (kvm_test_request(KVM_REQ_SMI, vcpu) || - (vcpu->arch.smi_pending && !is_smm(vcpu))) + (vcpu->arch.smi_pending && + kvm_x86_ops.smi_allowed(vcpu, false))) return true; if (kvm_arch_interrupt_allowed(vcpu) && @@ -10197,6 +10302,11 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) if (kvm_hv_has_stimer_pending(vcpu)) return true; + if (is_guest_mode(vcpu) && + kvm_x86_ops.nested_ops->hv_timer_pending && + kvm_x86_ops.nested_ops->hv_timer_pending(vcpu)) + return true; + return false; } @@ -10233,7 +10343,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) int kvm_arch_interrupt_allowed(struct kvm_vcpu *vcpu) { - return kvm_x86_ops.interrupt_allowed(vcpu); + return kvm_x86_ops.interrupt_allowed(vcpu, false); } unsigned long kvm_get_linear_rip(struct kvm_vcpu *vcpu) @@ -10298,12 +10408,14 @@ void kvm_arch_async_page_ready(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) static inline u32 kvm_async_pf_hash_fn(gfn_t gfn) { + BUILD_BUG_ON(!is_power_of_2(ASYNC_PF_PER_VCPU)); + return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU)); } static inline u32 kvm_async_pf_next_probe(u32 key) { - return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1); + return (key + 1) & (ASYNC_PF_PER_VCPU - 1); } static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) @@ -10321,7 +10433,7 @@ static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn) int i; u32 key = kvm_async_pf_hash_fn(gfn); - for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) && + for (i = 0; i < ASYNC_PF_PER_VCPU && (vcpu->arch.apf.gfns[key] != gfn && vcpu->arch.apf.gfns[key] != ~0); i++) key = kvm_async_pf_next_probe(key); @@ -10339,6 +10451,10 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) u32 i, j, k; i = j = kvm_async_pf_gfn_slot(vcpu, gfn); + + if (WARN_ON_ONCE(vcpu->arch.apf.gfns[i] != gfn)) + return; + while (true) { vcpu->arch.apf.gfns[i] = ~0; do { @@ -10357,18 +10473,32 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) } } -static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) +static inline int apf_put_user_notpresent(struct kvm_vcpu *vcpu) { + u32 reason = KVM_PV_REASON_PAGE_NOT_PRESENT; - return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, - sizeof(val)); + return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &reason, + sizeof(reason)); } -static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val) +static inline int apf_put_user_ready(struct kvm_vcpu *vcpu, u32 token) { + unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token); - return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, val, - sizeof(u32)); + return kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data, + &token, offset, sizeof(token)); +} + +static inline bool apf_pageready_slot_free(struct kvm_vcpu *vcpu) +{ + unsigned int offset = offsetof(struct kvm_vcpu_pv_apf_data, token); + u32 val; + + if (kvm_read_guest_offset_cached(vcpu->kvm, &vcpu->arch.apf.data, + &val, offset, sizeof(val))) + return false; + + return !val; } static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu) @@ -10376,9 +10506,8 @@ static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu) if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu)) return false; - if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) || - (vcpu->arch.apf.send_user_only && - kvm_x86_ops.get_cpl(vcpu) == 0)) + if (!kvm_pv_async_pf_enabled(vcpu) || + (vcpu->arch.apf.send_user_only && kvm_x86_ops.get_cpl(vcpu) == 0)) return false; return true; @@ -10398,7 +10527,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu) * If interrupts are off we cannot even use an artificial * halt state. */ - return kvm_x86_ops.interrupt_allowed(vcpu); + return kvm_arch_interrupt_allowed(vcpu); } void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, @@ -10410,7 +10539,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, kvm_add_async_pf_gfn(vcpu, work->arch.gfn); if (kvm_can_deliver_async_pf(vcpu) && - !apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) { + !apf_put_user_notpresent(vcpu)) { fault.vector = PF_VECTOR; fault.error_code_valid = true; fault.error_code = 0; @@ -10434,8 +10563,10 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, struct kvm_async_pf *work) { - struct x86_exception fault; - u32 val; + struct kvm_lapic_irq irq = { + .delivery_mode = APIC_DM_FIXED, + .vector = vcpu->arch.apf.vec + }; if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ @@ -10443,39 +10574,29 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, kvm_del_async_pf_gfn(vcpu, work->arch.gfn); trace_kvm_async_pf_ready(work->arch.token, work->cr2_or_gpa); - if (vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED && - !apf_get_user(vcpu, &val)) { - if (val == KVM_PV_REASON_PAGE_NOT_PRESENT && - vcpu->arch.exception.pending && - vcpu->arch.exception.nr == PF_VECTOR && - !apf_put_user(vcpu, 0)) { - vcpu->arch.exception.injected = false; - vcpu->arch.exception.pending = false; - vcpu->arch.exception.nr = 0; - vcpu->arch.exception.has_error_code = false; - vcpu->arch.exception.error_code = 0; - vcpu->arch.exception.has_payload = false; - vcpu->arch.exception.payload = 0; - } else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { - fault.vector = PF_VECTOR; - fault.error_code_valid = true; - fault.error_code = 0; - fault.nested_page_fault = false; - fault.address = work->arch.token; - fault.async_page_fault = true; - kvm_inject_page_fault(vcpu, &fault); - } + if (kvm_pv_async_pf_enabled(vcpu) && + !apf_put_user_ready(vcpu, work->arch.token)) { + vcpu->arch.apf.pageready_pending = true; + kvm_apic_set_irq(vcpu, &irq, NULL); } + vcpu->arch.apf.halted = false; vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE; } -bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu) +void kvm_arch_async_page_present_queued(struct kvm_vcpu *vcpu) +{ + kvm_make_request(KVM_REQ_APF_READY, vcpu); + if (!vcpu->arch.apf.pageready_pending) + kvm_vcpu_kick(vcpu); +} + +bool kvm_arch_can_dequeue_async_page_present(struct kvm_vcpu *vcpu) { - if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED)) + if (!kvm_pv_async_pf_enabled(vcpu)) return true; else - return kvm_can_do_async_pf(vcpu); + return apf_pageready_slot_free(vcpu); } void kvm_arch_start_assignment(struct kvm *kvm) diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index b968acc0516f..6eb62e97e59f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -125,6 +125,12 @@ static inline bool mmu_is_nested(struct kvm_vcpu *vcpu) return vcpu->arch.walk_mmu == &vcpu->arch.nested_mmu; } +static inline void kvm_vcpu_flush_tlb_current(struct kvm_vcpu *vcpu) +{ + ++vcpu->stat.tlb_flush; + kvm_x86_ops.tlb_flush_current(vcpu); +} + static inline int is_pae(struct kvm_vcpu *vcpu) { return kvm_read_cr4_bits(vcpu, X86_CR4_PAE); @@ -268,7 +274,7 @@ bool kvm_mtrr_check_gfn_range_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, bool kvm_vector_hashing_enabled(void); int x86_emulate_instruction(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, int emulation_type, void *insn, int insn_len); -enum exit_fastpath_completion handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); +fastpath_t handle_fastpath_set_msr_irqoff(struct kvm_vcpu *vcpu); extern u64 host_xcr0; extern u64 supported_xcr0; @@ -358,5 +364,6 @@ static inline bool kvm_dr7_valid(u64 data) void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu); void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu); u64 kvm_spec_ctrl_valid_bits(struct kvm_vcpu *vcpu); +bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu); #endif diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index 4742e8fa7ee7..d1d768912368 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -153,7 +153,7 @@ SYM_FUNC_START(csum_partial) negl %ebx lea 45f(%ebx,%ebx,2), %ebx testl %esi, %esi - JMP_NOSPEC %ebx + JMP_NOSPEC ebx # Handle 2-byte-aligned regions 20: addw (%esi), %ax @@ -436,7 +436,7 @@ SYM_FUNC_START(csum_partial_copy_generic) andl $-32,%edx lea 3f(%ebx,%ebx), %ebx testl %esi, %esi - JMP_NOSPEC %ebx + JMP_NOSPEC ebx 1: addl $64,%esi addl $64,%edi SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index c66c8b00f236..ee63d7576fd2 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -10,7 +10,7 @@ #include <asm/smap.h> /** - * csum_partial_copy_from_user - Copy and checksum from user space. + * csum_and_copy_from_user - Copy and checksum from user space. * @src: source address (user space) * @dst: destination address * @len: number of bytes to be copied. @@ -21,13 +21,13 @@ * src and dst are best aligned to 64bits. */ __wsum -csum_partial_copy_from_user(const void __user *src, void *dst, +csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum isum, int *errp) { might_sleep(); *errp = 0; - if (!likely(access_ok(src, len))) + if (!user_access_begin(src, len)) goto out_err; /* @@ -42,8 +42,7 @@ csum_partial_copy_from_user(const void __user *src, void *dst, while (((unsigned long)src & 6) && len >= 2) { __u16 val16; - if (__get_user(val16, (const __u16 __user *)src)) - goto out_err; + unsafe_get_user(val16, (const __u16 __user *)src, out); *(__u16 *)dst = val16; isum = (__force __wsum)add32_with_carry( @@ -53,25 +52,26 @@ csum_partial_copy_from_user(const void __user *src, void *dst, len -= 2; } } - stac(); isum = csum_partial_copy_generic((__force const void *)src, dst, len, isum, errp, NULL); - clac(); + user_access_end(); if (unlikely(*errp)) goto out_err; return isum; +out: + user_access_end(); out_err: *errp = -EFAULT; memset(dst, 0, len); return isum; } -EXPORT_SYMBOL(csum_partial_copy_from_user); +EXPORT_SYMBOL(csum_and_copy_from_user); /** - * csum_partial_copy_to_user - Copy and checksum to user space. + * csum_and_copy_to_user - Copy and checksum to user space. * @src: source address * @dst: destination address (user space) * @len: number of bytes to be copied. @@ -82,14 +82,14 @@ EXPORT_SYMBOL(csum_partial_copy_from_user); * src and dst are best aligned to 64bits. */ __wsum -csum_partial_copy_to_user(const void *src, void __user *dst, +csum_and_copy_to_user(const void *src, void __user *dst, int len, __wsum isum, int *errp) { __wsum ret; might_sleep(); - if (unlikely(!access_ok(dst, len))) { + if (!user_access_begin(dst, len)) { *errp = -EFAULT; return 0; } @@ -100,9 +100,7 @@ csum_partial_copy_to_user(const void *src, void __user *dst, isum = (__force __wsum)add32_with_carry( (__force unsigned)isum, val16); - *errp = __put_user(val16, (__u16 __user *)dst); - if (*errp) - return isum; + unsafe_put_user(val16, (__u16 __user *)dst, out); src += 2; dst += 2; len -= 2; @@ -110,13 +108,16 @@ csum_partial_copy_to_user(const void *src, void __user *dst, } *errp = 0; - stac(); ret = csum_partial_copy_generic(src, (void __force *)dst, len, isum, NULL, errp); - clac(); + user_access_end(); return ret; +out: + user_access_end(); + *errp = -EFAULT; + return isum; } -EXPORT_SYMBOL(csum_partial_copy_to_user); +EXPORT_SYMBOL(csum_and_copy_to_user); /** * csum_partial_copy_nocheck - Copy and checksum. diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index c126571e5e2e..65d15df6212d 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -27,9 +27,20 @@ # include <asm/smp.h> #endif +static void delay_loop(u64 __loops); + +/* + * Calibration and selection of the delay mechanism happens only once + * during boot. + */ +static void (*delay_fn)(u64) __ro_after_init = delay_loop; +static void (*delay_halt_fn)(u64 start, u64 cycles) __ro_after_init; + /* simple loop based delay: */ -static void delay_loop(unsigned long loops) +static void delay_loop(u64 __loops) { + unsigned long loops = (unsigned long)__loops; + asm volatile( " test %0,%0 \n" " jz 3f \n" @@ -49,9 +60,9 @@ static void delay_loop(unsigned long loops) } /* TSC based delay: */ -static void delay_tsc(unsigned long __loops) +static void delay_tsc(u64 cycles) { - u64 bclock, now, loops = __loops; + u64 bclock, now; int cpu; preempt_disable(); @@ -59,7 +70,7 @@ static void delay_tsc(unsigned long __loops) bclock = rdtsc_ordered(); for (;;) { now = rdtsc_ordered(); - if ((now - bclock) >= loops) + if ((now - bclock) >= cycles) break; /* Allow RT tasks to run */ @@ -77,7 +88,7 @@ static void delay_tsc(unsigned long __loops) * counter for this CPU. */ if (unlikely(cpu != smp_processor_id())) { - loops -= (now - bclock); + cycles -= (now - bclock); cpu = smp_processor_id(); bclock = rdtsc_ordered(); } @@ -86,65 +97,96 @@ static void delay_tsc(unsigned long __loops) } /* + * On Intel the TPAUSE instruction waits until any of: + * 1) the TSC counter exceeds the value provided in EDX:EAX + * 2) global timeout in IA32_UMWAIT_CONTROL is exceeded + * 3) an external interrupt occurs + */ +static void delay_halt_tpause(u64 start, u64 cycles) +{ + u64 until = start + cycles; + u32 eax, edx; + + eax = lower_32_bits(until); + edx = upper_32_bits(until); + + /* + * Hard code the deeper (C0.2) sleep state because exit latency is + * small compared to the "microseconds" that usleep() will delay. + */ + __tpause(TPAUSE_C02_STATE, edx, eax); +} + +/* * On some AMD platforms, MWAITX has a configurable 32-bit timer, that - * counts with TSC frequency. The input value is the loop of the - * counter, it will exit when the timer expires. + * counts with TSC frequency. The input value is the number of TSC cycles + * to wait. MWAITX will also exit when the timer expires. */ -static void delay_mwaitx(unsigned long __loops) +static void delay_halt_mwaitx(u64 unused, u64 cycles) { - u64 start, end, delay, loops = __loops; + u64 delay; + + delay = min_t(u64, MWAITX_MAX_WAIT_CYCLES, cycles); + /* + * Use cpu_tss_rw as a cacheline-aligned, seldomly accessed per-cpu + * variable as the monitor target. + */ + __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); + + /* + * AMD, like Intel, supports the EAX hint and EAX=0xf means, do not + * enter any deep C-state and we use it here in delay() to minimize + * wakeup latency. + */ + __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); +} + +/* + * Call a vendor specific function to delay for a given amount of time. Because + * these functions may return earlier than requested, check for actual elapsed + * time and call again until done. + */ +static void delay_halt(u64 __cycles) +{ + u64 start, end, cycles = __cycles; /* * Timer value of 0 causes MWAITX to wait indefinitely, unless there * is a store on the memory monitored by MONITORX. */ - if (loops == 0) + if (!cycles) return; start = rdtsc_ordered(); for (;;) { - delay = min_t(u64, MWAITX_MAX_LOOPS, loops); - - /* - * Use cpu_tss_rw as a cacheline-aligned, seldomly - * accessed per-cpu variable as the monitor target. - */ - __monitorx(raw_cpu_ptr(&cpu_tss_rw), 0, 0); - - /* - * AMD, like Intel's MWAIT version, supports the EAX hint and - * EAX=0xf0 means, do not enter any deep C-state and we use it - * here in delay() to minimize wakeup latency. - */ - __mwaitx(MWAITX_DISABLE_CSTATES, delay, MWAITX_ECX_TIMER_ENABLE); - + delay_halt_fn(start, cycles); end = rdtsc_ordered(); - if (loops <= end - start) + if (cycles <= end - start) break; - loops -= end - start; - + cycles -= end - start; start = end; } } -/* - * Since we calibrate only once at boot, this - * function should be set once at boot and not changed - */ -static void (*delay_fn)(unsigned long) = delay_loop; - -void use_tsc_delay(void) +void __init use_tsc_delay(void) { if (delay_fn == delay_loop) delay_fn = delay_tsc; } +void __init use_tpause_delay(void) +{ + delay_halt_fn = delay_halt_tpause; + delay_fn = delay_halt; +} + void use_mwaitx_delay(void) { - delay_fn = delay_mwaitx; + delay_halt_fn = delay_halt_mwaitx; + delay_fn = delay_halt; } int read_current_timer(unsigned long *timer_val) diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 363ec132df7e..b4c43a9b1483 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -7,15 +7,31 @@ #include <asm/alternative-asm.h> #include <asm/export.h> #include <asm/nospec-branch.h> +#include <asm/unwind_hints.h> +#include <asm/frame.h> .macro THUNK reg .section .text.__x86.indirect_thunk + .align 32 SYM_FUNC_START(__x86_indirect_thunk_\reg) - CFI_STARTPROC - JMP_NOSPEC %\reg - CFI_ENDPROC + JMP_NOSPEC \reg SYM_FUNC_END(__x86_indirect_thunk_\reg) + +SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg) + ANNOTATE_INTRA_FUNCTION_CALL + call .Ldo_rop_\@ +.Lspec_trap_\@: + UNWIND_HINT_EMPTY + pause + lfence + jmp .Lspec_trap_\@ +.Ldo_rop_\@: + mov %\reg, (%_ASM_SP) + UNWIND_HINT_RET_OFFSET + ret +SYM_FUNC_END(__x86_retpoline_\reg) + .endm /* @@ -24,25 +40,24 @@ SYM_FUNC_END(__x86_indirect_thunk_\reg) * only see one instance of "__x86_indirect_thunk_\reg" rather * than one per register with the correct names. So we do it * the simple and nasty way... + * + * Worse, you can only have a single EXPORT_SYMBOL per line, + * and CPP can't insert newlines, so we have to repeat everything + * at least twice. */ -#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) -#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) -#define GENERATE_THUNK(reg) THUNK reg ; EXPORT_THUNK(reg) - -GENERATE_THUNK(_ASM_AX) -GENERATE_THUNK(_ASM_BX) -GENERATE_THUNK(_ASM_CX) -GENERATE_THUNK(_ASM_DX) -GENERATE_THUNK(_ASM_SI) -GENERATE_THUNK(_ASM_DI) -GENERATE_THUNK(_ASM_BP) -#ifdef CONFIG_64BIT -GENERATE_THUNK(r8) -GENERATE_THUNK(r9) -GENERATE_THUNK(r10) -GENERATE_THUNK(r11) -GENERATE_THUNK(r12) -GENERATE_THUNK(r13) -GENERATE_THUNK(r14) -GENERATE_THUNK(r15) -#endif + +#define __EXPORT_THUNK(sym) _ASM_NOKPROBE(sym); EXPORT_SYMBOL(sym) +#define EXPORT_THUNK(reg) __EXPORT_THUNK(__x86_indirect_thunk_ ## reg) +#define EXPORT_RETPOLINE(reg) __EXPORT_THUNK(__x86_retpoline_ ## reg) + +#undef GEN +#define GEN(reg) THUNK reg +#include <asm/GEN-for-each-reg.h> + +#undef GEN +#define GEN(reg) EXPORT_THUNK(reg) +#include <asm/GEN-for-each-reg.h> + +#undef GEN +#define GEN(reg) EXPORT_RETPOLINE(reg) +#include <asm/GEN-for-each-reg.h> diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 56f9189bbadb..5199d8a1daf1 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -17,7 +17,7 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); #endif -#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT) +#ifdef CONFIG_X86_32 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); #endif @@ -114,12 +114,10 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) #else static inline void percpu_setup_exception_stacks(unsigned int cpu) { -#ifdef CONFIG_DOUBLEFAULT struct cpu_entry_area *cea = get_cpu_entry_area(cpu); cea_map_percpu_pages(&cea->doublefault_stack, &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL); -#endif } #endif diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 69309cd56fdf..ea9010113f69 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -249,10 +249,22 @@ static void note_wx(struct pg_state *st, unsigned long addr) (void *)st->start_address); } -static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) +static void effective_prot(struct ptdump_state *pt_st, int level, u64 val) { - return (prot1 & prot2 & (_PAGE_USER | _PAGE_RW)) | - ((prot1 | prot2) & _PAGE_NX); + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + pgprotval_t prot = val & PTE_FLAGS_MASK; + pgprotval_t effective; + + if (level > 0) { + pgprotval_t higher_prot = st->prot_levels[level - 1]; + + effective = (higher_prot & prot & (_PAGE_USER | _PAGE_RW)) | + ((higher_prot | prot) & _PAGE_NX); + } else { + effective = prot; + } + + st->prot_levels[level] = effective; } /* @@ -261,7 +273,7 @@ static inline pgprotval_t effective_prot(pgprotval_t prot1, pgprotval_t prot2) * print what we collected so far. */ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, - unsigned long val) + u64 val) { struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); pgprotval_t new_prot, new_eff; @@ -270,16 +282,10 @@ static void note_page(struct ptdump_state *pt_st, unsigned long addr, int level, struct seq_file *m = st->seq; new_prot = val & PTE_FLAGS_MASK; - - if (level > 0) { - new_eff = effective_prot(st->prot_levels[level - 1], - new_prot); - } else { - new_eff = new_prot; - } - - if (level >= 0) - st->prot_levels[level] = new_eff; + if (!val) + new_eff = 0; + else + new_eff = st->prot_levels[level]; /* * If we have a "break" in the series, we need to flush the state that @@ -374,6 +380,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, struct pg_state st = { .ptdump = { .note_page = note_page, + .effective_prot = effective_prot, .range = ptdump_ranges }, .level = -1, diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index a51df516b87b..c5437f2964ee 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -30,6 +30,7 @@ #include <asm/desc.h> /* store_idt(), ... */ #include <asm/cpu_entry_area.h> /* exception stack */ #include <asm/pgtable_areas.h> /* VMALLOC_START, ... */ +#include <asm/kvm_para.h> /* kvm_handle_async_pf */ #define CREATE_TRACE_POINTS #include <asm/trace/exceptions.h> @@ -190,16 +191,13 @@ static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address) return pmd_k; } -static void vmalloc_sync(void) +void arch_sync_kernel_mappings(unsigned long start, unsigned long end) { - unsigned long address; - - if (SHARED_KERNEL_PMD) - return; + unsigned long addr; - for (address = VMALLOC_START & PMD_MASK; - address >= TASK_SIZE_MAX && address < VMALLOC_END; - address += PMD_SIZE) { + for (addr = start & PMD_MASK; + addr >= TASK_SIZE_MAX && addr < VMALLOC_END; + addr += PMD_SIZE) { struct page *page; spin_lock(&pgd_lock); @@ -210,61 +208,13 @@ static void vmalloc_sync(void) pgt_lock = &pgd_page_get_mm(page)->page_table_lock; spin_lock(pgt_lock); - vmalloc_sync_one(page_address(page), address); + vmalloc_sync_one(page_address(page), addr); spin_unlock(pgt_lock); } spin_unlock(&pgd_lock); } } -void vmalloc_sync_mappings(void) -{ - vmalloc_sync(); -} - -void vmalloc_sync_unmappings(void) -{ - vmalloc_sync(); -} - -/* - * 32-bit: - * - * Handle a fault on the vmalloc or module mapping area - */ -static noinline int vmalloc_fault(unsigned long address) -{ - unsigned long pgd_paddr; - pmd_t *pmd_k; - pte_t *pte_k; - - /* Make sure we are in vmalloc area: */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - - /* - * Synchronize this task's top level page-table - * with the 'reference' page table. - * - * Do _not_ use "current" here. We might be inside - * an interrupt in the middle of a task switch.. - */ - pgd_paddr = read_cr3_pa(); - pmd_k = vmalloc_sync_one(__va(pgd_paddr), address); - if (!pmd_k) - return -1; - - if (pmd_large(*pmd_k)) - return 0; - - pte_k = pte_offset_kernel(pmd_k, address); - if (!pte_present(*pte_k)) - return -1; - - return 0; -} -NOKPROBE_SYMBOL(vmalloc_fault); - /* * Did it hit the DOS screen memory VA from vm86 mode? */ @@ -329,96 +279,6 @@ out: #else /* CONFIG_X86_64: */ -void vmalloc_sync_mappings(void) -{ - /* - * 64-bit mappings might allocate new p4d/pud pages - * that need to be propagated to all tasks' PGDs. - */ - sync_global_pgds(VMALLOC_START & PGDIR_MASK, VMALLOC_END); -} - -void vmalloc_sync_unmappings(void) -{ - /* - * Unmappings never allocate or free p4d/pud pages. - * No work is required here. - */ -} - -/* - * 64-bit: - * - * Handle a fault on the vmalloc area - */ -static noinline int vmalloc_fault(unsigned long address) -{ - pgd_t *pgd, *pgd_k; - p4d_t *p4d, *p4d_k; - pud_t *pud; - pmd_t *pmd; - pte_t *pte; - - /* Make sure we are in vmalloc area: */ - if (!(address >= VMALLOC_START && address < VMALLOC_END)) - return -1; - - /* - * Copy kernel mappings over when needed. This can also - * happen within a race in page table update. In the later - * case just flush: - */ - pgd = (pgd_t *)__va(read_cr3_pa()) + pgd_index(address); - pgd_k = pgd_offset_k(address); - if (pgd_none(*pgd_k)) - return -1; - - if (pgtable_l5_enabled()) { - if (pgd_none(*pgd)) { - set_pgd(pgd, *pgd_k); - arch_flush_lazy_mmu_mode(); - } else { - BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_k)); - } - } - - /* With 4-level paging, copying happens on the p4d level. */ - p4d = p4d_offset(pgd, address); - p4d_k = p4d_offset(pgd_k, address); - if (p4d_none(*p4d_k)) - return -1; - - if (p4d_none(*p4d) && !pgtable_l5_enabled()) { - set_p4d(p4d, *p4d_k); - arch_flush_lazy_mmu_mode(); - } else { - BUG_ON(p4d_pfn(*p4d) != p4d_pfn(*p4d_k)); - } - - BUILD_BUG_ON(CONFIG_PGTABLE_LEVELS < 4); - - pud = pud_offset(p4d, address); - if (pud_none(*pud)) - return -1; - - if (pud_large(*pud)) - return 0; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd)) - return -1; - - if (pmd_large(*pmd)) - return 0; - - pte = pte_offset_kernel(pmd, address); - if (!pte_present(*pte)) - return -1; - - return 0; -} -NOKPROBE_SYMBOL(vmalloc_fault); - #ifdef CONFIG_CPU_SUP_AMD static const char errata93_warning[] = KERN_ERR @@ -1257,29 +1117,6 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code, */ WARN_ON_ONCE(hw_error_code & X86_PF_PK); - /* - * We can fault-in kernel-space virtual memory on-demand. The - * 'reference' page table is init_mm.pgd. - * - * NOTE! We MUST NOT take any locks for this case. We may - * be in an interrupt or a critical region, and should - * only copy the information from the master page table, - * nothing more. - * - * Before doing this on-demand faulting, ensure that the - * fault is not any of the following: - * 1. A fault on a PTE with a reserved bit set. - * 2. A fault caused by a user-mode access. (Do not demand- - * fault kernel memory due to user-mode accesses). - * 3. A fault caused by a page-level protection violation. - * (A demand fault would be on a non-present page which - * would have X86_PF_PROT==0). - */ - if (!(hw_error_code & (X86_PF_RSVD | X86_PF_USER | X86_PF_PROT))) { - if (vmalloc_fault(address) >= 0) - return; - } - /* Was the fault spurious, caused by lazy TLB invalidation? */ if (spurious_kernel_fault(hw_error_code, address)) return; @@ -1523,6 +1360,24 @@ do_page_fault(struct pt_regs *regs, unsigned long hw_error_code, unsigned long address) { prefetchw(¤t->mm->mmap_sem); + /* + * KVM has two types of events that are, logically, interrupts, but + * are unfortunately delivered using the #PF vector. These events are + * "you just accessed valid memory, but the host doesn't have it right + * now, so I'll put you to sleep if you continue" and "that memory + * you tried to access earlier is available now." + * + * We are relying on the interrupted context being sane (valid RSP, + * relevant locks not held, etc.), which is fine as long as the + * interrupted context had IF=1. We are also relying on the KVM + * async pf type field and CR2 being read consistently instead of + * getting values from real and async page faults mixed up. + * + * Fingers crossed. + */ + if (kvm_handle_async_pf(regs, (u32)address)) + return; + trace_page_fault_entries(regs, hw_error_code, address); if (unlikely(kmmio_fault(regs, address))) diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 1bba16c5742b..a573a3e63f02 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -121,8 +121,6 @@ __ref void *alloc_low_pages(unsigned int num) } else { pfn = pgt_buf_end; pgt_buf_end += num; - printk(KERN_DEBUG "BRK [%#010lx, %#010lx] PGTABLE\n", - pfn << PAGE_SHIFT, (pgt_buf_end << PAGE_SHIFT) - 1); } for (i = 0; i < num; i++) { diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 8b5f73f5e207..96274a90c5ff 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -218,6 +218,11 @@ void sync_global_pgds(unsigned long start, unsigned long end) sync_global_pgds_l4(start, end); } +void arch_sync_kernel_mappings(unsigned long start, unsigned long end) +{ + sync_global_pgds(start, end); +} + /* * NOTE: This function is marked __ref because it calls __init function * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index cb91eccc4960..c90c20904a60 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -18,7 +18,9 @@ #include <linux/sched/signal.h> #include <linux/sched/mm.h> #include <linux/compat.h> +#include <linux/elf-randomize.h> #include <asm/elf.h> +#include <asm/io.h> #include "physaddr.h" diff --git a/arch/x86/mm/numa_32.c b/arch/x86/mm/numa_32.c index f2bd3d61e16b..104544359d69 100644 --- a/arch/x86/mm/numa_32.c +++ b/arch/x86/mm/numa_32.c @@ -27,40 +27,6 @@ #include "numa_internal.h" -#ifdef CONFIG_DISCONTIGMEM -/* - * 4) physnode_map - the mapping between a pfn and owning node - * physnode_map keeps track of the physical memory layout of a generic - * numa node on a 64Mb break (each element of the array will - * represent 64Mb of memory and will be marked by the node id. so, - * if the first gig is on node 0, and the second gig is on node 1 - * physnode_map will contain: - * - * physnode_map[0-15] = 0; - * physnode_map[16-31] = 1; - * physnode_map[32- ] = -1; - */ -s8 physnode_map[MAX_SECTIONS] __read_mostly = { [0 ... (MAX_SECTIONS - 1)] = -1}; -EXPORT_SYMBOL(physnode_map); - -void memory_present(int nid, unsigned long start, unsigned long end) -{ - unsigned long pfn; - - printk(KERN_INFO "Node: %d, start_pfn: %lx, end_pfn: %lx\n", - nid, start, end); - printk(KERN_DEBUG " Setting physnode_map array to node %d for pfns:\n", nid); - printk(KERN_DEBUG " "); - start = round_down(start, PAGES_PER_SECTION); - end = round_up(end, PAGES_PER_SECTION); - for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) { - physnode_map[pfn / PAGES_PER_SECTION] = nid; - printk(KERN_CONT "%lx ", pfn); - } - printk(KERN_CONT "\n"); -} -#endif - extern unsigned long highend_pfn, highstart_pfn; void __init initmem_init(void) diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 843aa10a4cb6..da0fb17a1a36 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -448,13 +448,7 @@ static void __init pti_clone_user_shared(void) * the sp1 and sp2 slots. * * This is done for all possible CPUs during boot to ensure - * that it's propagated to all mms. If we were to add one of - * these mappings during CPU hotplug, we would need to take - * some measure to make sure that every mm that subsequently - * ran on that CPU would have the relevant PGD entry in its - * pagetables. The usual vmalloc_fault() mechanism would not - * work for page faults taken in entry_SYSCALL_64 before RSP - * is set up. + * that it's propagated to all mms. */ unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu); diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index 66f96f21a7b6..f3fe261e5936 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -161,34 +161,6 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next, local_irq_restore(flags); } -static void sync_current_stack_to_mm(struct mm_struct *mm) -{ - unsigned long sp = current_stack_pointer; - pgd_t *pgd = pgd_offset(mm, sp); - - if (pgtable_l5_enabled()) { - if (unlikely(pgd_none(*pgd))) { - pgd_t *pgd_ref = pgd_offset_k(sp); - - set_pgd(pgd, *pgd_ref); - } - } else { - /* - * "pgd" is faked. The top level entries are "p4d"s, so sync - * the p4d. This compiles to approximately the same code as - * the 5-level case. - */ - p4d_t *p4d = p4d_offset(pgd, sp); - - if (unlikely(p4d_none(*p4d))) { - pgd_t *pgd_ref = pgd_offset_k(sp); - p4d_t *p4d_ref = p4d_offset(pgd_ref, sp); - - set_p4d(p4d, *p4d_ref); - } - } -} - static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) { unsigned long next_tif = task_thread_info(next)->flags; @@ -377,15 +349,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, */ cond_ibpb(tsk); - if (IS_ENABLED(CONFIG_VMAP_STACK)) { - /* - * If our current stack is in vmalloc space and isn't - * mapped in the new pgd, we'll double-fault. Forcibly - * map it. - */ - sync_current_stack_to_mm(next); - } - /* * Stop remote flushes for the previous mm. * Skip kernel threads; we never send init_mm TLB flushing IPIs, diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 1aae5302501d..e966115d105c 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -62,12 +62,12 @@ static unsigned long efi_runtime, efi_nr_tables; unsigned long efi_fw_vendor, efi_config_table; static const efi_config_table_type_t arch_tables[] __initconst = { - {EFI_PROPERTIES_TABLE_GUID, "PROP", &prop_phys}, - {UGA_IO_PROTOCOL_GUID, "UGA", &uga_phys}, + {EFI_PROPERTIES_TABLE_GUID, &prop_phys, "PROP" }, + {UGA_IO_PROTOCOL_GUID, &uga_phys, "UGA" }, #ifdef CONFIG_X86_UV - {UV_SYSTEM_TABLE_GUID, "UVsystab", &uv_systab_phys}, + {UV_SYSTEM_TABLE_GUID, &uv_systab_phys, "UVsystab" }, #endif - {NULL_GUID, NULL, NULL}, + {}, }; static const unsigned long * const efi_tables[] = { diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S index 15da118f04f0..90380a17ab23 100644 --- a/arch/x86/platform/efi/efi_stub_64.S +++ b/arch/x86/platform/efi/efi_stub_64.S @@ -21,7 +21,7 @@ SYM_FUNC_START(__efi_call) mov %r8, %r9 mov %rcx, %r8 mov %rsi, %rcx - CALL_NOSPEC %rdi + CALL_NOSPEC rdi leave ret SYM_FUNC_END(__efi_call) diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c index c60255da5a6c..4494589a288a 100644 --- a/arch/x86/platform/uv/bios_uv.c +++ b/arch/x86/platform/uv/bios_uv.c @@ -45,7 +45,8 @@ static s64 __uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, return ret; } -s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) +static s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, + u64 a5) { s64 ret; @@ -57,10 +58,9 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5) return ret; } -EXPORT_SYMBOL_GPL(uv_bios_call); -s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, - u64 a4, u64 a5) +static s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, + u64 a4, u64 a5) { unsigned long bios_flags; s64 ret; @@ -77,18 +77,13 @@ s64 uv_bios_call_irqsave(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, return ret; } - long sn_partition_id; EXPORT_SYMBOL_GPL(sn_partition_id); long sn_coherency_id; -EXPORT_SYMBOL_GPL(sn_coherency_id); long sn_region_size; EXPORT_SYMBOL_GPL(sn_region_size); long system_serial_number; -EXPORT_SYMBOL_GPL(system_serial_number); int uv_type; -EXPORT_SYMBOL_GPL(uv_type); - s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, long *region, long *ssn) @@ -115,7 +110,6 @@ s64 uv_bios_get_sn_info(int fc, int *uvtype, long *partid, long *coher, *ssn = v1; return ret; } -EXPORT_SYMBOL_GPL(uv_bios_get_sn_info); int uv_bios_mq_watchlist_alloc(unsigned long addr, unsigned int mq_size, @@ -166,7 +160,6 @@ s64 uv_bios_freq_base(u64 clock_type, u64 *ticks_per_second) return uv_bios_call(UV_BIOS_FREQ_BASE, clock_type, (u64)ticks_per_second, 0, 0, 0); } -EXPORT_SYMBOL_GPL(uv_bios_freq_base); /* * uv_bios_set_legacy_vga_target - Set Legacy VGA I/O Target @@ -185,7 +178,6 @@ int uv_bios_set_legacy_vga_target(bool decode, int domain, int bus) return uv_bios_call(UV_BIOS_SET_LEGACY_VGA_TARGET, (u64)decode, (u64)domain, (u64)bus, 0, 0); } -EXPORT_SYMBOL_GPL(uv_bios_set_legacy_vga_target); int uv_bios_init(void) { diff --git a/arch/x86/platform/uv/uv_sysfs.c b/arch/x86/platform/uv/uv_sysfs.c index 62214731fea5..266773e2fb37 100644 --- a/arch/x86/platform/uv/uv_sysfs.c +++ b/arch/x86/platform/uv/uv_sysfs.c @@ -21,7 +21,7 @@ static ssize_t partition_id_show(struct kobject *kobj, static ssize_t coherence_id_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%ld\n", uv_partition_coherence_id()); + return snprintf(buf, PAGE_SIZE, "%ld\n", sn_coherency_id); } static struct kobj_attribute partition_id_attr = diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index aaff9ed7ff45..fc3b757afb2c 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -307,7 +307,7 @@ int hibernate_resume_nonboot_cpu_disable(void) if (ret) return ret; smp_ops.play_dead = resume_play_dead; - ret = disable_nonboot_cpus(); + ret = freeze_secondary_cpus(0); smp_ops.play_dead = play_dead; return ret; } diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h index 2a56cac64687..ff6bba2c8ab6 100644 --- a/arch/x86/um/asm/checksum.h +++ b/arch/x86/um/asm/checksum.h @@ -36,26 +36,6 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst, return csum_partial(dst, len, sum); } -/* - * the same as csum_partial, but copies from src while it - * checksums, and handles user-space pointer exceptions correctly, when needed. - * - * here even more important to align src and dst on a 32-bit (or even - * better 64-bit) boundary - */ - -static __inline__ -__wsum csum_partial_copy_from_user(const void __user *src, void *dst, - int len, __wsum sum, int *err_ptr) -{ - if (copy_from_user(dst, src, len)) { - *err_ptr = -EFAULT; - return (__force __wsum)-1; - } - - return csum_partial(dst, len, sum); -} - /** * csum_fold - Fold and invert a 32bit checksum. * sum: 32bit unfolded sum diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index 1abe455d926a..205a9bc981b0 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -29,7 +29,7 @@ static efi_system_table_t efi_systab_xen __initdata = { .fw_vendor = EFI_INVALID_TABLE_ADDR, /* Initialized later. */ .fw_revision = 0, /* Initialized later. */ .con_in_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ - .con_in = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ + .con_in = NULL, /* Not used under Xen. */ .con_out_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ .con_out = NULL, /* Not used under Xen. */ .stderr_handle = EFI_INVALID_TABLE_ADDR, /* Not used under Xen. */ diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h index 8b687176ad72..d8292cc9ebdf 100644 --- a/arch/xtensa/include/asm/checksum.h +++ b/arch/xtensa/include/asm/checksum.h @@ -44,8 +44,6 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst, /* * Note: when you get a NULL pointer exception here this means someone * passed in an incorrect kernel address to one of these functions. - * - * If you use these functions directly please don't forget the access_ok(). */ static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, @@ -54,12 +52,17 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst, return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); } +#define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER static inline -__wsum csum_partial_copy_from_user(const void __user *src, void *dst, +__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr) { - return csum_partial_copy_generic((__force const void *)src, dst, + if (access_ok(dst, len)) + return csum_partial_copy_generic((__force const void *)src, dst, len, sum, err_ptr, NULL); + if (len) + *err_ptr = -EFAULT; + return sum; } /* diff --git a/arch/xtensa/include/asm/uaccess.h b/arch/xtensa/include/asm/uaccess.h index 47b7702aaa40..e57f0d0a88d8 100644 --- a/arch/xtensa/include/asm/uaccess.h +++ b/arch/xtensa/include/asm/uaccess.h @@ -84,7 +84,7 @@ extern long __put_user_bad(void); #define __put_user_check(x, ptr, size) \ ({ \ long __pu_err = -EFAULT; \ - __typeof__(*(ptr)) *__pu_addr = (ptr); \ + __typeof__(*(ptr)) __user *__pu_addr = (ptr); \ if (access_ok(__pu_addr, size)) \ __put_user_size((x), __pu_addr, (size), __pu_err); \ __pu_err; \ @@ -180,11 +180,11 @@ __asm__ __volatile__( \ #define __get_user_check(x, ptr, size) \ ({ \ long __gu_err = -EFAULT; \ - const __typeof__(*(ptr)) *__gu_addr = (ptr); \ + const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \ if (access_ok(__gu_addr, size)) \ __get_user_size((x), __gu_addr, (size), __gu_err); \ else \ - (x) = 0; \ + (x) = (__typeof__(*(ptr)))0; \ __gu_err; \ }) @@ -202,13 +202,15 @@ do { \ u64 __x; \ if (unlikely(__copy_from_user(&__x, ptr, 8))) { \ retval = -EFAULT; \ - (x) = 0; \ + (x) = (__typeof__(*(ptr)))0; \ } else { \ - (x) = *(__force __typeof__((ptr)))&__x; \ + (x) = *(__force __typeof__(*(ptr)) *)&__x; \ } \ break; \ } \ - default: (x) = 0; __get_user_bad(); \ + default: \ + (x) = (__typeof__(*(ptr)))0; \ + __get_user_bad(); \ } \ } while (0) @@ -270,15 +272,15 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n) */ static inline unsigned long -__xtensa_clear_user(void *addr, unsigned long size) +__xtensa_clear_user(void __user *addr, unsigned long size) { - if (!__memset(addr, 0, size)) + if (!__memset((void __force *)addr, 0, size)) return size; return 0; } static inline unsigned long -clear_user(void *addr, unsigned long size) +clear_user(void __user *addr, unsigned long size) { if (access_ok(addr, size)) return __xtensa_clear_user(addr, size); @@ -290,10 +292,10 @@ clear_user(void *addr, unsigned long size) #ifndef CONFIG_GENERIC_STRNCPY_FROM_USER -extern long __strncpy_user(char *, const char *, long); +extern long __strncpy_user(char *dst, const char __user *src, long count); static inline long -strncpy_from_user(char *dst, const char *src, long count) +strncpy_from_user(char *dst, const char __user *src, long count) { if (access_ok(src, 1)) return __strncpy_user(dst, src, count); @@ -306,13 +308,11 @@ long strncpy_from_user(char *dst, const char *src, long count); /* * Return the size of a string (including the ending 0!) */ -extern long __strnlen_user(const char *, long); +extern long __strnlen_user(const char __user *str, long len); -static inline long strnlen_user(const char *str, long len) +static inline long strnlen_user(const char __user *str, long len) { - unsigned long top = __kernel_ok ? ~0UL : TASK_SIZE - 1; - - if ((unsigned long)str > top) + if (!access_ok(str, 1)) return 0; return __strnlen_user(str, len); } diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 06fbb0a171f1..fae33ddcaebb 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -959,14 +959,14 @@ ENDPROC(unrecoverable_exception) * of the proper size instead. * * This algorithm simply backs out the register changes started by the user - * excpetion handler, makes it appear that we have started a window underflow + * exception handler, makes it appear that we have started a window underflow * by rotating the window back and then setting the old window base (OWB) in * the 'ps' register with the rolled back window base. The 'movsp' instruction * will be re-executed and this time since the next window frames is in the * active AR registers it won't cause an exception. * * If the WindowUnderflow code gets a TLB miss the page will get mapped - * the the partial windeowUnderflow will be handeled in the double exception + * the partial WindowUnderflow will be handled in the double exception * handler. * * Entry condition: diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 85a9ab1bc04d..69d0d73876b3 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -408,3 +408,4 @@ 435 common clone3 sys_clone3 437 common openat2 sys_openat2 438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 |