diff options
Diffstat (limited to 'arch')
447 files changed, 11503 insertions, 3673 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index aff2746c8af2..63c5d6a2022b 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -34,6 +34,9 @@ config ARCH_HAS_SUBPAGE_FAULTS config HOTPLUG_SMT bool +config SMT_NUM_THREADS_DYNAMIC + bool + # Selected by HOTPLUG_CORE_SYNC_DEAD or HOTPLUG_CORE_SYNC_FULL config HOTPLUG_CORE_SYNC bool diff --git a/arch/alpha/include/asm/processor.h b/arch/alpha/include/asm/processor.h index 714abe494e5f..55bb1c09fd39 100644 --- a/arch/alpha/include/asm/processor.h +++ b/arch/alpha/include/asm/processor.h @@ -47,12 +47,6 @@ unsigned long __get_wchan(struct task_struct *p); #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH - -#ifndef CONFIG_SMP -/* Nothing to prefetch. */ -#define spin_lock_prefetch(lock) do { } while (0) -#endif extern inline void prefetch(const void *ptr) { @@ -64,11 +58,4 @@ extern inline void prefetchw(const void *ptr) __builtin_prefetch(ptr, 1, 3); } -#ifdef CONFIG_SMP -extern inline void spin_lock_prefetch(const void *ptr) -{ - __builtin_prefetch(ptr, 1, 3); -} -#endif - #endif /* __ASM_ALPHA_PROCESSOR_H */ diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index d98701ee36c6..5db88b627439 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -97,7 +97,7 @@ struct osf_dirent { unsigned int d_ino; unsigned short d_reclen; unsigned short d_namlen; - char d_name[1]; + char d_name[]; }; struct osf_dirent_callback { diff --git a/arch/alpha/kernel/setup.c b/arch/alpha/kernel/setup.c index b650ff1cb022..3d7473531ab1 100644 --- a/arch/alpha/kernel/setup.c +++ b/arch/alpha/kernel/setup.c @@ -385,8 +385,7 @@ setup_memory(void *kernel_end) #endif /* CONFIG_BLK_DEV_INITRD */ } -int __init -page_is_ram(unsigned long pfn) +int page_is_ram(unsigned long pfn) { struct memclust_struct * cluster; struct memdesc_struct * memdesc; diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 1f13995d00d7..ad37569d0507 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -491,3 +491,4 @@ 559 common futex_waitv sys_futex_waitv 560 common set_mempolicy_home_node sys_ni_syscall 561 common cachestat sys_cachestat +562 common fchmodat2 sys_fchmodat2 diff --git a/arch/arc/include/asm/page.h b/arch/arc/include/asm/page.h index e43fe27ec54d..02b53ad811fb 100644 --- a/arch/arc/include/asm/page.h +++ b/arch/arc/include/asm/page.h @@ -108,7 +108,7 @@ extern int pfn_valid(unsigned long pfn); #else /* CONFIG_HIGHMEM */ -#define ARCH_PFN_OFFSET virt_to_pfn(CONFIG_LINUX_RAM_BASE) +#define ARCH_PFN_OFFSET virt_to_pfn((void *)CONFIG_LINUX_RAM_BASE) #endif /* CONFIG_HIGHMEM */ diff --git a/arch/arm/boot/dts/arm/integratorap.dts b/arch/arm/boot/dts/arm/integratorap.dts index 5b52d75bc6be..d9927d3181dc 100644 --- a/arch/arm/boot/dts/arm/integratorap.dts +++ b/arch/arm/boot/dts/arm/integratorap.dts @@ -158,7 +158,7 @@ valid-mask = <0x003fffff>; }; - pci: pciv3@62000000 { + pci: pci@62000000 { compatible = "arm,integrator-ap-pci", "v3,v360epc-pci"; device_type = "pci"; #interrupt-cells = <1>; diff --git a/arch/arm/boot/dts/microchip/sam9x60.dtsi b/arch/arm/boot/dts/microchip/sam9x60.dtsi index 8b53997675e7..73d570a17269 100644 --- a/arch/arm/boot/dts/microchip/sam9x60.dtsi +++ b/arch/arm/boot/dts/microchip/sam9x60.dtsi @@ -172,7 +172,7 @@ status = "disabled"; uart4: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <13 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -240,7 +240,7 @@ status = "disabled"; uart5: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; atmel,usart-mode = <AT91_USART_MODE_SERIAL>; interrupts = <14 IRQ_TYPE_LEVEL_HIGH 7>; @@ -370,7 +370,7 @@ status = "disabled"; uart11: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <32 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -419,7 +419,7 @@ status = "disabled"; uart12: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <33 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -576,7 +576,7 @@ status = "disabled"; uart6: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <9 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -625,7 +625,7 @@ status = "disabled"; uart7: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <10 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -674,7 +674,7 @@ status = "disabled"; uart8: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <11 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -723,7 +723,7 @@ status = "disabled"; uart0: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <5 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -791,7 +791,7 @@ status = "disabled"; uart1: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <6 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -859,7 +859,7 @@ status = "disabled"; uart2: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <7 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -927,7 +927,7 @@ status = "disabled"; uart3: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <8 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -1050,7 +1050,7 @@ status = "disabled"; uart9: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <15 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 @@ -1099,7 +1099,7 @@ status = "disabled"; uart10: serial@200 { - compatible = "microchip,sam9x60-dbgu", "microchip,sam9x60-usart", "atmel,at91sam9260-dbgu", "atmel,at91sam9260-usart"; + compatible = "microchip,sam9x60-usart", "atmel,at91sam9260-usart"; reg = <0x200 0x200>; interrupts = <16 IRQ_TYPE_LEVEL_HIGH 7>; dmas = <&dma0 diff --git a/arch/arm/boot/dts/nspire/nspire.dtsi b/arch/arm/boot/dts/nspire/nspire.dtsi index bb240e6a3a6f..088bcc38589f 100644 --- a/arch/arm/boot/dts/nspire/nspire.dtsi +++ b/arch/arm/boot/dts/nspire/nspire.dtsi @@ -161,7 +161,7 @@ }; watchdog: watchdog@90060000 { - compatible = "arm,amba-primecell"; + compatible = "arm,primecell"; reg = <0x90060000 0x1000>; interrupts = <3>; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts b/arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts index 103e73176e47..1a00d290092a 100644 --- a/arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts +++ b/arch/arm/boot/dts/nxp/imx/imx53-sk-imx53.dts @@ -60,6 +60,16 @@ status = "okay"; }; +&cpu0 { + /* CPU rated to 800 MHz, not the default 1.2GHz. */ + operating-points = < + /* kHz uV */ + 166666 850000 + 400000 900000 + 800000 1050000 + >; +}; + &ecspi1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_ecspi1>; diff --git a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi index 1a599c294ab8..1ca4d219609f 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6qdl-phytec-mira.dtsi @@ -182,7 +182,7 @@ pinctrl-0 = <&pinctrl_rtc_int>; reg = <0x68>; interrupt-parent = <&gpio7>; - interrupts = <8 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <8 IRQ_TYPE_LEVEL_LOW>; status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx6sll.dtsi b/arch/arm/boot/dts/nxp/imx/imx6sll.dtsi index 2873369a57c0..3659fd5ecfa6 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6sll.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6sll.dtsi @@ -552,7 +552,7 @@ reg = <0x020ca000 0x1000>; interrupts = <GIC_SPI 41 IRQ_TYPE_LEVEL_HIGH>; clocks = <&clks IMX6SLL_CLK_USBPHY2>; - phy-reg_3p0-supply = <®_3p0>; + phy-3p0-supply = <®_3p0>; fsl,anatop = <&anatop>; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi b/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi index 3a4308666552..a05069d49cb8 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6sx.dtsi @@ -863,7 +863,6 @@ reg = <0>; ldb_from_lcdif1: endpoint { - remote-endpoint = <&lcdif1_to_ldb>; }; }; @@ -1010,6 +1009,8 @@ <&clks IMX6SX_CLK_USDHC1>; clock-names = "ipg", "ahb", "per"; bus-width = <4>; + fsl,tuning-start-tap = <20>; + fsl,tuning-step= <2>; status = "disabled"; }; @@ -1022,6 +1023,8 @@ <&clks IMX6SX_CLK_USDHC2>; clock-names = "ipg", "ahb", "per"; bus-width = <4>; + fsl,tuning-start-tap = <20>; + fsl,tuning-step= <2>; status = "disabled"; }; @@ -1034,6 +1037,8 @@ <&clks IMX6SX_CLK_USDHC3>; clock-names = "ipg", "ahb", "per"; bus-width = <4>; + fsl,tuning-start-tap = <20>; + fsl,tuning-step= <2>; status = "disabled"; }; @@ -1309,11 +1314,8 @@ power-domains = <&pd_disp>; status = "disabled"; - ports { - port { - lcdif1_to_ldb: endpoint { - remote-endpoint = <&ldb_from_lcdif1>; - }; + port { + lcdif1_to_ldb: endpoint { }; }; }; diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi index 54026c2c93fa..6ffb428dc939 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi @@ -1184,6 +1184,8 @@ <&clks IMX7D_USDHC1_ROOT_CLK>; clock-names = "ipg", "ahb", "per"; bus-width = <4>; + fsl,tuning-step = <2>; + fsl,tuning-start-tap = <20>; status = "disabled"; }; @@ -1196,6 +1198,8 @@ <&clks IMX7D_USDHC2_ROOT_CLK>; clock-names = "ipg", "ahb", "per"; bus-width = <4>; + fsl,tuning-step = <2>; + fsl,tuning-start-tap = <20>; status = "disabled"; }; @@ -1208,6 +1212,8 @@ <&clks IMX7D_USDHC3_ROOT_CLK>; clock-names = "ipg", "ahb", "per"; bus-width = <4>; + fsl,tuning-step = <2>; + fsl,tuning-start-tap = <20>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi b/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi index b958607c71dc..96451c8a815c 100644 --- a/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi +++ b/arch/arm/boot/dts/ti/omap/am335x-bone-common.dtsi @@ -145,6 +145,8 @@ /* MDIO */ AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLUP | SLEWCTRL_FAST, MUX_MODE0) AM33XX_PADCONF(AM335X_PIN_MDC, PIN_OUTPUT_PULLUP, MUX_MODE0) + /* Added to support GPIO controlled PHY reset */ + AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_OUTPUT_PULLUP, MUX_MODE7) >; }; @@ -153,6 +155,8 @@ /* MDIO reset value */ AM33XX_PADCONF(AM335X_PIN_MDIO, PIN_INPUT_PULLDOWN, MUX_MODE7) AM33XX_PADCONF(AM335X_PIN_MDC, PIN_INPUT_PULLDOWN, MUX_MODE7) + /* Added to support GPIO controlled PHY reset */ + AM33XX_PADCONF(AM335X_PIN_UART0_CTSN, PIN_INPUT_PULLDOWN, MUX_MODE7) >; }; @@ -215,6 +219,7 @@ baseboard_eeprom: baseboard_eeprom@50 { compatible = "atmel,24c256"; reg = <0x50>; + vcc-supply = <&ldo4_reg>; #address-cells = <1>; #size-cells = <1>; @@ -377,6 +382,10 @@ ethphy0: ethernet-phy@0 { reg = <0>; + /* Support GPIO reset on revision C3 boards */ + reset-gpios = <&gpio1 8 GPIO_ACTIVE_LOW>; + reset-assert-us = <300>; + reset-deassert-us = <6500>; }; }; diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h index dfeed440254a..fe4326d938c1 100644 --- a/arch/arm/include/asm/syscall.h +++ b/arch/arm/include/asm/syscall.h @@ -25,6 +25,9 @@ static inline int syscall_get_nr(struct task_struct *task, if (IS_ENABLED(CONFIG_AEABI) && !IS_ENABLED(CONFIG_OABI_COMPAT)) return task_thread_info(task)->abi_syscall; + if (task_thread_info(task)->abi_syscall == -1) + return -1; + return task_thread_info(task)->abi_syscall & __NR_SYSCALL_MASK; } diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index bcc4c9ec3aa4..5c31e9de7a60 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -90,6 +90,7 @@ slow_work_pending: cmp r0, #0 beq no_work_pending movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) + str scno, [tsk, #TI_ABI_SYSCALL] @ make sure tracers see update ldmia sp, {r0 - r6} @ have to reload r0 - r6 b local_restart @ ... and off we go ENDPROC(ret_fast_syscall) diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 054e9199f30d..dc0fb7a81371 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -626,7 +626,7 @@ int hw_breakpoint_arch_parse(struct perf_event *bp, hw->address &= ~alignment_mask; hw->ctrl.len <<= offset; - if (is_default_overflow_handler(bp)) { + if (uses_default_overflow_handler(bp)) { /* * Mismatch breakpoints are required for single-stepping * breakpoints. @@ -798,7 +798,7 @@ static void watchpoint_handler(unsigned long addr, unsigned int fsr, * Otherwise, insert a temporary mismatch breakpoint so that * we can single-step over the watchpoint trigger. */ - if (!is_default_overflow_handler(wp)) + if (!uses_default_overflow_handler(wp)) continue; step: enable_single_step(wp, instruction_pointer(regs)); @@ -811,7 +811,7 @@ step: info->trigger = addr; pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); perf_bp_event(wp, regs); - if (is_default_overflow_handler(wp)) + if (uses_default_overflow_handler(wp)) enable_single_step(wp, instruction_pointer(regs)); } @@ -886,7 +886,7 @@ static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) info->trigger = addr; pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); - if (is_default_overflow_handler(bp)) + if (uses_default_overflow_handler(bp)) enable_single_step(bp, addr); goto unlock; } diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 2d8e2516906b..fef32d73f912 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -783,8 +783,9 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_SET_SYSCALL: - task_thread_info(child)->abi_syscall = data & - __NR_SYSCALL_MASK; + if (data != -1) + data &= __NR_SYSCALL_MASK; + task_thread_info(child)->abi_syscall = data; ret = 0; break; diff --git a/arch/arm/mach-pxa/sharpsl_pm.h b/arch/arm/mach-pxa/sharpsl_pm.h index 20e4cab64d85..623167f30ec2 100644 --- a/arch/arm/mach-pxa/sharpsl_pm.h +++ b/arch/arm/mach-pxa/sharpsl_pm.h @@ -105,5 +105,4 @@ void sharpsl_pm_led(int val); #define MAX1111_ACIN_VOLT 6u int sharpsl_pm_pxa_read_max1111(int channel); -void corgi_lcd_limit_intensity(int limit); #endif diff --git a/arch/arm/mach-pxa/spitz_pm.c b/arch/arm/mach-pxa/spitz_pm.c index 1c021cef965f..8bc4ea51a0c1 100644 --- a/arch/arm/mach-pxa/spitz_pm.c +++ b/arch/arm/mach-pxa/spitz_pm.c @@ -15,6 +15,7 @@ #include <linux/interrupt.h> #include <linux/platform_device.h> #include <linux/apm-emulation.h> +#include <linux/spi/corgi_lcd.h> #include <asm/irq.h> #include <asm/mach-types.h> diff --git a/arch/arm/mach-zynq/pm.c b/arch/arm/mach-zynq/pm.c index 8ba450ab559c..61ad965ef3ac 100644 --- a/arch/arm/mach-zynq/pm.c +++ b/arch/arm/mach-zynq/pm.c @@ -8,8 +8,8 @@ */ #include <linux/io.h> +#include <linux/of.h> #include <linux/of_address.h> -#include <linux/of_device.h> #include "common.h" /* register offsets */ diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 8ebed8a13874..c572d6c3dee0 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -465,3 +465,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a7a88322bf46..c060a26dec28 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1792,9 +1792,6 @@ config ARM64_PAN The feature is detected at runtime, and will remain as a 'nop' instruction if the cpu does not implement the feature. -config AS_HAS_LDAPR - def_bool $(as-instr,.arch_extension rcpc) - config AS_HAS_LSE_ATOMICS def_bool $(as-instr,.arch_extension lse) @@ -1932,6 +1929,9 @@ config AS_HAS_ARMV8_3 config AS_HAS_CFI_NEGATE_RA_STATE def_bool $(as-instr,.cfi_startproc\n.cfi_negate_ra_state\n.cfi_endproc\n) +config AS_HAS_LDAPR + def_bool $(as-instr,.arch_extension rcpc) + endmenu # "ARMv8.3 architectural features" menu "ARMv8.4 architectural features" diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts index 38ae674f2f02..3037f58057c9 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk.dts @@ -145,7 +145,7 @@ status = "okay"; clock-frequency = <100000>; i2c-sda-falling-time-ns = <890>; /* hcnt */ - i2c-sdl-falling-time-ns = <890>; /* lcnt */ + i2c-scl-falling-time-ns = <890>; /* lcnt */ pinctrl-names = "default", "gpio"; pinctrl-0 = <&i2c1_pmx_func>; diff --git a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts index ede99dcc0558..f4cf30bac557 100644 --- a/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts +++ b/arch/arm64/boot/dts/altera/socfpga_stratix10_socdk_nand.dts @@ -141,7 +141,7 @@ status = "okay"; clock-frequency = <100000>; i2c-sda-falling-time-ns = <890>; /* hcnt */ - i2c-sdl-falling-time-ns = <890>; /* lcnt */ + i2c-scl-falling-time-ns = <890>; /* lcnt */ adc@14 { compatible = "lltc,ltc2497"; diff --git a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi b/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi deleted file mode 120000 index 68fd0f8f1dee..000000000000 --- a/arch/arm64/boot/dts/arm/vexpress-v2m-rs1.dtsi +++ /dev/null @@ -1 +0,0 @@ -../../../../arm/boot/dts/vexpress-v2m-rs1.dtsi
\ No newline at end of file diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts index 03e7679217b2..479948f8a4b7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-phyboard-polis-rdk.dts @@ -141,7 +141,7 @@ }; &gpio1 { - gpio-line-names = "nINT_ETHPHY", "LED_RED", "WDOG_INT", "X_RTC_INT", + gpio-line-names = "", "LED_RED", "WDOG_INT", "X_RTC_INT", "", "", "", "RESET_ETHPHY", "CAN_nINT", "CAN_EN", "nENABLE_FLATLINK", "", "USB_OTG_VBUS_EN", "", "LED_GREEN", "LED_BLUE"; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi index 92616bc4f71f..847f08537b48 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm-phycore-som.dtsi @@ -111,7 +111,7 @@ }; &gpio1 { - gpio-line-names = "nINT_ETHPHY", "", "WDOG_INT", "X_RTC_INT", + gpio-line-names = "", "", "WDOG_INT", "X_RTC_INT", "", "", "", "RESET_ETHPHY", "", "", "nENABLE_FLATLINK"; }; @@ -210,7 +210,7 @@ }; }; - reg_vdd_gpu: buck3 { + reg_vdd_vpu: buck3 { regulator-always-on; regulator-boot-on; regulator-max-microvolt = <1000000>; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts index 6f26914602c8..07b07dc954fd 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7903.dts @@ -567,6 +567,10 @@ status = "okay"; }; +&disp_blk_ctrl { + status = "disabled"; +}; + &pgc_mipi { status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts index 93088fa1c3b9..d5b716855812 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts +++ b/arch/arm64/boot/dts/freescale/imx8mm-venice-gw7904.dts @@ -628,6 +628,10 @@ status = "okay"; }; +&disp_blk_ctrl { + status = "disabled"; +}; + &pgc_mipi { status = "disabled"; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mm.dtsi b/arch/arm64/boot/dts/freescale/imx8mm.dtsi index d6b36f04f3dc..1a647d4072ba 100644 --- a/arch/arm64/boot/dts/freescale/imx8mm.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mm.dtsi @@ -1221,10 +1221,9 @@ compatible = "fsl,imx8mm-mipi-csi2"; reg = <0x32e30000 0x1000>; interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; - assigned-clocks = <&clk IMX8MM_CLK_CSI1_CORE>, - <&clk IMX8MM_CLK_CSI1_PHY_REF>; - assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_1000M>, - <&clk IMX8MM_SYS_PLL2_1000M>; + assigned-clocks = <&clk IMX8MM_CLK_CSI1_CORE>; + assigned-clock-parents = <&clk IMX8MM_SYS_PLL2_1000M>; + clock-frequency = <333000000>; clocks = <&clk IMX8MM_CLK_DISP_APB_ROOT>, <&clk IMX8MM_CLK_CSI1_ROOT>, diff --git a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi index d3a67109d55b..b8946edf317b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn-var-som.dtsi @@ -358,7 +358,7 @@ MX8MN_IOMUXC_ENET_RXC_ENET1_RGMII_RXC 0x91 MX8MN_IOMUXC_ENET_RX_CTL_ENET1_RGMII_RX_CTL 0x91 MX8MN_IOMUXC_ENET_TX_CTL_ENET1_RGMII_TX_CTL 0x1f - MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x19 + MX8MN_IOMUXC_GPIO1_IO09_GPIO1_IO9 0x159 >; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mn.dtsi b/arch/arm64/boot/dts/freescale/imx8mn.dtsi index 9869fe7652fc..aa38dd6dc9ba 100644 --- a/arch/arm64/boot/dts/freescale/imx8mn.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mn.dtsi @@ -1175,10 +1175,8 @@ compatible = "fsl,imx8mm-mipi-csi2"; reg = <0x32e30000 0x1000>; interrupts = <GIC_SPI 17 IRQ_TYPE_LEVEL_HIGH>; - assigned-clocks = <&clk IMX8MN_CLK_CAMERA_PIXEL>, - <&clk IMX8MN_CLK_CSI1_PHY_REF>; - assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_1000M>, - <&clk IMX8MN_SYS_PLL2_1000M>; + assigned-clocks = <&clk IMX8MN_CLK_CAMERA_PIXEL>; + assigned-clock-parents = <&clk IMX8MN_SYS_PLL2_1000M>; assigned-clock-rates = <333000000>; clock-frequency = <333000000>; clocks = <&clk IMX8MN_CLK_DISP_APB_ROOT>, diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 1a2d2c04db32..01eec424f7f7 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -772,7 +772,7 @@ <&clk IMX8MQ_SYS1_PLL_800M>, <&clk IMX8MQ_VPU_PLL>; assigned-clock-rates = <600000000>, - <600000000>, + <300000000>, <800000000>, <0>; }; diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 8643612ace8c..1d8dd14b65cf 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -340,7 +340,7 @@ anatop: anatop@44480000 { compatible = "fsl,imx93-anatop", "syscon"; - reg = <0x44480000 0x10000>; + reg = <0x44480000 0x2000>; }; adc1: adc@44530000 { diff --git a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts index 9022ad726741..a9e7b832c18c 100644 --- a/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts +++ b/arch/arm64/boot/dts/qcom/qrb5165-rb5.dts @@ -121,7 +121,7 @@ }; }; - pm8150l-thermal { + pm8150l-pcb-thermal { polling-delay-passive = <0>; polling-delay = <0>; thermal-sensors = <&pm8150l_adc_tm 1>; diff --git a/arch/arm64/boot/dts/qcom/sa8775p-ride.dts b/arch/arm64/boot/dts/qcom/sa8775p-ride.dts index ab767cfa51ff..26f5a4e0ffed 100644 --- a/arch/arm64/boot/dts/qcom/sa8775p-ride.dts +++ b/arch/arm64/boot/dts/qcom/sa8775p-ride.dts @@ -153,8 +153,8 @@ vreg_l4c: ldo4 { regulator-name = "vreg_l4c"; - regulator-min-microvolt = <1100000>; - regulator-max-microvolt = <1300000>; + regulator-min-microvolt = <1200000>; + regulator-max-microvolt = <1200000>; regulator-initial-mode = <RPMH_REGULATOR_MODE_HPM>; /* * FIXME: This should have regulator-allow-set-load but diff --git a/arch/arm64/boot/dts/qcom/sc7180.dtsi b/arch/arm64/boot/dts/qcom/sc7180.dtsi index e25dc2bb52a7..06df931d8cad 100644 --- a/arch/arm64/boot/dts/qcom/sc7180.dtsi +++ b/arch/arm64/boot/dts/qcom/sc7180.dtsi @@ -3120,8 +3120,8 @@ reg = <0 0x0ae94400 0 0x200>, <0 0x0ae94600 0 0x280>, <0 0x0ae94a00 0 0x1e0>; - reg-names = "dsi0_phy", - "dsi0_phy_lane", + reg-names = "dsi_phy", + "dsi_phy_lane", "dsi_pll"; #clock-cells = <1>; diff --git a/arch/arm64/boot/dts/qcom/sc8180x.dtsi b/arch/arm64/boot/dts/qcom/sc8180x.dtsi index d3ae18535636..be78a933d8eb 100644 --- a/arch/arm64/boot/dts/qcom/sc8180x.dtsi +++ b/arch/arm64/boot/dts/qcom/sc8180x.dtsi @@ -3561,7 +3561,7 @@ }; osm_l3: interconnect@18321000 { - compatible = "qcom,sc8180x-osm-l3"; + compatible = "qcom,sc8180x-osm-l3", "qcom,osm-l3"; reg = <0 0x18321000 0 0x1400>; clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>; diff --git a/arch/arm64/boot/dts/qcom/sm8150.dtsi b/arch/arm64/boot/dts/qcom/sm8150.dtsi index 18c822abdb88..b46e55bb8bde 100644 --- a/arch/arm64/boot/dts/qcom/sm8150.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8150.dtsi @@ -56,7 +56,7 @@ qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>; + <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>; power-domains = <&CPU_PD0>; power-domain-names = "psci"; #cooling-cells = <2>; @@ -85,7 +85,7 @@ qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>; + <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>; power-domains = <&CPU_PD1>; power-domain-names = "psci"; #cooling-cells = <2>; @@ -109,7 +109,7 @@ qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>; + <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>; power-domains = <&CPU_PD2>; power-domain-names = "psci"; #cooling-cells = <2>; @@ -133,7 +133,7 @@ qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>; + <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>; power-domains = <&CPU_PD3>; power-domain-names = "psci"; #cooling-cells = <2>; @@ -157,7 +157,7 @@ qcom,freq-domain = <&cpufreq_hw 1>; operating-points-v2 = <&cpu4_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>; + <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>; power-domains = <&CPU_PD4>; power-domain-names = "psci"; #cooling-cells = <2>; @@ -181,7 +181,7 @@ qcom,freq-domain = <&cpufreq_hw 1>; operating-points-v2 = <&cpu4_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>; + <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>; power-domains = <&CPU_PD5>; power-domain-names = "psci"; #cooling-cells = <2>; @@ -205,7 +205,7 @@ qcom,freq-domain = <&cpufreq_hw 1>; operating-points-v2 = <&cpu4_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>; + <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>; power-domains = <&CPU_PD6>; power-domain-names = "psci"; #cooling-cells = <2>; @@ -229,7 +229,7 @@ qcom,freq-domain = <&cpufreq_hw 2>; operating-points-v2 = <&cpu7_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&osm_l3 MASTER_OSM_L3_APPS 0 &osm_l3 SLAVE_OSM_L3 0>; + <&osm_l3 MASTER_OSM_L3_APPS &osm_l3 SLAVE_OSM_L3>; power-domains = <&CPU_PD7>; power-domain-names = "psci"; #cooling-cells = <2>; @@ -4342,7 +4342,7 @@ clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>; clock-names = "xo", "alternate"; - #interconnect-cells = <2>; + #interconnect-cells = <1>; }; cpufreq_hw: cpufreq@18323000 { diff --git a/arch/arm64/boot/dts/qcom/sm8250.dtsi b/arch/arm64/boot/dts/qcom/sm8250.dtsi index 83ab6de459bc..1efa07f2caff 100644 --- a/arch/arm64/boot/dts/qcom/sm8250.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8250.dtsi @@ -107,7 +107,7 @@ qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>; + <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>; #cooling-cells = <2>; L2_0: l2-cache { compatible = "cache"; @@ -138,7 +138,7 @@ qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>; + <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>; #cooling-cells = <2>; L2_100: l2-cache { compatible = "cache"; @@ -163,7 +163,7 @@ qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>; + <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>; #cooling-cells = <2>; L2_200: l2-cache { compatible = "cache"; @@ -188,7 +188,7 @@ qcom,freq-domain = <&cpufreq_hw 0>; operating-points-v2 = <&cpu0_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>; + <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>; #cooling-cells = <2>; L2_300: l2-cache { compatible = "cache"; @@ -213,7 +213,7 @@ qcom,freq-domain = <&cpufreq_hw 1>; operating-points-v2 = <&cpu4_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>; + <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>; #cooling-cells = <2>; L2_400: l2-cache { compatible = "cache"; @@ -238,7 +238,7 @@ qcom,freq-domain = <&cpufreq_hw 1>; operating-points-v2 = <&cpu4_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>; + <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>; #cooling-cells = <2>; L2_500: l2-cache { compatible = "cache"; @@ -263,7 +263,7 @@ qcom,freq-domain = <&cpufreq_hw 1>; operating-points-v2 = <&cpu4_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>; + <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>; #cooling-cells = <2>; L2_600: l2-cache { compatible = "cache"; @@ -288,7 +288,7 @@ qcom,freq-domain = <&cpufreq_hw 2>; operating-points-v2 = <&cpu7_opp_table>; interconnects = <&gem_noc MASTER_AMPSS_M0 0 &mc_virt SLAVE_EBI_CH0 0>, - <&epss_l3 MASTER_OSM_L3_APPS 0 &epss_l3 SLAVE_OSM_L3 0>; + <&epss_l3 MASTER_OSM_L3_APPS &epss_l3 SLAVE_OSM_L3>; #cooling-cells = <2>; L2_700: l2-cache { compatible = "cache"; @@ -5679,7 +5679,7 @@ clocks = <&rpmhcc RPMH_CXO_CLK>, <&gcc GPLL0>; clock-names = "xo", "alternate"; - #interconnect-cells = <2>; + #interconnect-cells = <1>; }; cpufreq_hw: cpufreq@18591000 { diff --git a/arch/arm64/boot/dts/qcom/sm8350.dtsi b/arch/arm64/boot/dts/qcom/sm8350.dtsi index 88ef478cb5cc..ec451c616f3e 100644 --- a/arch/arm64/boot/dts/qcom/sm8350.dtsi +++ b/arch/arm64/boot/dts/qcom/sm8350.dtsi @@ -1744,6 +1744,8 @@ qcom,controlled-remotely; iommus = <&apps_smmu 0x594 0x0011>, <&apps_smmu 0x596 0x0011>; + /* FIXME: Probing BAM DMA causes some abort and system hang */ + status = "fail"; }; crypto: crypto@1dfa000 { @@ -1755,6 +1757,8 @@ <&apps_smmu 0x596 0x0011>; interconnects = <&aggre2_noc MASTER_CRYPTO 0 &mc_virt SLAVE_EBI1 0>; interconnect-names = "memory"; + /* FIXME: dependency BAM DMA is disabled */ + status = "disabled"; }; ipa: ipa@1e40000 { diff --git a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi index 232910e07444..66f68fc2b241 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g044.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g044.dtsi @@ -223,20 +223,20 @@ <GIC_SPI 212 IRQ_TYPE_EDGE_RISING>, <GIC_SPI 213 IRQ_TYPE_EDGE_RISING>; interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0", - "tgiv0", "tgie0", "tgif0", - "tgia1", "tgib1", "tgiv1", "tgiu1", - "tgia2", "tgib2", "tgiv2", "tgiu2", + "tciv0", "tgie0", "tgif0", + "tgia1", "tgib1", "tciv1", "tciu1", + "tgia2", "tgib2", "tciv2", "tciu2", "tgia3", "tgib3", "tgic3", "tgid3", - "tgiv3", + "tciv3", "tgia4", "tgib4", "tgic4", "tgid4", - "tgiv4", + "tciv4", "tgiu5", "tgiv5", "tgiw5", "tgia6", "tgib6", "tgic6", "tgid6", - "tgiv6", + "tciv6", "tgia7", "tgib7", "tgic7", "tgid7", - "tgiv7", + "tciv7", "tgia8", "tgib8", "tgic8", "tgid8", - "tgiv8", "tgiu8"; + "tciv8", "tciu8"; clocks = <&cpg CPG_MOD R9A07G044_MTU_X_MCK_MTU3>; power-domains = <&cpg>; resets = <&cpg R9A07G044_MTU_X_PRESET_MTU3>; diff --git a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi index 2eba3a8a100d..1f1d481dc783 100644 --- a/arch/arm64/boot/dts/renesas/r9a07g054.dtsi +++ b/arch/arm64/boot/dts/renesas/r9a07g054.dtsi @@ -223,20 +223,20 @@ <GIC_SPI 212 IRQ_TYPE_EDGE_RISING>, <GIC_SPI 213 IRQ_TYPE_EDGE_RISING>; interrupt-names = "tgia0", "tgib0", "tgic0", "tgid0", - "tgiv0", "tgie0", "tgif0", - "tgia1", "tgib1", "tgiv1", "tgiu1", - "tgia2", "tgib2", "tgiv2", "tgiu2", + "tciv0", "tgie0", "tgif0", + "tgia1", "tgib1", "tciv1", "tciu1", + "tgia2", "tgib2", "tciv2", "tciu2", "tgia3", "tgib3", "tgic3", "tgid3", - "tgiv3", + "tciv3", "tgia4", "tgib4", "tgic4", "tgid4", - "tgiv4", + "tciv4", "tgiu5", "tgiv5", "tgiw5", "tgia6", "tgib6", "tgic6", "tgid6", - "tgiv6", + "tciv6", "tgia7", "tgib7", "tgic7", "tgid7", - "tgiv7", + "tciv7", "tgia8", "tgib8", "tgic8", "tgid8", - "tgiv8", "tgiu8"; + "tciv8", "tciu8"; clocks = <&cpg CPG_MOD R9A07G054_MTU_X_MCK_MTU3>; power-domains = <&cpg>; resets = <&cpg R9A07G054_MTU_X_PRESET_MTU3>; diff --git a/arch/arm64/boot/dts/rockchip/px30.dtsi b/arch/arm64/boot/dts/rockchip/px30.dtsi index 8332c8aaf49b..42ce78beb413 100644 --- a/arch/arm64/boot/dts/rockchip/px30.dtsi +++ b/arch/arm64/boot/dts/rockchip/px30.dtsi @@ -291,14 +291,14 @@ }; power-domain@PX30_PD_MMC_NAND { reg = <PX30_PD_MMC_NAND>; - clocks = <&cru HCLK_NANDC>, - <&cru HCLK_EMMC>, - <&cru HCLK_SDIO>, - <&cru HCLK_SFC>, - <&cru SCLK_EMMC>, - <&cru SCLK_NANDC>, - <&cru SCLK_SDIO>, - <&cru SCLK_SFC>; + clocks = <&cru HCLK_NANDC>, + <&cru HCLK_EMMC>, + <&cru HCLK_SDIO>, + <&cru HCLK_SFC>, + <&cru SCLK_EMMC>, + <&cru SCLK_NANDC>, + <&cru SCLK_SDIO>, + <&cru SCLK_SFC>; pm_qos = <&qos_emmc>, <&qos_nand>, <&qos_sdio>, <&qos_sfc>; #power-domain-cells = <0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts index 7ea48167747c..9232357f4fec 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-roc-cc.dts @@ -106,7 +106,6 @@ regulator-name = "vdd_core"; regulator-min-microvolt = <827000>; regulator-max-microvolt = <1340000>; - regulator-init-microvolt = <1015000>; regulator-settling-time-up-us = <250>; regulator-always-on; regulator-boot-on; diff --git a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts index a71f249ed384..e9810d2f0407 100644 --- a/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts +++ b/arch/arm64/boot/dts/rockchip/rk3308-rock-pi-s.dts @@ -105,7 +105,6 @@ regulator-name = "vdd_core"; regulator-min-microvolt = <827000>; regulator-max-microvolt = <1340000>; - regulator-init-microvolt = <1015000>; regulator-settling-time-up-us = <250>; regulator-always-on; regulator-boot-on; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts index d1f343345f67..6464ef4d113d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-eaidk-610.dts @@ -773,7 +773,7 @@ compatible = "brcm,bcm4329-fmac"; reg = <1>; interrupt-parent = <&gpio0>; - interrupts = <RK_PA3 GPIO_ACTIVE_HIGH>; + interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "host-wake"; pinctrl-names = "default"; pinctrl-0 = <&wifi_host_wake_l>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi index b6e082f1f6d9..7c5f441a2219 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-nanopi4.dtsi @@ -375,7 +375,6 @@ vcc_sdio: LDO_REG4 { regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <3000000>; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; regulator-name = "vcc_sdio"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts index 028eb508ae30..8bfd5f88d1ef 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-4c-plus.dts @@ -548,9 +548,8 @@ &sdhci { max-frequency = <150000000>; bus-width = <8>; - mmc-hs400-1_8v; + mmc-hs200-1_8v; non-removable; - mmc-hs400-enhanced-strobe; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi index 907071d4fe80..980c4534313a 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4.dtsi @@ -45,7 +45,7 @@ sdio_pwrseq: sdio-pwrseq { compatible = "mmc-pwrseq-simple"; clocks = <&rk808 1>; - clock-names = "ext_clock"; + clock-names = "lpo"; pinctrl-names = "default"; pinctrl-0 = <&wifi_enable_h>; reset-gpios = <&gpio0 RK_PB2 GPIO_ACTIVE_LOW>; @@ -645,9 +645,9 @@ }; &sdhci { + max-frequency = <150000000>; bus-width = <8>; - mmc-hs400-1_8v; - mmc-hs400-enhanced-strobe; + mmc-hs200-1_8v; non-removable; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts index cec3b7b1b947..8a17c1eaae15 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock-pi-4b-plus.dts @@ -31,7 +31,7 @@ compatible = "brcm,bcm4329-fmac"; reg = <1>; interrupt-parent = <&gpio0>; - interrupts = <RK_PA3 GPIO_ACTIVE_HIGH>; + interrupts = <RK_PA3 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "host-wake"; pinctrl-names = "default"; pinctrl-0 = <&wifi_host_wake_l>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi index a2c31d53b45b..8cbf3d9a4f22 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-anbernic-rgxx3.dtsi @@ -356,7 +356,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; regulator-name = "vdd_logic"; @@ -371,7 +370,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; regulator-name = "vdd_gpu"; @@ -533,7 +531,6 @@ regulator-boot-on; regulator-min-microvolt = <712500>; regulator-max-microvolt = <1390000>; - regulator-init-microvolt = <900000>; regulator-name = "vdd_cpu"; regulator-ramp-delay = <2300>; vin-supply = <&vcc_sys>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts index 410cd3e5e7bc..0c18406e4c59 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-box-demo.dts @@ -239,7 +239,7 @@ &gmac1 { assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1>; - assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&gmac1_clkin>; + assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&gmac1_clkin>; phy-mode = "rgmii"; clock_in_out = "input"; pinctrl-names = "default"; @@ -416,7 +416,7 @@ compatible = "brcm,bcm4329-fmac"; reg = <1>; interrupt-parent = <&gpio2>; - interrupts = <RK_PB2 GPIO_ACTIVE_HIGH>; + interrupts = <RK_PB2 IRQ_TYPE_LEVEL_HIGH>; interrupt-names = "host-wake"; pinctrl-names = "default"; pinctrl-0 = <&wifi_host_wake_h>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts index ff936b713579..1c6d83b47cd2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-lubancat-1.dts @@ -218,7 +218,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; @@ -233,7 +232,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; @@ -259,7 +257,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi index 8d61f824c12d..d899087bf0b5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-pinenote.dtsi @@ -264,7 +264,6 @@ regulator-always-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; @@ -278,7 +277,6 @@ regulator-name = "vdd_gpu_npu"; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts index 25a8c781f4e7..854d02b46e6f 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-a.dts @@ -366,7 +366,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; regulator-name = "vdd_logic"; @@ -381,7 +380,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; regulator-name = "vdd_gpu"; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts index b276eb0810c7..2d92713be2a0 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-quartz64-b.dts @@ -277,7 +277,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-state-mem { @@ -292,7 +291,6 @@ regulator-boot-on; regulator-min-microvolt = <900000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-state-mem { diff --git a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts index 5e4236af4fcb..1b1c67d5b1ef 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-radxa-cm3-io.dts @@ -137,8 +137,8 @@ &mdio1 { rgmii_phy1: ethernet-phy@0 { - compatible="ethernet-phy-ieee802.3-c22"; - reg= <0x0>; + compatible = "ethernet-phy-ieee802.3-c22"; + reg = <0x0>; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts index 42889c5900bd..938092fce186 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts +++ b/arch/arm64/boot/dts/rockchip/rk3566-roc-pc.dts @@ -278,7 +278,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-state-mem { @@ -291,7 +290,6 @@ regulator-name = "vdd_gpu"; regulator-min-microvolt = <900000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-state-mem { diff --git a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi index 31aa2b8efe39..63bae36b8f7e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3566-soquartz.dtsi @@ -234,7 +234,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; regulator-state-mem { @@ -249,7 +248,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; regulator-state-mem { @@ -272,7 +270,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-name = "vdd_npu"; regulator-state-mem { diff --git a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts index ff0bf24cc1a2..f9127ddfbb7d 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-bpi-r2-pro.dts @@ -308,7 +308,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -322,7 +321,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; regulator-always-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -346,7 +344,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts b/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts index 674792567fa6..19f8fc369b13 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-evb1-v10.dts @@ -293,7 +293,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -307,7 +306,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; regulator-always-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -331,7 +329,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi index 25e205632a68..89e84e3a9262 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568-fastrhino-r66s.dtsi @@ -173,7 +173,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -187,7 +186,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; regulator-always-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -211,7 +209,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -330,7 +327,6 @@ vcca1v8_image: LDO_REG9 { regulator-name = "vcca1v8_image"; - regulator-init-microvolt = <950000>; regulator-min-microvolt = <950000>; regulator-max-microvolt = <1800000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts index e653b067aa5d..a8a4cc190eb3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-lubancat-2.dts @@ -243,7 +243,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; @@ -258,7 +257,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; @@ -284,7 +282,6 @@ regulator-boot-on; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; - regulator-init-microvolt = <900000>; regulator-ramp-delay = <6001>; regulator-initial-mode = <0x2>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi index 58ba328ea782..93189f830640 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568-nanopi-r5s.dtsi @@ -232,7 +232,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -246,7 +245,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; regulator-always-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -270,7 +268,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts index 59ecf868dbd0..a337f547caf5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-odroid-m1.dts @@ -291,7 +291,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -305,7 +304,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; regulator-always-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -329,7 +327,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi b/arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi index c50fbdd48680..45b03dcbbad4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3568-radxa-cm3i.dtsi @@ -163,7 +163,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -177,7 +176,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; regulator-always-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -201,7 +199,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts index 917f5b2b8aab..e05ab11981f5 100644 --- a/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts +++ b/arch/arm64/boot/dts/rockchip/rk3568-rock-3a.dts @@ -350,7 +350,6 @@ regulator-name = "vdd_logic"; regulator-always-on; regulator-boot-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -364,7 +363,6 @@ vdd_gpu: DCDC_REG2 { regulator-name = "vdd_gpu"; regulator-always-on; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; @@ -388,7 +386,6 @@ vdd_npu: DCDC_REG4 { regulator-name = "vdd_npu"; - regulator-init-microvolt = <900000>; regulator-initial-mode = <0x2>; regulator-min-microvolt = <500000>; regulator-max-microvolt = <1350000>; diff --git a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts index afda976680bc..51537030f8e3 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588-rock-5b.dts @@ -337,7 +337,6 @@ regulator-boot-on; regulator-min-microvolt = <550000>; regulator-max-microvolt = <950000>; - regulator-init-microvolt = <750000>; regulator-ramp-delay = <12500>; regulator-name = "vdd_vdenc_s0"; diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts index 4d9ed2a02736..1a60a275ddf9 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-indiedroid-nova.dts @@ -125,19 +125,19 @@ cpu-supply = <&vdd_cpu_lit_s0>; }; -&cpu_b0{ +&cpu_b0 { cpu-supply = <&vdd_cpu_big0_s0>; }; -&cpu_b1{ +&cpu_b1 { cpu-supply = <&vdd_cpu_big0_s0>; }; -&cpu_b2{ +&cpu_b2 { cpu-supply = <&vdd_cpu_big1_s0>; }; -&cpu_b3{ +&cpu_b3 { cpu-supply = <&vdd_cpu_big1_s0>; }; diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 4818e204c2ac..fbe64dce66e0 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -81,11 +81,6 @@ aes-arm64-y := aes-cipher-core.o aes-cipher-glue.o obj-$(CONFIG_CRYPTO_AES_ARM64_BS) += aes-neon-bs.o aes-neon-bs-y := aes-neonbs-core.o aes-neonbs-glue.o -CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS - -$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE - $(call if_changed_rule,cc_o_c) - quiet_cmd_perlasm = PERLASM $@ cmd_perlasm = $(PERL) $(<) void $(@) diff --git a/arch/arm64/crypto/aes-glue-ce.c b/arch/arm64/crypto/aes-glue-ce.c new file mode 100644 index 000000000000..7d309ceeddf3 --- /dev/null +++ b/arch/arm64/crypto/aes-glue-ce.c @@ -0,0 +1,2 @@ +#define USE_V8_CRYPTO_EXTENSIONS +#include "aes-glue.c" diff --git a/arch/arm64/crypto/aes-glue-neon.c b/arch/arm64/crypto/aes-glue-neon.c new file mode 100644 index 000000000000..8ba046321064 --- /dev/null +++ b/arch/arm64/crypto/aes-glue-neon.c @@ -0,0 +1 @@ +#include "aes-glue.c" diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index bd68e1b7f29f..4d537d56eb84 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -42,6 +42,9 @@ #define ACPI_MADT_GICC_SPE (offsetof(struct acpi_madt_generic_interrupt, \ spe_interrupt) + sizeof(u16)) +#define ACPI_MADT_GICC_TRBE (offsetof(struct acpi_madt_generic_interrupt, \ + trbe_interrupt) + sizeof(u16)) + /* Basic configuration for ACPI */ #ifdef CONFIG_ACPI pgprot_t __acpi_get_mem_attribute(phys_addr_t addr); diff --git a/arch/arm64/include/asm/efi.h b/arch/arm64/include/asm/efi.h index 4cf2cb053bc8..f482b994c608 100644 --- a/arch/arm64/include/asm/efi.h +++ b/arch/arm64/include/asm/efi.h @@ -30,28 +30,16 @@ int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, bool has_bti); -#define arch_efi_call_virt_setup() \ -({ \ - efi_virtmap_load(); \ - __efi_fpsimd_begin(); \ - raw_spin_lock(&efi_rt_lock); \ -}) - #undef arch_efi_call_virt #define arch_efi_call_virt(p, f, args...) \ __efi_rt_asm_wrapper((p)->f, #f, args) -#define arch_efi_call_virt_teardown() \ -({ \ - raw_spin_unlock(&efi_rt_lock); \ - __efi_fpsimd_end(); \ - efi_virtmap_unload(); \ -}) - -extern raw_spinlock_t efi_rt_lock; extern u64 *efi_rt_stack_top; efi_status_t __efi_rt_asm_wrapper(void *, const char *, ...); +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); + /* * efi_rt_stack_top[-1] contains the value the stack pointer had before * switching to the EFI runtime stack. diff --git a/arch/arm64/include/asm/el2_setup.h b/arch/arm64/include/asm/el2_setup.h index 8e5ffb58f83e..b7afaa026842 100644 --- a/arch/arm64/include/asm/el2_setup.h +++ b/arch/arm64/include/asm/el2_setup.h @@ -31,6 +31,13 @@ .Lskip_hcrx_\@: .endm +/* Check if running in host at EL2 mode, i.e., (h)VHE. Jump to fail if not. */ +.macro __check_hvhe fail, tmp + mrs \tmp, hcr_el2 + and \tmp, \tmp, #HCR_E2H + cbz \tmp, \fail +.endm + /* * Allow Non-secure EL1 and EL0 to access physical timer and counter. * This is not necessary for VHE, since the host kernel runs in EL2, @@ -43,9 +50,7 @@ */ .macro __init_el2_timers mov x0, #3 // Enable EL1 physical timers - mrs x1, hcr_el2 - and x1, x1, #HCR_E2H - cbz x1, .LnVHE_\@ + __check_hvhe .LnVHE_\@, x1 lsl x0, x0, #10 .LnVHE_\@: msr cnthctl_el2, x0 @@ -139,15 +144,14 @@ /* Coprocessor traps */ .macro __init_el2_cptr - mrs x1, hcr_el2 - and x1, x1, #HCR_E2H - cbz x1, .LnVHE_\@ + __check_hvhe .LnVHE_\@, x1 mov x0, #(CPACR_EL1_FPEN_EL1EN | CPACR_EL1_FPEN_EL0EN) - b .Lset_cptr_\@ + msr cpacr_el1, x0 + b .Lskip_set_cptr_\@ .LnVHE_\@: mov x0, #0x33ff -.Lset_cptr_\@: msr cptr_el2, x0 // Disable copro. traps to EL2 +.Lskip_set_cptr_\@: .endm /* Disable any fine grained traps */ @@ -268,19 +272,19 @@ check_override id_aa64pfr0, ID_AA64PFR0_EL1_SVE_SHIFT, .Linit_sve_\@, .Lskip_sve_\@, x1, x2 .Linit_sve_\@: /* SVE register access */ - mrs x0, cptr_el2 // Disable SVE traps - mrs x1, hcr_el2 - and x1, x1, #HCR_E2H - cbz x1, .Lcptr_nvhe_\@ + __check_hvhe .Lcptr_nvhe_\@, x1 - // VHE case + // (h)VHE case + mrs x0, cpacr_el1 // Disable SVE traps orr x0, x0, #(CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) - b .Lset_cptr_\@ + msr cpacr_el1, x0 + b .Lskip_set_cptr_\@ .Lcptr_nvhe_\@: // nVHE case + mrs x0, cptr_el2 // Disable SVE traps bic x0, x0, #CPTR_EL2_TZ -.Lset_cptr_\@: msr cptr_el2, x0 +.Lskip_set_cptr_\@: isb mov x1, #ZCR_ELx_LEN_MASK // SVE: Enable full vector msr_s SYS_ZCR_EL2, x1 // length for EL1. @@ -289,9 +293,19 @@ check_override id_aa64pfr1, ID_AA64PFR1_EL1_SME_SHIFT, .Linit_sme_\@, .Lskip_sme_\@, x1, x2 .Linit_sme_\@: /* SME register access and priority mapping */ + __check_hvhe .Lcptr_nvhe_sme_\@, x1 + + // (h)VHE case + mrs x0, cpacr_el1 // Disable SME traps + orr x0, x0, #(CPACR_EL1_SMEN_EL0EN | CPACR_EL1_SMEN_EL1EN) + msr cpacr_el1, x0 + b .Lskip_set_cptr_sme_\@ + +.Lcptr_nvhe_sme_\@: // nVHE case mrs x0, cptr_el2 // Disable SME traps bic x0, x0, #CPTR_EL2_TSM msr cptr_el2, x0 +.Lskip_set_cptr_sme_\@: isb mrs x1, sctlr_el2 diff --git a/arch/arm64/include/asm/fpsimd.h b/arch/arm64/include/asm/fpsimd.h index 67f2fb781f59..8df46f186c64 100644 --- a/arch/arm64/include/asm/fpsimd.h +++ b/arch/arm64/include/asm/fpsimd.h @@ -356,7 +356,7 @@ static inline int sme_max_virtualisable_vl(void) return vec_max_virtualisable_vl(ARM64_VEC_SME); } -extern void sme_alloc(struct task_struct *task); +extern void sme_alloc(struct task_struct *task, bool flush); extern unsigned int sme_get_vl(void); extern int sme_set_current_vl(unsigned long arg); extern int sme_get_current_vl(void); @@ -388,7 +388,7 @@ static inline void sme_smstart_sm(void) { } static inline void sme_smstop_sm(void) { } static inline void sme_smstop(void) { } -static inline void sme_alloc(struct task_struct *task) { } +static inline void sme_alloc(struct task_struct *task, bool flush) { } static inline void sme_setup(void) { } static inline unsigned int sme_get_vl(void) { return 0; } static inline int sme_max_vl(void) { return 0; } diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index 692b1ec663b2..521267478d18 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -138,6 +138,7 @@ #define KERNEL_HWCAP_SME_B16B16 __khwcap2_feature(SME_B16B16) #define KERNEL_HWCAP_SME_F16F16 __khwcap2_feature(SME_F16F16) #define KERNEL_HWCAP_MOPS __khwcap2_feature(MOPS) +#define KERNEL_HWCAP_HBC __khwcap2_feature(HBC) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 139a88e4e852..db1aeacd4cd9 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -186,6 +186,8 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_LOAD_ACQ_EX, AARCH64_INSN_LDST_STORE_EX, AARCH64_INSN_LDST_STORE_REL_EX, + AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET, + AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET, }; enum aarch64_insn_adsb_type { @@ -324,6 +326,7 @@ __AARCH64_INSN_FUNCS(prfm, 0x3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(prfm_lit, 0xFF000000, 0xD8000000) __AARCH64_INSN_FUNCS(store_imm, 0x3FC00000, 0x39000000) __AARCH64_INSN_FUNCS(load_imm, 0x3FC00000, 0x39400000) +__AARCH64_INSN_FUNCS(signed_load_imm, 0X3FC00000, 0x39800000) __AARCH64_INSN_FUNCS(store_pre, 0x3FE00C00, 0x38000C00) __AARCH64_INSN_FUNCS(load_pre, 0x3FE00C00, 0x38400C00) __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) @@ -337,6 +340,7 @@ __AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000) __AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) __AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) +__AARCH64_INSN_FUNCS(signed_ldr_reg, 0X3FE0FC00, 0x38A0E800) __AARCH64_INSN_FUNCS(ldr_imm, 0x3FC00000, 0x39400000) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) diff --git a/arch/arm64/include/asm/kernel-pgtable.h b/arch/arm64/include/asm/kernel-pgtable.h index 577773870b66..85d26143faa5 100644 --- a/arch/arm64/include/asm/kernel-pgtable.h +++ b/arch/arm64/include/asm/kernel-pgtable.h @@ -118,31 +118,4 @@ #define SWAPPER_RX_MMUFLAGS (SWAPPER_RW_MMUFLAGS | PTE_RDONLY) #endif -/* - * To make optimal use of block mappings when laying out the linear - * mapping, round down the base of physical memory to a size that can - * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE - * (64k granule), or a multiple that can be mapped using contiguous bits - * in the page tables: 32 * PMD_SIZE (16k granule) - */ -#if defined(CONFIG_ARM64_4K_PAGES) -#define ARM64_MEMSTART_SHIFT PUD_SHIFT -#elif defined(CONFIG_ARM64_16K_PAGES) -#define ARM64_MEMSTART_SHIFT CONT_PMD_SHIFT -#else -#define ARM64_MEMSTART_SHIFT PMD_SHIFT -#endif - -/* - * sparsemem vmemmap imposes an additional requirement on the alignment of - * memstart_addr, due to the fact that the base of the vmemmap region - * has a direct correspondence, and needs to appear sufficiently aligned - * in the virtual address space. - */ -#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS -#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) -#else -#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) -#endif - #endif /* __ASM_KERNEL_PGTABLE_H */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7d170aaa2db4..24e28bb2d95b 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -278,7 +278,7 @@ asmlinkage void __noreturn hyp_panic_bad_stack(void); asmlinkage void kvm_unexpected_el2_exception(void); struct kvm_cpu_context; void handle_trap(struct kvm_cpu_context *host_ctxt); -asmlinkage void __noreturn kvm_host_psci_cpu_entry(bool is_cpu_on); +asmlinkage void __noreturn __kvm_host_psci_cpu_entry(bool is_cpu_on); void __noreturn __pkvm_init_finalise(void); void kvm_nvhe_prepare_backtrace(unsigned long fp, unsigned long pc); void kvm_patch_vector_branch(struct alt_instr *alt, diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index efc0b45d79c3..3d6725ff0bf6 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -571,6 +571,14 @@ static inline bool vcpu_has_feature(struct kvm_vcpu *vcpu, int feature) return test_bit(feature, vcpu->arch.features); } +static __always_inline void kvm_write_cptr_el2(u64 val) +{ + if (has_vhe() || has_hvhe()) + write_sysreg(val, cpacr_el1); + else + write_sysreg(val, cptr_el2); +} + static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) { u64 val; @@ -578,8 +586,16 @@ static __always_inline u64 kvm_get_reset_cptr_el2(struct kvm_vcpu *vcpu) if (has_vhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN | CPACR_EL1_ZEN_EL1EN); + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN_EL1EN; } else if (has_hvhe()) { val = (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); + + if (!vcpu_has_sve(vcpu) || + (vcpu->arch.fp_state != FP_STATE_GUEST_OWNED)) + val |= CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN; + if (cpus_have_final_cap(ARM64_SME)) + val |= CPACR_EL1_SMEN_EL1EN | CPACR_EL1_SMEN_EL0EN; } else { val = CPTR_NVHE_EL2_RES1; @@ -597,9 +613,6 @@ static __always_inline void kvm_reset_cptr_el2(struct kvm_vcpu *vcpu) { u64 val = kvm_get_reset_cptr_el2(vcpu); - if (has_vhe() || has_hvhe()) - write_sysreg(val, cpacr_el1); - else - write_sysreg(val, cptr_el2); + kvm_write_cptr_el2(val); } #endif /* __ARM64_KVM_EMULATE_H__ */ diff --git a/arch/arm64/include/asm/mmu.h b/arch/arm64/include/asm/mmu.h index 4384eaa0aeb7..94b68850cb9f 100644 --- a/arch/arm64/include/asm/mmu.h +++ b/arch/arm64/include/asm/mmu.h @@ -64,7 +64,6 @@ extern void arm64_memblock_init(void); extern void paging_init(void); extern void bootmem_init(void); extern void __iomem *early_io_map(phys_addr_t phys, unsigned long virt); -extern void init_mem_pgprot(void); extern void create_mapping_noalloc(phys_addr_t phys, unsigned long virt, phys_addr_t size, pgprot_t prot); extern void create_pgd_mapping(struct mm_struct *mm, phys_addr_t phys, diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 76bba654b5d7..432932ad087f 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -103,6 +103,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) #define pte_young(pte) (!!(pte_val(pte) & PTE_AF)) #define pte_special(pte) (!!(pte_val(pte) & PTE_SPECIAL)) #define pte_write(pte) (!!(pte_val(pte) & PTE_WRITE)) +#define pte_rdonly(pte) (!!(pte_val(pte) & PTE_RDONLY)) #define pte_user(pte) (!!(pte_val(pte) & PTE_USER)) #define pte_user_exec(pte) (!(pte_val(pte) & PTE_UXN)) #define pte_cont(pte) (!!(pte_val(pte) & PTE_CONT)) @@ -120,7 +121,7 @@ static inline pteval_t __phys_to_pte_val(phys_addr_t phys) (__boundary - 1 < (end) - 1) ? __boundary : (end); \ }) -#define pte_hw_dirty(pte) (pte_write(pte) && !(pte_val(pte) & PTE_RDONLY)) +#define pte_hw_dirty(pte) (pte_write(pte) && !pte_rdonly(pte)) #define pte_sw_dirty(pte) (!!(pte_val(pte) & PTE_DIRTY)) #define pte_dirty(pte) (pte_sw_dirty(pte) || pte_hw_dirty(pte)) @@ -212,7 +213,7 @@ static inline pte_t pte_wrprotect(pte_t pte) * clear), set the PTE_DIRTY bit. */ if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); pte = clear_pte_bit(pte, __pgprot(PTE_WRITE)); pte = set_pte_bit(pte, __pgprot(PTE_RDONLY)); @@ -822,7 +823,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) PTE_ATTRINDX_MASK; /* preserve the hardware dirty information */ if (pte_hw_dirty(pte)) - pte = pte_mkdirty(pte); + pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); + pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); return pte; } diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 3918f2a67970..e5bc54522e71 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -359,14 +359,6 @@ static inline void prefetchw(const void *ptr) asm volatile("prfm pstl1keep, %a0\n" : : "p" (ptr)); } -#define ARCH_HAS_SPINLOCK_PREFETCH -static inline void spin_lock_prefetch(const void *ptr) -{ - asm volatile(ARM64_LSE_ATOMIC_INSN( - "prfm pstl1strm, %a0", - "nop") : : "p" (ptr)); -} - extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ extern void __init minsigstksz_setup(void); diff --git a/arch/arm64/include/asm/sdei.h b/arch/arm64/include/asm/sdei.h index 4292d9bafb9d..484cb6972e99 100644 --- a/arch/arm64/include/asm/sdei.h +++ b/arch/arm64/include/asm/sdei.h @@ -17,6 +17,9 @@ #include <asm/virt.h> +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); +DECLARE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); + extern unsigned long sdei_exit_mode; /* Software Delegated Exception entry point from firmware*/ @@ -29,6 +32,9 @@ asmlinkage void __sdei_asm_entry_trampoline(unsigned long event_num, unsigned long pc, unsigned long pstate); +/* Abort a running handler. Context is discarded. */ +void __sdei_handler_abort(void); + /* * The above entry point does the minimum to call C code. This function does * anything else, before calling the driver. diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b481935e9314..16464bf9a8aa 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -803,15 +803,21 @@ /* * For registers without architectural names, or simply unsupported by * GAS. + * + * __check_r forces warnings to be generated by the compiler when + * evaluating r which wouldn't normally happen due to being passed to + * the assembler via __stringify(r). */ #define read_sysreg_s(r) ({ \ u64 __val; \ + u32 __maybe_unused __check_r = (u32)(r); \ asm volatile(__mrs_s("%0", r) : "=r" (__val)); \ __val; \ }) #define write_sysreg_s(v, r) do { \ u64 __val = (u64)(v); \ + u32 __maybe_unused __check_r = (u32)(r); \ asm volatile(__msr_s(r, "%x0") : : "rZ" (__val)); \ } while (0) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 64a514f90131..bd77253b62e0 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 452 +#define __NR_compat_syscalls 453 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index d952a28463e0..78b68311ec81 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -909,6 +909,8 @@ __SYSCALL(__NR_futex_waitv, sys_futex_waitv) __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) #define __NR_cachestat 451 __SYSCALL(__NR_cachestat, sys_cachestat) +#define __NR_fchmodat2 452 +__SYSCALL(__NR_fchmodat2, sys_fchmodat2) /* * Please add new compat syscalls above this comment and update diff --git a/arch/arm64/include/uapi/asm/bitsperlong.h b/arch/arm64/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..485d60bee26c --- /dev/null +++ b/arch/arm64/include/uapi/asm/bitsperlong.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2012 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#ifndef __ASM_BITSPERLONG_H +#define __ASM_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include <asm-generic/bitsperlong.h> + +#endif /* __ASM_BITSPERLONG_H */ diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index a2cac4305b1e..53026f45a509 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -103,5 +103,6 @@ #define HWCAP2_SME_B16B16 (1UL << 41) #define HWCAP2_SME_F16F16 (1UL << 42) #define HWCAP2_MOPS (1UL << 43) +#define HWCAP2_HBC (1UL << 44) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index f9d456fe132d..a5f533f63b60 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -222,7 +222,7 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_CSSC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_RPRFM_SHIFT, 4, 0), - ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_EL1_BC_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_EL1_MOPS_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_EL1_APA3_SHIFT, 4, 0), @@ -2708,12 +2708,8 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .desc = "Enhanced Virtualization Traps", .capability = ARM64_HAS_EVT, .type = ARM64_CPUCAP_SYSTEM_FEATURE, - .sys_reg = SYS_ID_AA64MMFR2_EL1, - .sign = FTR_UNSIGNED, - .field_pos = ID_AA64MMFR2_EL1_EVT_SHIFT, - .field_width = 4, - .min_field_value = ID_AA64MMFR2_EL1_EVT_IMP, .matches = has_cpuid_feature, + ARM64_CPUID_FIELDS(ID_AA64MMFR2_EL1, EVT, IMP) }, {}, }; @@ -2844,6 +2840,7 @@ static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { HWCAP_CAP(ID_AA64ISAR2_EL1, RPRES, IMP, CAP_HWCAP, KERNEL_HWCAP_RPRES), HWCAP_CAP(ID_AA64ISAR2_EL1, WFxT, IMP, CAP_HWCAP, KERNEL_HWCAP_WFXT), HWCAP_CAP(ID_AA64ISAR2_EL1, MOPS, IMP, CAP_HWCAP, KERNEL_HWCAP_MOPS), + HWCAP_CAP(ID_AA64ISAR2_EL1, BC, IMP, CAP_HWCAP, KERNEL_HWCAP_HBC), #ifdef CONFIG_ARM64_SME HWCAP_CAP(ID_AA64PFR1_EL1, SME, IMP, CAP_HWCAP, KERNEL_HWCAP_SME), HWCAP_CAP(ID_AA64SMFR0_EL1, FA64, IMP, CAP_HWCAP, KERNEL_HWCAP_SME_FA64), diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index d1f68599c29f..f372295207fb 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -9,8 +9,6 @@ #include <linux/acpi.h> #include <linux/cpuidle.h> #include <linux/cpu_pm.h> -#include <linux/of.h> -#include <linux/of_device.h> #include <linux/psci.h> #ifdef CONFIG_ACPI_PROCESSOR_IDLE diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 58622dc85917..98fda8500535 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -126,6 +126,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_SME_B16B16] = "smeb16b16", [KERNEL_HWCAP_SME_F16F16] = "smef16f16", [KERNEL_HWCAP_MOPS] = "mops", + [KERNEL_HWCAP_HBC] = "hbc", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c index baab8dd3ead3..49efbdbd6f7a 100644 --- a/arch/arm64/kernel/efi.c +++ b/arch/arm64/kernel/efi.c @@ -158,7 +158,21 @@ asmlinkage efi_status_t efi_handle_corrupted_x18(efi_status_t s, const char *f) return s; } -DEFINE_RAW_SPINLOCK(efi_rt_lock); +static DEFINE_RAW_SPINLOCK(efi_rt_lock); + +void arch_efi_call_virt_setup(void) +{ + efi_virtmap_load(); + __efi_fpsimd_begin(); + raw_spin_lock(&efi_rt_lock); +} + +void arch_efi_call_virt_teardown(void) +{ + raw_spin_unlock(&efi_rt_lock); + __efi_fpsimd_end(); + efi_virtmap_unload(); +} asmlinkage u64 *efi_rt_stack_top __ro_after_init; diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 6b2e0c367702..0fc94207e69a 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -355,6 +355,35 @@ static bool cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) } #endif /* CONFIG_ARM64_ERRATUM_1463225 */ +/* + * As per the ABI exit SME streaming mode and clear the SVE state not + * shared with FPSIMD on syscall entry. + */ +static inline void fp_user_discard(void) +{ + /* + * If SME is active then exit streaming mode. If ZA is active + * then flush the SVE registers but leave userspace access to + * both SVE and SME enabled, otherwise disable SME for the + * task and fall through to disabling SVE too. This means + * that after a syscall we never have any streaming mode + * register state to track, if this changes the KVM code will + * need updating. + */ + if (system_supports_sme()) + sme_smstop_sm(); + + if (!system_supports_sve()) + return; + + if (test_thread_flag(TIF_SVE)) { + unsigned int sve_vq_minus_one; + + sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1; + sve_flush_live(true, sve_vq_minus_one); + } +} + UNHANDLED(el1t, 64, sync) UNHANDLED(el1t, 64, irq) UNHANDLED(el1t, 64, fiq) @@ -644,6 +673,8 @@ static void noinstr el0_svc(struct pt_regs *regs) { enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); + fp_user_discard(); + local_daif_restore(DAIF_PROCCTX); do_el0_svc(regs); exit_to_user_mode(regs); } @@ -783,6 +814,7 @@ static void noinstr el0_svc_compat(struct pt_regs *regs) { enter_from_user_mode(regs); cortex_a76_erratum_1463225_svc_handler(); + local_daif_restore(DAIF_PROCCTX); do_el0_svc_compat(regs); exit_to_user_mode(regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index a40e5e50fa55..6ad61de03d0a 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -986,9 +986,13 @@ SYM_CODE_START(__sdei_asm_handler) mov x19, x1 -#if defined(CONFIG_VMAP_STACK) || defined(CONFIG_SHADOW_CALL_STACK) + /* Store the registered-event for crash_smp_send_stop() */ ldrb w4, [x19, #SDEI_EVENT_PRIORITY] -#endif + cbnz w4, 1f + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 + b 2f +1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 +2: str x19, [x5] #ifdef CONFIG_VMAP_STACK /* @@ -1055,6 +1059,14 @@ SYM_CODE_START(__sdei_asm_handler) ldr_l x2, sdei_exit_mode + /* Clear the registered-event seen by crash_smp_send_stop() */ + ldrb w3, [x4, #SDEI_EVENT_PRIORITY] + cbnz w3, 1f + adr_this_cpu dst=x5, sym=sdei_active_normal_event, tmp=x6 + b 2f +1: adr_this_cpu dst=x5, sym=sdei_active_critical_event, tmp=x6 +2: str xzr, [x5] + alternative_if_not ARM64_UNMAP_KERNEL_AT_EL0 sdei_handler_exit exit_mode=x2 alternative_else_nop_endif @@ -1065,4 +1077,15 @@ alternative_else_nop_endif #endif SYM_CODE_END(__sdei_asm_handler) NOKPROBE(__sdei_asm_handler) + +SYM_CODE_START(__sdei_handler_abort) + mov_q x0, SDEI_1_0_FN_SDEI_EVENT_COMPLETE_AND_RESUME + adr x1, 1f + ldr_l x2, sdei_exit_mode + sdei_handler_exit exit_mode=x2 + // exit the handler and jump to the next instruction. + // Exit will stomp x0-x17, PSTATE, ELR_ELx, and SPSR_ELx. +1: ret +SYM_CODE_END(__sdei_handler_abort) +NOKPROBE(__sdei_handler_abort) #endif /* CONFIG_ARM_SDE_INTERFACE */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 520b681a07bb..91e44ac7150f 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -679,7 +679,7 @@ static void fpsimd_to_sve(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) return; vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); @@ -705,7 +705,7 @@ static void sve_to_fpsimd(struct task_struct *task) unsigned int i; __uint128_t const *p; - if (!system_supports_sve()) + if (!system_supports_sve() && !system_supports_sme()) return; vl = thread_get_cur_vl(&task->thread); @@ -835,7 +835,8 @@ void sve_sync_from_fpsimd_zeropad(struct task_struct *task) void *sst = task->thread.sve_state; struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state; - if (!test_tsk_thread_flag(task, TIF_SVE)) + if (!test_tsk_thread_flag(task, TIF_SVE) && + !thread_sm_enabled(&task->thread)) return; vq = sve_vq_from_vl(thread_get_cur_vl(&task->thread)); @@ -909,7 +910,7 @@ int vec_set_vector_length(struct task_struct *task, enum vec_type type, */ task->thread.svcr &= ~(SVCR_SM_MASK | SVCR_ZA_MASK); - clear_thread_flag(TIF_SME); + clear_tsk_thread_flag(task, TIF_SME); free_sme = true; } } @@ -1178,9 +1179,6 @@ void sve_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) */ u64 read_zcr_features(void) { - u64 zcr; - unsigned int vq_max; - /* * Set the maximum possible VL, and write zeroes to all other * bits to see if they stick. @@ -1188,12 +1186,8 @@ u64 read_zcr_features(void) sve_kernel_enable(NULL); write_sysreg_s(ZCR_ELx_LEN_MASK, SYS_ZCR_EL1); - zcr = read_sysreg_s(SYS_ZCR_EL1); - zcr &= ~(u64)ZCR_ELx_LEN_MASK; /* find sticky 1s outside LEN field */ - vq_max = sve_vq_from_vl(sve_get_vl()); - zcr |= vq_max - 1; /* set LEN field to maximum effective value */ - - return zcr; + /* Return LEN value that would be written to get the maximum VL */ + return sve_vq_from_vl(sve_get_vl()) - 1; } void __init sve_setup(void) @@ -1284,9 +1278,9 @@ void fpsimd_release_task(struct task_struct *dead_task) * the interest of testability and predictability, the architecture * guarantees that when ZA is enabled it will be zeroed. */ -void sme_alloc(struct task_struct *task) +void sme_alloc(struct task_struct *task, bool flush) { - if (task->thread.sme_state) { + if (task->thread.sme_state && flush) { memset(task->thread.sme_state, 0, sme_state_size(task)); return; } @@ -1348,9 +1342,6 @@ void fa64_kernel_enable(const struct arm64_cpu_capabilities *__always_unused p) */ u64 read_smcr_features(void) { - u64 smcr; - unsigned int vq_max; - sme_kernel_enable(NULL); /* @@ -1359,12 +1350,8 @@ u64 read_smcr_features(void) write_sysreg_s(read_sysreg_s(SYS_SMCR_EL1) | SMCR_ELx_LEN_MASK, SYS_SMCR_EL1); - smcr = read_sysreg_s(SYS_SMCR_EL1); - smcr &= ~(u64)SMCR_ELx_LEN_MASK; /* Only the LEN field */ - vq_max = sve_vq_from_vl(sme_get_vl()); - smcr |= vq_max - 1; /* set LEN field to maximum effective value */ - - return smcr; + /* Return LEN value that would be written to get the maximum VL */ + return sve_vq_from_vl(sme_get_vl()) - 1; } void __init sme_setup(void) @@ -1514,7 +1501,7 @@ void do_sme_acc(unsigned long esr, struct pt_regs *regs) } sve_alloc(current, false); - sme_alloc(current); + sme_alloc(current, true); if (!current->thread.sve_state || !current->thread.sme_state) { force_sig(SIGKILL); return; diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 757a0de07f91..7b236994f0e1 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -113,7 +113,7 @@ SYM_CODE_START(primary_entry) */ #if VA_BITS > 48 mrs_s x0, SYS_ID_AA64MMFR2_EL1 - tst x0, #0xf << ID_AA64MMFR2_EL1_VARange_SHIFT + tst x0, ID_AA64MMFR2_EL1_VARange_MASK mov x0, #VA_BITS mov x25, #VA_BITS_MIN csel x25, x25, x0, eq @@ -756,7 +756,7 @@ SYM_FUNC_START(__cpu_secondary_check52bitva) b.ne 2f mrs_s x0, SYS_ID_AA64MMFR2_EL1 - and x0, x0, #(0xf << ID_AA64MMFR2_EL1_VARange_SHIFT) + and x0, x0, ID_AA64MMFR2_EL1_VARange_MASK cbnz x0, 2f update_early_cpu_boot_status \ diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index db2a1861bb97..35225632d70a 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -654,7 +654,7 @@ static int breakpoint_handler(unsigned long unused, unsigned long esr, perf_bp_event(bp, regs); /* Do we need to handle the stepping? */ - if (is_default_overflow_handler(bp)) + if (uses_default_overflow_handler(bp)) step = 1; unlock: rcu_read_unlock(); @@ -733,7 +733,7 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val, static int watchpoint_report(struct perf_event *wp, unsigned long addr, struct pt_regs *regs) { - int step = is_default_overflow_handler(wp); + int step = uses_default_overflow_handler(wp); struct arch_hw_breakpoint *info = counter_arch_bp(wp); info->trigger = addr; diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index 2fe2491b692c..aee12c75b738 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -262,9 +262,9 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases) if (!len) return; - len = min(len, ARRAY_SIZE(buf) - 1); - strncpy(buf, cmdline, len); - buf[len] = 0; + len = strscpy(buf, cmdline, ARRAY_SIZE(buf)); + if (len == -E2BIG) + len = ARRAY_SIZE(buf) - 1; if (strcmp(buf, "--") == 0) return; diff --git a/arch/arm64/kernel/pci.c b/arch/arm64/kernel/pci.c index 2276689b5411..f872c57e9909 100644 --- a/arch/arm64/kernel/pci.c +++ b/arch/arm64/kernel/pci.c @@ -11,8 +11,6 @@ #include <linux/io.h> #include <linux/kernel.h> #include <linux/mm.h> -#include <linux/of_pci.h> -#include <linux/of_platform.h> #include <linux/pci.h> #include <linux/pci-acpi.h> #include <linux/pci-ecam.h> diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index d7f4f0d1ae12..20d7ef82de90 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -881,10 +881,18 @@ static int sve_set_common(struct task_struct *target, break; case ARM64_VEC_SME: target->thread.svcr |= SVCR_SM_MASK; + + /* + * Disable traps and ensure there is SME storage but + * preserve any currently set values in ZA/ZT. + */ + sme_alloc(target, false); + set_tsk_thread_flag(target, TIF_SME); break; default: WARN_ON_ONCE(1); - return -EINVAL; + ret = -EINVAL; + goto out; } /* @@ -932,11 +940,13 @@ static int sve_set_common(struct task_struct *target, /* * Ensure target->thread.sve_state is up to date with target's * FPSIMD regs, so that a short copyin leaves trailing - * registers unmodified. Always enable SVE even if going into - * streaming mode. + * registers unmodified. Only enable SVE if we are + * configuring normal SVE, a system with streaming SVE may not + * have normal SVE. */ fpsimd_sync_to_sve(target); - set_tsk_thread_flag(target, TIF_SVE); + if (type == ARM64_VEC_SVE) + set_tsk_thread_flag(target, TIF_SVE); target->thread.fp_type = FP_STATE_SVE; BUILD_BUG_ON(SVE_PT_SVE_OFFSET != sizeof(header)); @@ -1098,7 +1108,7 @@ static int za_set(struct task_struct *target, } /* Allocate/reinit ZA storage */ - sme_alloc(target); + sme_alloc(target, true); if (!target->thread.sme_state) { ret = -ENOMEM; goto out; @@ -1168,8 +1178,13 @@ static int zt_set(struct task_struct *target, if (!system_supports_sme2()) return -EINVAL; + /* Ensure SVE storage in case this is first use of SME */ + sve_alloc(target, false); + if (!target->thread.sve_state) + return -ENOMEM; + if (!thread_za_enabled(&target->thread)) { - sme_alloc(target); + sme_alloc(target, true); if (!target->thread.sme_state) return -ENOMEM; } @@ -1177,8 +1192,12 @@ static int zt_set(struct task_struct *target, ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, thread_zt_state(&target->thread), 0, ZT_SIG_REG_BYTES); - if (ret == 0) + if (ret == 0) { target->thread.svcr |= SVCR_ZA_MASK; + set_tsk_thread_flag(target, TIF_SME); + } + + fpsimd_flush_task_state(target); return ret; } diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index 830be01af32d..255d12f881c2 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -47,6 +47,9 @@ DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_normal_ptr); DEFINE_PER_CPU(unsigned long *, sdei_shadow_call_stack_critical_ptr); #endif +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_normal_event); +DEFINE_PER_CPU(struct sdei_registered_event *, sdei_active_critical_event); + static void _free_sdei_stack(unsigned long * __percpu *ptr, int cpu) { unsigned long *p; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e304f7ebec2a..c7ebe744c64e 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -475,7 +475,7 @@ static int restore_za_context(struct user_ctxs *user) fpsimd_flush_task_state(current); /* From now, fpsimd_thread_switch() won't touch thread.sve_state */ - sme_alloc(current); + sme_alloc(current, true); if (!current->thread.sme_state) { current->thread.svcr &= ~SVCR_ZA_MASK; clear_thread_flag(TIF_SME); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index edd63894d61e..960b98b43506 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -1044,10 +1044,8 @@ void crash_smp_send_stop(void) * If this cpu is the only one alive at this point in time, online or * not, there are no stop messages to be sent around, so just back out. */ - if (num_other_online_cpus() == 0) { - sdei_mask_local_cpu(); - return; - } + if (num_other_online_cpus() == 0) + goto skip_ipi; cpumask_copy(&mask, cpu_online_mask); cpumask_clear_cpu(smp_processor_id(), &mask); @@ -1066,7 +1064,9 @@ void crash_smp_send_stop(void) pr_warn("SMP: failed to stop secondary CPUs %*pbl\n", cpumask_pr_args(&mask)); +skip_ipi: sdei_mask_local_cpu(); + sdei_handler_abort(); } bool smp_crash_stop_failed(void) diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index b1ae2f2eaf77..9a70d9746b66 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -8,7 +8,6 @@ #include <linux/randomize_kstack.h> #include <linux/syscalls.h> -#include <asm/daifflags.h> #include <asm/debug-monitors.h> #include <asm/exception.h> #include <asm/fpsimd.h> @@ -101,8 +100,6 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr, * (Similarly for HVC and SMC elsewhere.) */ - local_daif_restore(DAIF_PROCCTX); - if (flags & _TIF_MTE_ASYNC_FAULT) { /* * Process the asynchronous tag check fault before the actual @@ -153,38 +150,8 @@ trace_exit: syscall_trace_exit(regs); } -/* - * As per the ABI exit SME streaming mode and clear the SVE state not - * shared with FPSIMD on syscall entry. - */ -static inline void fp_user_discard(void) -{ - /* - * If SME is active then exit streaming mode. If ZA is active - * then flush the SVE registers but leave userspace access to - * both SVE and SME enabled, otherwise disable SME for the - * task and fall through to disabling SVE too. This means - * that after a syscall we never have any streaming mode - * register state to track, if this changes the KVM code will - * need updating. - */ - if (system_supports_sme()) - sme_smstop_sm(); - - if (!system_supports_sve()) - return; - - if (test_thread_flag(TIF_SVE)) { - unsigned int sve_vq_minus_one; - - sve_vq_minus_one = sve_vq_from_vl(task_get_sve_vl(current)) - 1; - sve_flush_live(true, sve_vq_minus_one); - } -} - void do_el0_svc(struct pt_regs *regs) { - fp_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 6028f1fe2d1c..45354f2ddf70 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -50,9 +50,7 @@ SECTIONS . = ALIGN(4); .altinstructions : { - __alt_instructions = .; *(.altinstructions) - __alt_instructions_end = .; } .dynamic : { *(.dynamic) } :text :dynamic diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 72dc53a75d1c..d1cb298a58a0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -55,7 +55,7 @@ DECLARE_KVM_NVHE_PER_CPU(struct kvm_cpu_context, kvm_hyp_ctxt); static bool vgic_present, kvm_arm_initialised; -static DEFINE_PER_CPU(unsigned char, kvm_arm_hardware_enabled); +static DEFINE_PER_CPU(unsigned char, kvm_hyp_initialized); DEFINE_STATIC_KEY_FALSE(userspace_irqchip_in_use); bool is_kvm_arm_initialised(void) @@ -1864,18 +1864,24 @@ static void cpu_hyp_reinit(void) cpu_hyp_init_features(); } -static void _kvm_arch_hardware_enable(void *discard) +static void cpu_hyp_init(void *discard) { - if (!__this_cpu_read(kvm_arm_hardware_enabled)) { + if (!__this_cpu_read(kvm_hyp_initialized)) { cpu_hyp_reinit(); - __this_cpu_write(kvm_arm_hardware_enabled, 1); + __this_cpu_write(kvm_hyp_initialized, 1); } } -int kvm_arch_hardware_enable(void) +static void cpu_hyp_uninit(void *discard) { - int was_enabled; + if (__this_cpu_read(kvm_hyp_initialized)) { + cpu_hyp_reset(); + __this_cpu_write(kvm_hyp_initialized, 0); + } +} +int kvm_arch_hardware_enable(void) +{ /* * Most calls to this function are made with migration * disabled, but not with preemption disabled. The former is @@ -1884,36 +1890,23 @@ int kvm_arch_hardware_enable(void) */ preempt_disable(); - was_enabled = __this_cpu_read(kvm_arm_hardware_enabled); - _kvm_arch_hardware_enable(NULL); + cpu_hyp_init(NULL); - if (!was_enabled) { - kvm_vgic_cpu_up(); - kvm_timer_cpu_up(); - } + kvm_vgic_cpu_up(); + kvm_timer_cpu_up(); preempt_enable(); return 0; } -static void _kvm_arch_hardware_disable(void *discard) -{ - if (__this_cpu_read(kvm_arm_hardware_enabled)) { - cpu_hyp_reset(); - __this_cpu_write(kvm_arm_hardware_enabled, 0); - } -} - void kvm_arch_hardware_disable(void) { - if (__this_cpu_read(kvm_arm_hardware_enabled)) { - kvm_timer_cpu_down(); - kvm_vgic_cpu_down(); - } + kvm_timer_cpu_down(); + kvm_vgic_cpu_down(); if (!is_protected_kvm_enabled()) - _kvm_arch_hardware_disable(NULL); + cpu_hyp_uninit(NULL); } #ifdef CONFIG_CPU_PM @@ -1922,16 +1915,16 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, void *v) { /* - * kvm_arm_hardware_enabled is left with its old value over + * kvm_hyp_initialized is left with its old value over * PM_ENTER->PM_EXIT. It is used to indicate PM_EXIT should * re-enable hyp. */ switch (cmd) { case CPU_PM_ENTER: - if (__this_cpu_read(kvm_arm_hardware_enabled)) + if (__this_cpu_read(kvm_hyp_initialized)) /* - * don't update kvm_arm_hardware_enabled here - * so that the hardware will be re-enabled + * don't update kvm_hyp_initialized here + * so that the hyp will be re-enabled * when we resume. See below. */ cpu_hyp_reset(); @@ -1939,8 +1932,8 @@ static int hyp_init_cpu_pm_notifier(struct notifier_block *self, return NOTIFY_OK; case CPU_PM_ENTER_FAILED: case CPU_PM_EXIT: - if (__this_cpu_read(kvm_arm_hardware_enabled)) - /* The hardware was enabled before suspend. */ + if (__this_cpu_read(kvm_hyp_initialized)) + /* The hyp was enabled before suspend. */ cpu_hyp_reinit(); return NOTIFY_OK; @@ -2021,7 +2014,7 @@ static int __init init_subsystems(void) /* * Enable hardware so that subsystem initialisation can access EL2. */ - on_each_cpu(_kvm_arch_hardware_enable, NULL, 1); + on_each_cpu(cpu_hyp_init, NULL, 1); /* * Register CPU lower-power notifier @@ -2059,7 +2052,7 @@ out: hyp_cpu_pm_exit(); if (err || !is_protected_kvm_enabled()) - on_each_cpu(_kvm_arch_hardware_disable, NULL, 1); + on_each_cpu(cpu_hyp_uninit, NULL, 1); return err; } @@ -2097,7 +2090,7 @@ static int __init do_pkvm_init(u32 hyp_va_bits) * The stub hypercalls are now disabled, so set our local flag to * prevent a later re-init attempt in kvm_arch_hardware_enable(). */ - __this_cpu_write(kvm_arm_hardware_enabled, 1); + __this_cpu_write(kvm_hyp_initialized, 1); preempt_enable(); return ret; diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 4bddb8541bec..34f222af6165 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -457,6 +457,7 @@ static bool handle_ampere1_tcr(struct kvm_vcpu *vcpu) */ val &= ~(TCR_HD | TCR_HA); write_sysreg_el1(val, SYS_TCR); + __kvm_skip_instr(vcpu); return true; } diff --git a/arch/arm64/kvm/hyp/nvhe/Makefile b/arch/arm64/kvm/hyp/nvhe/Makefile index 9ddc025e4b86..2250253a6429 100644 --- a/arch/arm64/kvm/hyp/nvhe/Makefile +++ b/arch/arm64/kvm/hyp/nvhe/Makefile @@ -25,7 +25,7 @@ hyp-obj-y := timer-sr.o sysreg-sr.o debug-sr.o switch.o tlb.o hyp-init.o host.o cache.o setup.o mm.o mem_protect.o sys_regs.o pkvm.o stacktrace.o ffa.o hyp-obj-y += ../vgic-v3-sr.o ../aarch32.o ../vgic-v2-cpuif-proxy.o ../entry.o \ ../fpsimd.o ../hyp-entry.o ../exception.o ../pgtable.o -hyp-obj-$(CONFIG_DEBUG_LIST) += list_debug.o +hyp-obj-$(CONFIG_LIST_HARDENED) += list_debug.o hyp-obj-y += $(lib-objs) ## diff --git a/arch/arm64/kvm/hyp/nvhe/ffa.c b/arch/arm64/kvm/hyp/nvhe/ffa.c index 58dcd92bf346..ab4f5d160c58 100644 --- a/arch/arm64/kvm/hyp/nvhe/ffa.c +++ b/arch/arm64/kvm/hyp/nvhe/ffa.c @@ -705,7 +705,20 @@ int hyp_ffa_init(void *pages) if (res.a0 == FFA_RET_NOT_SUPPORTED) return 0; - if (res.a0 != FFA_VERSION_1_0) + /* + * Firmware returns the maximum supported version of the FF-A + * implementation. Check that the returned version is + * backwards-compatible with the hyp according to the rules in DEN0077A + * v1.1 REL0 13.2.1. + * + * Of course, things are never simple when dealing with firmware. v1.1 + * broke ABI with v1.0 on several structures, which is itself + * incompatible with the aforementioned versioning scheme. The + * expectation is that v1.x implementations that do not support the v1.0 + * ABI return NOT_SUPPORTED rather than a version number, according to + * DEN0077A v1.1 REL0 18.6.4. + */ + if (FFA_MAJOR_VERSION(res.a0) != 1) return -EOPNOTSUPP; arm_smccc_1_1_smc(FFA_ID_GET, 0, 0, 0, 0, 0, 0, 0, &res); diff --git a/arch/arm64/kvm/hyp/nvhe/list_debug.c b/arch/arm64/kvm/hyp/nvhe/list_debug.c index d68abd7ea124..46a2d4f2b3c6 100644 --- a/arch/arm64/kvm/hyp/nvhe/list_debug.c +++ b/arch/arm64/kvm/hyp/nvhe/list_debug.c @@ -26,8 +26,9 @@ static inline __must_check bool nvhe_check_data_corruption(bool v) /* The predicates checked here are taken from lib/list_debug.c. */ -bool __list_add_valid(struct list_head *new, struct list_head *prev, - struct list_head *next) +__list_valid_slowpath +bool __list_add_valid_or_report(struct list_head *new, struct list_head *prev, + struct list_head *next) { if (NVHE_CHECK_DATA_CORRUPTION(next->prev != prev) || NVHE_CHECK_DATA_CORRUPTION(prev->next != next) || @@ -37,7 +38,8 @@ bool __list_add_valid(struct list_head *new, struct list_head *prev, return true; } -bool __list_del_entry_valid(struct list_head *entry) +__list_valid_slowpath +bool __list_del_entry_valid_or_report(struct list_head *entry) { struct list_head *prev, *next; diff --git a/arch/arm64/kvm/hyp/nvhe/switch.c b/arch/arm64/kvm/hyp/nvhe/switch.c index 0a6271052def..e89a23153e85 100644 --- a/arch/arm64/kvm/hyp/nvhe/switch.c +++ b/arch/arm64/kvm/hyp/nvhe/switch.c @@ -63,7 +63,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) __activate_traps_fpsimd32(vcpu); } - write_sysreg(val, cptr_el2); + kvm_write_cptr_el2(val); write_sysreg(__this_cpu_read(kvm_hyp_vector), vbar_el2); if (cpus_have_final_cap(ARM64_WORKAROUND_SPECULATIVE_AT)) { diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index 924934cb85ee..a635ab83fee3 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -385,6 +385,9 @@ u32 aarch64_insn_gen_load_store_reg(enum aarch64_insn_register reg, case AARCH64_INSN_LDST_LOAD_REG_OFFSET: insn = aarch64_insn_get_ldr_reg_value(); break; + case AARCH64_INSN_LDST_SIGNED_LOAD_REG_OFFSET: + insn = aarch64_insn_get_signed_ldr_reg_value(); + break; case AARCH64_INSN_LDST_STORE_REG_OFFSET: insn = aarch64_insn_get_str_reg_value(); break; @@ -430,6 +433,9 @@ u32 aarch64_insn_gen_load_store_imm(enum aarch64_insn_register reg, case AARCH64_INSN_LDST_LOAD_IMM_OFFSET: insn = aarch64_insn_get_ldr_imm_value(); break; + case AARCH64_INSN_LDST_SIGNED_LOAD_IMM_OFFSET: + insn = aarch64_insn_get_signed_load_imm_value(); + break; case AARCH64_INSN_LDST_STORE_IMM_OFFSET: insn = aarch64_insn_get_str_imm_value(); break; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index d31c3a9290c5..4fcb88a445ef 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -73,6 +73,33 @@ phys_addr_t __ro_after_init arm64_dma_phys_limit; #define DEFAULT_CRASH_KERNEL_LOW_SIZE (128UL << 20) +/* + * To make optimal use of block mappings when laying out the linear + * mapping, round down the base of physical memory to a size that can + * be mapped efficiently, i.e., either PUD_SIZE (4k granule) or PMD_SIZE + * (64k granule), or a multiple that can be mapped using contiguous bits + * in the page tables: 32 * PMD_SIZE (16k granule) + */ +#if defined(CONFIG_ARM64_4K_PAGES) +#define ARM64_MEMSTART_SHIFT PUD_SHIFT +#elif defined(CONFIG_ARM64_16K_PAGES) +#define ARM64_MEMSTART_SHIFT CONT_PMD_SHIFT +#else +#define ARM64_MEMSTART_SHIFT PMD_SHIFT +#endif + +/* + * sparsemem vmemmap imposes an additional requirement on the alignment of + * memstart_addr, due to the fact that the base of the vmemmap region + * has a direct correspondence, and needs to appear sufficiently aligned + * in the virtual address space. + */ +#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS +#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS) +#else +#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT) +#endif + static int __init reserve_crashkernel_low(unsigned long long low_size) { unsigned long long low_base; diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index 2baeec419f62..14fdf645edc8 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -447,7 +447,7 @@ SYM_FUNC_START(__cpu_setup) * via capabilities. */ mrs x9, ID_AA64MMFR1_EL1 - and x9, x9, #0xf + and x9, x9, ID_AA64MMFR1_EL1_HAFDBS_MASK cbz x9, 1f orr tcr, tcr, #TCR_HA // hardware Access flag update 1: diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index c2edadb8ec6a..23b1b34db088 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -59,10 +59,13 @@ AARCH64_INSN_LDST_##type##_REG_OFFSET) #define A64_STRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, STORE) #define A64_LDRB(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 8, LOAD) +#define A64_LDRSB(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 8, SIGNED_LOAD) #define A64_STRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, STORE) #define A64_LDRH(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 16, LOAD) +#define A64_LDRSH(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 16, SIGNED_LOAD) #define A64_STR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, STORE) #define A64_LDR32(Wt, Xn, Xm) A64_LS_REG(Wt, Xn, Xm, 32, LOAD) +#define A64_LDRSW(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 32, SIGNED_LOAD) #define A64_STR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, STORE) #define A64_LDR64(Xt, Xn, Xm) A64_LS_REG(Xt, Xn, Xm, 64, LOAD) @@ -73,10 +76,13 @@ AARCH64_INSN_LDST_##type##_IMM_OFFSET) #define A64_STRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, STORE) #define A64_LDRBI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 8, LOAD) +#define A64_LDRSBI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 8, SIGNED_LOAD) #define A64_STRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, STORE) #define A64_LDRHI(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 16, LOAD) +#define A64_LDRSHI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 16, SIGNED_LOAD) #define A64_STR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, STORE) #define A64_LDR32I(Wt, Xn, imm) A64_LS_IMM(Wt, Xn, imm, 32, LOAD) +#define A64_LDRSWI(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 32, SIGNED_LOAD) #define A64_STR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, STORE) #define A64_LDR64I(Xt, Xn, imm) A64_LS_IMM(Xt, Xn, imm, 64, LOAD) @@ -186,6 +192,11 @@ #define A64_UXTH(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 15) #define A64_UXTW(sf, Rd, Rn) A64_UBFM(sf, Rd, Rn, 0, 31) +/* Sign extend */ +#define A64_SXTB(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 7) +#define A64_SXTH(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 15) +#define A64_SXTW(sf, Rd, Rn) A64_SBFM(sf, Rd, Rn, 0, 31) + /* Move wide (immediate) */ #define A64_MOVEW(sf, Rd, imm16, shift, type) \ aarch64_insn_gen_movewide(Rd, imm16, shift, \ @@ -223,6 +234,7 @@ #define A64_DATA2(sf, Rd, Rn, Rm, type) aarch64_insn_gen_data2(Rd, Rn, Rm, \ A64_VARIANT(sf), AARCH64_INSN_DATA2_##type) #define A64_UDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, UDIV) +#define A64_SDIV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, SDIV) #define A64_LSLV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSLV) #define A64_LSRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, LSRV) #define A64_ASRV(sf, Rd, Rn, Rm) A64_DATA2(sf, Rd, Rn, Rm, ASRV) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index ec2174838f2a..150d1c6543f7 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -715,7 +715,8 @@ static int add_exception_handler(const struct bpf_insn *insn, /* First pass */ return 0; - if (BPF_MODE(insn->code) != BPF_PROBE_MEM) + if (BPF_MODE(insn->code) != BPF_PROBE_MEM && + BPF_MODE(insn->code) != BPF_PROBE_MEMSX) return 0; if (!ctx->prog->aux->extable || @@ -779,12 +780,26 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, u8 dst_adj; int off_adj; int ret; + bool sign_extend; switch (code) { /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: - emit(A64_MOV(is64, dst, src), ctx); + switch (insn->off) { + case 0: + emit(A64_MOV(is64, dst, src), ctx); + break; + case 8: + emit(A64_SXTB(is64, dst, src), ctx); + break; + case 16: + emit(A64_SXTH(is64, dst, src), ctx); + break; + case 32: + emit(A64_SXTW(is64, dst, src), ctx); + break; + } break; /* dst = dst OP src */ case BPF_ALU | BPF_ADD | BPF_X: @@ -813,11 +828,17 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit(A64_UDIV(is64, dst, dst, src), ctx); + if (!off) + emit(A64_UDIV(is64, dst, dst, src), ctx); + else + emit(A64_SDIV(is64, dst, dst, src), ctx); break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - emit(A64_UDIV(is64, tmp, dst, src), ctx); + if (!off) + emit(A64_UDIV(is64, tmp, dst, src), ctx); + else + emit(A64_SDIV(is64, tmp, dst, src), ctx); emit(A64_MSUB(is64, dst, dst, tmp, src), ctx); break; case BPF_ALU | BPF_LSH | BPF_X: @@ -840,11 +861,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, /* dst = BSWAP##imm(dst) */ case BPF_ALU | BPF_END | BPF_FROM_LE: case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: #ifdef CONFIG_CPU_BIG_ENDIAN - if (BPF_SRC(code) == BPF_FROM_BE) + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_BE) goto emit_bswap_uxt; #else /* !CONFIG_CPU_BIG_ENDIAN */ - if (BPF_SRC(code) == BPF_FROM_LE) + if (BPF_CLASS(code) == BPF_ALU && BPF_SRC(code) == BPF_FROM_LE) goto emit_bswap_uxt; #endif switch (imm) { @@ -943,12 +965,18 @@ emit_bswap_uxt: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: emit_a64_mov_i(is64, tmp, imm, ctx); - emit(A64_UDIV(is64, dst, dst, tmp), ctx); + if (!off) + emit(A64_UDIV(is64, dst, dst, tmp), ctx); + else + emit(A64_SDIV(is64, dst, dst, tmp), ctx); break; case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: emit_a64_mov_i(is64, tmp2, imm, ctx); - emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); + if (!off) + emit(A64_UDIV(is64, tmp, dst, tmp2), ctx); + else + emit(A64_SDIV(is64, tmp, dst, tmp2), ctx); emit(A64_MSUB(is64, dst, dst, tmp, tmp2), ctx); break; case BPF_ALU | BPF_LSH | BPF_K: @@ -966,7 +994,11 @@ emit_bswap_uxt: /* JUMP off */ case BPF_JMP | BPF_JA: - jmp_offset = bpf2a64_offset(i, off, ctx); + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(code) == BPF_JMP) + jmp_offset = bpf2a64_offset(i, off, ctx); + else + jmp_offset = bpf2a64_offset(i, imm, ctx); check_imm26(jmp_offset); emit(A64_B(jmp_offset), ctx); break; @@ -1122,7 +1154,7 @@ emit_cond_jmp: return 1; } - /* LDX: dst = *(size *)(src + off) */ + /* LDX: dst = (u64)*(unsigned size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_B: @@ -1131,6 +1163,13 @@ emit_cond_jmp: case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_B: + /* LDXS: dst_reg = (s64)*(signed size *)(src_reg + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: if (ctx->fpb_offset > 0 && src == fp) { src_adj = fpb; off_adj = off + ctx->fpb_offset; @@ -1138,29 +1177,49 @@ emit_cond_jmp: src_adj = src; off_adj = off; } + sign_extend = (BPF_MODE(insn->code) == BPF_MEMSX || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX); switch (BPF_SIZE(code)) { case BPF_W: if (is_lsi_offset(off_adj, 2)) { - emit(A64_LDR32I(dst, src_adj, off_adj), ctx); + if (sign_extend) + emit(A64_LDRSWI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDR32I(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_LDR32(dst, src, tmp), ctx); + if (sign_extend) + emit(A64_LDRSW(dst, src_adj, off_adj), ctx); + else + emit(A64_LDR32(dst, src, tmp), ctx); } break; case BPF_H: if (is_lsi_offset(off_adj, 1)) { - emit(A64_LDRHI(dst, src_adj, off_adj), ctx); + if (sign_extend) + emit(A64_LDRSHI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDRHI(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_LDRH(dst, src, tmp), ctx); + if (sign_extend) + emit(A64_LDRSH(dst, src, tmp), ctx); + else + emit(A64_LDRH(dst, src, tmp), ctx); } break; case BPF_B: if (is_lsi_offset(off_adj, 0)) { - emit(A64_LDRBI(dst, src_adj, off_adj), ctx); + if (sign_extend) + emit(A64_LDRSBI(dst, src_adj, off_adj), ctx); + else + emit(A64_LDRBI(dst, src_adj, off_adj), ctx); } else { emit_a64_mov_i(1, tmp, off, ctx); - emit(A64_LDRB(dst, src, tmp), ctx); + if (sign_extend) + emit(A64_LDRSB(dst, src, tmp), ctx); + else + emit(A64_LDRB(dst, src, tmp), ctx); } break; case BPF_DW: diff --git a/arch/csky/abiv2/cacheflush.c b/arch/csky/abiv2/cacheflush.c index d05a551af5d5..876028b1083f 100644 --- a/arch/csky/abiv2/cacheflush.c +++ b/arch/csky/abiv2/cacheflush.c @@ -24,6 +24,7 @@ void update_mmu_cache_range(struct vm_fault *vmf, struct vm_area_struct *vma, if (test_and_set_bit(PG_dcache_clean, &folio->flags)) return; + icache_inv_range(address, address + nr*PAGE_SIZE); for (i = 0; i < folio_nr_pages(folio); i++) { unsigned long addr = (unsigned long) kmap_local_folio(folio, i * PAGE_SIZE); diff --git a/arch/csky/include/asm/page.h b/arch/csky/include/asm/page.h index b23e3006a9e0..4a0502e324a6 100644 --- a/arch/csky/include/asm/page.h +++ b/arch/csky/include/asm/page.h @@ -34,9 +34,6 @@ #include <linux/pfn.h> -#define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) -#define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) - #define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && \ (void *)(kaddr) < high_memory) @@ -80,6 +77,16 @@ extern unsigned long va_pa_offset; #define __pa_symbol(x) __pa(RELOC_HIDE((unsigned long)(x), 0)) +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return __pa(kaddr) >> PAGE_SHIFT; +} + +static inline void * pfn_to_virt(unsigned long pfn) +{ + return (void *)((unsigned long)__va(pfn) << PAGE_SHIFT); +} + #define MAP_NR(x) PFN_DOWN((unsigned long)(x) - PAGE_OFFSET - \ PHYS_OFFSET_OFFSET) #define virt_to_page(x) (mem_map + MAP_NR(x)) diff --git a/arch/csky/include/asm/ptrace.h b/arch/csky/include/asm/ptrace.h index 4202aab6df42..0634b7895d81 100644 --- a/arch/csky/include/asm/ptrace.h +++ b/arch/csky/include/asm/ptrace.h @@ -96,5 +96,7 @@ static inline unsigned long regs_get_register(struct pt_regs *regs, return *(unsigned long *)((unsigned long)regs + offset); } +asmlinkage int syscall_trace_enter(struct pt_regs *regs); +asmlinkage void syscall_trace_exit(struct pt_regs *regs); #endif /* __ASSEMBLY__ */ #endif /* __ASM_CSKY_PTRACE_H */ diff --git a/arch/csky/include/asm/sections.h b/arch/csky/include/asm/sections.h index 4192cba8445d..83e82b7c0f6c 100644 --- a/arch/csky/include/asm/sections.h +++ b/arch/csky/include/asm/sections.h @@ -7,4 +7,6 @@ extern char _start[]; +asmlinkage void csky_start(unsigned int unused, void *dtb_start); + #endif /* __ASM_SECTIONS_H */ diff --git a/arch/csky/include/asm/traps.h b/arch/csky/include/asm/traps.h index 421a4195e2fe..1e7d303b91e9 100644 --- a/arch/csky/include/asm/traps.h +++ b/arch/csky/include/asm/traps.h @@ -40,4 +40,19 @@ do { \ void csky_alignment(struct pt_regs *regs); +asmlinkage void do_trap_unknown(struct pt_regs *regs); +asmlinkage void do_trap_zdiv(struct pt_regs *regs); +asmlinkage void do_trap_buserr(struct pt_regs *regs); +asmlinkage void do_trap_misaligned(struct pt_regs *regs); +asmlinkage void do_trap_bkpt(struct pt_regs *regs); +asmlinkage void do_trap_illinsn(struct pt_regs *regs); +asmlinkage void do_trap_fpe(struct pt_regs *regs); +asmlinkage void do_trap_priv(struct pt_regs *regs); +asmlinkage void trap_c(struct pt_regs *regs); + +asmlinkage void do_notify_resume(struct pt_regs *regs, + unsigned long thread_info_flags); + +void trap_init(void); + #endif /* __ASM_CSKY_TRAPS_H */ diff --git a/arch/csky/kernel/module.c b/arch/csky/kernel/module.c index f11b3e573344..0b56a8cd12a3 100644 --- a/arch/csky/kernel/module.c +++ b/arch/csky/kernel/module.c @@ -40,7 +40,7 @@ static void jsri_2_lrw_jsr(uint32_t *location) } } #else -static void inline jsri_2_lrw_jsr(uint32_t *location) +static inline void jsri_2_lrw_jsr(uint32_t *location) { return; } diff --git a/arch/csky/kernel/vdso/vgettimeofday.c b/arch/csky/kernel/vdso/vgettimeofday.c index da491832c098..c4831145eed5 100644 --- a/arch/csky/kernel/vdso/vgettimeofday.c +++ b/arch/csky/kernel/vdso/vgettimeofday.c @@ -3,6 +3,9 @@ #include <linux/time.h> #include <linux/types.h> +extern +int __vdso_clock_gettime(clockid_t clock, + struct old_timespec32 *ts); int __vdso_clock_gettime(clockid_t clock, struct old_timespec32 *ts) { @@ -10,17 +13,25 @@ int __vdso_clock_gettime(clockid_t clock, } int __vdso_clock_gettime64(clockid_t clock, + struct __kernel_timespec *ts); +int __vdso_clock_gettime64(clockid_t clock, struct __kernel_timespec *ts) { return __cvdso_clock_gettime(clock, ts); } +extern +int __vdso_gettimeofday(struct __kernel_old_timeval *tv, + struct timezone *tz); int __vdso_gettimeofday(struct __kernel_old_timeval *tv, struct timezone *tz) { return __cvdso_gettimeofday(tv, tz); } +extern +int __vdso_clock_getres(clockid_t clock_id, + struct old_timespec32 *res); int __vdso_clock_getres(clockid_t clock_id, struct old_timespec32 *res) { diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index 87927eb824cc..58500a964238 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -11,7 +11,7 @@ #ifdef __KERNEL__ -#include <acpi/pdc_intel.h> +#include <acpi/proc_cap_intel.h> #include <linux/init.h> #include <linux/numa.h> @@ -69,9 +69,9 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; #endif static inline bool arch_has_acpi_pdc(void) { return true; } -static inline void arch_acpi_set_pdc_bits(u32 *buf) +static inline void arch_acpi_set_proc_cap_bits(u32 *cap) { - buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; + *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SMP; } #ifdef CONFIG_ACPI_NUMA diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h index d1978e004054..47e3801b526a 100644 --- a/arch/ia64/include/asm/processor.h +++ b/arch/ia64/include/asm/processor.h @@ -634,7 +634,6 @@ ia64_imva (void *addr) #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH #define PREFETCH_STRIDE L1_CACHE_BYTES static inline void @@ -649,8 +648,6 @@ prefetchw (const void *x) ia64_lfetch_excl(ia64_lfhint_none, x); } -#define spin_lock_prefetch(x) prefetchw(x) - extern unsigned long boot_option_idle_override; enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT, diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index f8c74ffeeefb..83d8609aec03 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -372,3 +372,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig index dc56ddf9ba03..34d7a3de9a79 100644 --- a/arch/loongarch/Kconfig +++ b/arch/loongarch/Kconfig @@ -662,5 +662,3 @@ source "kernel/power/Kconfig" source "drivers/acpi/Kconfig" endmenu - -source "drivers/firmware/Kconfig" diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index b1e5db51b61c..ef87bab46754 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -83,8 +83,8 @@ KBUILD_CFLAGS_KERNEL += -fPIE LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext endif -cflags-y += -ffreestanding cflags-y += $(call cc-option, -mno-check-zero-division) +cflags-y += -fno-builtin-memcpy -fno-builtin-memmove -fno-builtin-memset load-y = 0x9000000000200000 bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild index 6b222f227342..93783fa24f6e 100644 --- a/arch/loongarch/include/asm/Kbuild +++ b/arch/loongarch/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += dma-contiguous.h -generic-y += export.h generic-y += mcs_spinlock.h generic-y += parport.h generic-y += early_ioremap.h diff --git a/arch/loongarch/include/asm/fpu.h b/arch/loongarch/include/asm/fpu.h index b541f6248837..c2d8962fda00 100644 --- a/arch/loongarch/include/asm/fpu.h +++ b/arch/loongarch/include/asm/fpu.h @@ -173,16 +173,30 @@ static inline void restore_fp(struct task_struct *tsk) _restore_fp(&tsk->thread.fpu); } -static inline union fpureg *get_fpu_regs(struct task_struct *tsk) +static inline void save_fpu_regs(struct task_struct *tsk) { + unsigned int euen; + if (tsk == current) { preempt_disable(); - if (is_fpu_owner()) + + euen = csr_read32(LOONGARCH_CSR_EUEN); + +#ifdef CONFIG_CPU_HAS_LASX + if (euen & CSR_EUEN_LASXEN) + _save_lasx(¤t->thread.fpu); + else +#endif +#ifdef CONFIG_CPU_HAS_LSX + if (euen & CSR_EUEN_LSXEN) + _save_lsx(¤t->thread.fpu); + else +#endif + if (euen & CSR_EUEN_FPEN) _save_fp(¤t->thread.fpu); + preempt_enable(); } - - return tsk->thread.fpu.fpr; } static inline int is_simd_owner(void) diff --git a/arch/loongarch/include/asm/local.h b/arch/loongarch/include/asm/local.h index 83e995b30e47..c49675852bdc 100644 --- a/arch/loongarch/include/asm/local.h +++ b/arch/loongarch/include/asm/local.h @@ -63,8 +63,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new) static inline bool local_try_cmpxchg(local_t *l, long *old, long new) { - typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old; - return try_cmpxchg_local(&l->a.counter, __old, new); + return try_cmpxchg_local(&l->a.counter, + (typeof(l->a.counter) *) old, new); } #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n))) diff --git a/arch/loongarch/include/asm/ptrace.h b/arch/loongarch/include/asm/ptrace.h index 35f0958163ac..f3ddaed9ef7f 100644 --- a/arch/loongarch/include/asm/ptrace.h +++ b/arch/loongarch/include/asm/ptrace.h @@ -162,7 +162,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long val #define instruction_pointer(regs) ((regs)->csr_era) #define profile_pc(regs) instruction_pointer(regs) -extern void die(const char *, struct pt_regs *) __noreturn; +extern void die(const char *str, struct pt_regs *regs); static inline void die_if_kernel(const char *str, struct pt_regs *regs) { diff --git a/arch/loongarch/include/asm/smp.h b/arch/loongarch/include/asm/smp.h index 416b653bccb4..66ecb480c894 100644 --- a/arch/loongarch/include/asm/smp.h +++ b/arch/loongarch/include/asm/smp.h @@ -98,8 +98,6 @@ static inline void __cpu_die(unsigned int cpu) { loongson_cpu_die(cpu); } - -extern void __noreturn play_dead(void); #endif #endif /* __ASM_SMP_H */ diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index f3df5f0a4509..501094a09f5d 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -6,12 +6,12 @@ * * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include <linux/export.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/asm-extable.h> #include <asm/asm-offsets.h> #include <asm/errno.h> -#include <asm/export.h> #include <asm/fpregdef.h> #include <asm/loongarch.h> #include <asm/regdef.h> diff --git a/arch/loongarch/kernel/hw_breakpoint.c b/arch/loongarch/kernel/hw_breakpoint.c index 021b59c248fa..fc55c4de2a11 100644 --- a/arch/loongarch/kernel/hw_breakpoint.c +++ b/arch/loongarch/kernel/hw_breakpoint.c @@ -207,8 +207,7 @@ static int hw_breakpoint_control(struct perf_event *bp, write_wb_reg(CSR_CFG_CTRL, i, 0, CTRL_PLV_ENABLE); } else { ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE | - 1 << MWPnCFG3_LoadEn | 1 << MWPnCFG3_StoreEn); + write_wb_reg(CSR_CFG_CTRL, i, 1, ctrl | CTRL_PLV_ENABLE); } enable = csr_read64(LOONGARCH_CSR_CRMD); csr_write64(CSR_CRMD_WE | enable, LOONGARCH_CSR_CRMD); diff --git a/arch/loongarch/kernel/mcount.S b/arch/loongarch/kernel/mcount.S index cb8e5803de4b..3015896016a0 100644 --- a/arch/loongarch/kernel/mcount.S +++ b/arch/loongarch/kernel/mcount.S @@ -5,7 +5,7 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ -#include <asm/export.h> +#include <linux/export.h> #include <asm/ftrace.h> #include <asm/regdef.h> #include <asm/stackframe.h> diff --git a/arch/loongarch/kernel/mcount_dyn.S b/arch/loongarch/kernel/mcount_dyn.S index e16ab0b98e5a..482aa553aa2d 100644 --- a/arch/loongarch/kernel/mcount_dyn.S +++ b/arch/loongarch/kernel/mcount_dyn.S @@ -3,7 +3,6 @@ * Copyright (C) 2022 Loongson Technology Corporation Limited */ -#include <asm/export.h> #include <asm/ftrace.h> #include <asm/regdef.h> #include <asm/stackframe.h> diff --git a/arch/loongarch/kernel/process.c b/arch/loongarch/kernel/process.c index 2e04eb07abb6..4ee1e9d6a65f 100644 --- a/arch/loongarch/kernel/process.c +++ b/arch/loongarch/kernel/process.c @@ -61,13 +61,6 @@ EXPORT_SYMBOL(__stack_chk_guard); unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; EXPORT_SYMBOL(boot_option_idle_override); -#ifdef CONFIG_HOTPLUG_CPU -void __noreturn arch_cpu_idle_dead(void) -{ - play_dead(); -} -#endif - asmlinkage void ret_from_fork(void); asmlinkage void ret_from_kernel_thread(void); diff --git a/arch/loongarch/kernel/ptrace.c b/arch/loongarch/kernel/ptrace.c index a0767c3a0f0a..f72adbf530c6 100644 --- a/arch/loongarch/kernel/ptrace.c +++ b/arch/loongarch/kernel/ptrace.c @@ -147,6 +147,8 @@ static int fpr_get(struct task_struct *target, { int r; + save_fpu_regs(target); + if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) r = gfpr_get(target, &to); else @@ -278,6 +280,8 @@ static int simd_get(struct task_struct *target, { const unsigned int wr_size = NUM_FPU_REGS * regset->size; + save_fpu_regs(target); + if (!tsk_used_math(target)) { /* The task hasn't used FP or LSX, fill with 0xff */ copy_pad_fprs(target, regset, &to, 0); diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c index 8ea1bbcf13a7..6667b0a90f81 100644 --- a/arch/loongarch/kernel/smp.c +++ b/arch/loongarch/kernel/smp.c @@ -317,7 +317,7 @@ void loongson_cpu_die(unsigned int cpu) mb(); } -void play_dead(void) +void __noreturn arch_cpu_idle_dead(void) { register uint64_t addr; register void (*init_fn)(void); diff --git a/arch/loongarch/kernel/traps.c b/arch/loongarch/kernel/traps.c index 8fb5e7a77145..89699db45cec 100644 --- a/arch/loongarch/kernel/traps.c +++ b/arch/loongarch/kernel/traps.c @@ -383,16 +383,15 @@ void show_registers(struct pt_regs *regs) static DEFINE_RAW_SPINLOCK(die_lock); -void __noreturn die(const char *str, struct pt_regs *regs) +void die(const char *str, struct pt_regs *regs) { + int ret; static int die_counter; - int sig = SIGSEGV; oops_enter(); - if (notify_die(DIE_OOPS, str, regs, 0, current->thread.trap_nr, - SIGSEGV) == NOTIFY_STOP) - sig = 0; + ret = notify_die(DIE_OOPS, str, regs, 0, + current->thread.trap_nr, SIGSEGV); console_verbose(); raw_spin_lock_irq(&die_lock); @@ -405,6 +404,9 @@ void __noreturn die(const char *str, struct pt_regs *regs) oops_exit(); + if (ret == NOTIFY_STOP) + return; + if (regs && kexec_should_crash(current)) crash_kexec(regs); @@ -414,7 +416,7 @@ void __noreturn die(const char *str, struct pt_regs *regs) if (panic_on_oops) panic("Fatal exception"); - make_task_dead(sig); + make_task_dead(SIGSEGV); } static inline void setup_vint_size(unsigned int size) diff --git a/arch/loongarch/lib/clear_user.S b/arch/loongarch/lib/clear_user.S index 9dcf71719387..0790eadce166 100644 --- a/arch/loongarch/lib/clear_user.S +++ b/arch/loongarch/lib/clear_user.S @@ -3,12 +3,12 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include <linux/export.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/asm-extable.h> #include <asm/cpu.h> -#include <asm/export.h> #include <asm/regdef.h> .irp to, 0, 1, 2, 3, 4, 5, 6, 7 diff --git a/arch/loongarch/lib/copy_user.S b/arch/loongarch/lib/copy_user.S index fecd08cad702..bfe3d2793d00 100644 --- a/arch/loongarch/lib/copy_user.S +++ b/arch/loongarch/lib/copy_user.S @@ -3,12 +3,12 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include <linux/export.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/asm-extable.h> #include <asm/cpu.h> -#include <asm/export.h> #include <asm/regdef.h> .irp to, 0, 1, 2, 3, 4, 5, 6, 7 diff --git a/arch/loongarch/lib/memcpy.S b/arch/loongarch/lib/memcpy.S index 39ce6621c704..cc30b3b6252f 100644 --- a/arch/loongarch/lib/memcpy.S +++ b/arch/loongarch/lib/memcpy.S @@ -3,11 +3,11 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include <linux/export.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/cpu.h> -#include <asm/export.h> #include <asm/regdef.h> SYM_FUNC_START(memcpy) diff --git a/arch/loongarch/lib/memmove.S b/arch/loongarch/lib/memmove.S index 45b725ba7867..7dc76d1484b6 100644 --- a/arch/loongarch/lib/memmove.S +++ b/arch/loongarch/lib/memmove.S @@ -3,11 +3,11 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include <linux/export.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/cpu.h> -#include <asm/export.h> #include <asm/regdef.h> SYM_FUNC_START(memmove) diff --git a/arch/loongarch/lib/memset.S b/arch/loongarch/lib/memset.S index b39c6194e3ae..3f20f7996e8e 100644 --- a/arch/loongarch/lib/memset.S +++ b/arch/loongarch/lib/memset.S @@ -3,11 +3,11 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include <linux/export.h> #include <asm/alternative-asm.h> #include <asm/asm.h> #include <asm/asmmacro.h> #include <asm/cpu.h> -#include <asm/export.h> #include <asm/regdef.h> .macro fill_to_64 r0 diff --git a/arch/loongarch/lib/unaligned.S b/arch/loongarch/lib/unaligned.S index 9177fd638f07..185f82d85810 100644 --- a/arch/loongarch/lib/unaligned.S +++ b/arch/loongarch/lib/unaligned.S @@ -9,7 +9,6 @@ #include <asm/asmmacro.h> #include <asm/asm-extable.h> #include <asm/errno.h> -#include <asm/export.h> #include <asm/regdef.h> .L_fixup_handle_unaligned: diff --git a/arch/loongarch/mm/page.S b/arch/loongarch/mm/page.S index 4c874a7af0ad..7ad76551d313 100644 --- a/arch/loongarch/mm/page.S +++ b/arch/loongarch/mm/page.S @@ -2,9 +2,9 @@ /* * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/asm.h> -#include <asm/export.h> #include <asm/page.h> #include <asm/regdef.h> diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S index 4ad78703de6f..ca17dd3a1915 100644 --- a/arch/loongarch/mm/tlbex.S +++ b/arch/loongarch/mm/tlbex.S @@ -3,7 +3,6 @@ * Copyright (C) 2020-2022 Loongson Technology Corporation Limited */ #include <asm/asm.h> -#include <asm/export.h> #include <asm/loongarch.h> #include <asm/page.h> #include <asm/pgtable.h> diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 4383ed851063..6deb8faa564b 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -591,7 +591,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -635,6 +634,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index ec0f9c9f9562..802c161827f4 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -548,7 +548,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -591,6 +590,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 8656ae1f239e..2cb3d755873b 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -568,7 +568,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -612,6 +611,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 496fb6a415ea..b13552caa6b3 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -540,7 +540,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -583,6 +582,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 4add7ab9973b..f88356c45440 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -550,7 +550,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -593,6 +592,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 5845f1f71fd1..7c2ebb616fba 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -570,7 +570,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -614,6 +613,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index bbb251bab81a..d3b272910b38 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -656,7 +656,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -700,6 +699,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 4f9cfc70c66d..4529bc4b843c 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -539,7 +539,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -582,6 +581,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 67c42b4822f0..30824032e4d5 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -540,7 +540,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -583,6 +582,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 85f19515200b..3911211410ed 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -557,7 +557,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -601,6 +600,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index b1b15acb5d5f..991730c50957 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -538,7 +538,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -580,6 +579,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index 91d66c0f5ab6..e80d7509ab1d 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -538,7 +538,6 @@ CONFIG_CRYPTO_CHACHA20POLY1305=m CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -581,6 +580,7 @@ CONFIG_TEST_SCANF=m CONFIG_TEST_BITMAP=m CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m +CONFIG_TEST_MAPLE_TREE=m CONFIG_TEST_RHASHTABLE=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1b720299deb1..0dbf9c5c6fae 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,6 +1,5 @@ # SPDX-License-Identifier: GPL-2.0 generated-y += syscall_table.h -generic-y += export.h generic-y += extable.h generic-y += kvm_para.h generic-y += mcs_spinlock.h diff --git a/arch/m68k/include/asm/div64.h b/arch/m68k/include/asm/div64.h index 365f39f5e256..df1f6b450cc5 100644 --- a/arch/m68k/include/asm/div64.h +++ b/arch/m68k/include/asm/div64.h @@ -31,6 +31,9 @@ __rem; \ }) +/* defining this stops the unused helper function from being built */ +#define __div64_32 __div64_32 + #endif /* CONFIG_CPU_HAS_NO_MULDIV64 */ #endif /* _M68K_DIV64_H */ diff --git a/arch/m68k/include/asm/string.h b/arch/m68k/include/asm/string.h index f0f5021d6327..760cc13acdf4 100644 --- a/arch/m68k/include/asm/string.h +++ b/arch/m68k/include/asm/string.h @@ -41,6 +41,7 @@ static inline char *strncpy(char *dest, const char *src, size_t n) #define __HAVE_ARCH_MEMMOVE extern void *memmove(void *, const void *, __kernel_size_t); +extern int memcmp(const void *, const void *, __kernel_size_t); #define memcmp(d, s, n) __builtin_memcmp(d, s, n) #define __HAVE_ARCH_MEMSET diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 4f504783371f..259ceb125367 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/arch/m68k/lib/divsi3.S b/arch/m68k/lib/divsi3.S index 3a2143f51631..62787b4333e7 100644 --- a/arch/m68k/lib/divsi3.S +++ b/arch/m68k/lib/divsi3.S @@ -33,7 +33,7 @@ General Public License for more details. */ D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 */ -#include <asm/export.h> +#include <linux/export.h> /* These are predefined by new versions of GNU cpp. */ diff --git a/arch/m68k/lib/modsi3.S b/arch/m68k/lib/modsi3.S index 1c967649a4e0..1bcb742d0b76 100644 --- a/arch/m68k/lib/modsi3.S +++ b/arch/m68k/lib/modsi3.S @@ -33,7 +33,7 @@ General Public License for more details. */ D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 */ -#include <asm/export.h> +#include <linux/export.h> /* These are predefined by new versions of GNU cpp. */ diff --git a/arch/m68k/lib/mulsi3.S b/arch/m68k/lib/mulsi3.S index 855675e69a8a..c2853248249e 100644 --- a/arch/m68k/lib/mulsi3.S +++ b/arch/m68k/lib/mulsi3.S @@ -32,7 +32,7 @@ General Public License for more details. */ Some of this code comes from MINIX, via the folks at ericsson. D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 */ -#include <asm/export.h> +#include <linux/export.h> /* These are predefined by new versions of GNU cpp. */ #ifndef __USER_LABEL_PREFIX__ diff --git a/arch/m68k/lib/udivsi3.S b/arch/m68k/lib/udivsi3.S index 78440ae513bf..39ad70596293 100644 --- a/arch/m68k/lib/udivsi3.S +++ b/arch/m68k/lib/udivsi3.S @@ -32,7 +32,7 @@ General Public License for more details. */ Some of this code comes from MINIX, via the folks at ericsson. D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 */ -#include <asm/export.h> +#include <linux/export.h> /* These are predefined by new versions of GNU cpp. */ #ifndef __USER_LABEL_PREFIX__ diff --git a/arch/m68k/lib/umodsi3.S b/arch/m68k/lib/umodsi3.S index b6fd11f58948..6640eaa9eb03 100644 --- a/arch/m68k/lib/umodsi3.S +++ b/arch/m68k/lib/umodsi3.S @@ -32,7 +32,7 @@ General Public License for more details. */ Some of this code comes from MINIX, via the folks at ericsson. D. V. Henkel-Wallace (gumby@cygnus.com) Fete Bastille, 1992 */ -#include <asm/export.h> +#include <linux/export.h> /* These are predefined by new versions of GNU cpp. */ #ifndef __USER_LABEL_PREFIX__ diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 858d22bf275c..a3798c2637fd 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -457,3 +457,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/arch/mips/include/asm/local.h b/arch/mips/include/asm/local.h index 5daf6fe8e3e9..e6ae3df0349d 100644 --- a/arch/mips/include/asm/local.h +++ b/arch/mips/include/asm/local.h @@ -101,8 +101,8 @@ static __inline__ long local_cmpxchg(local_t *l, long old, long new) static __inline__ bool local_try_cmpxchg(local_t *l, long *old, long new) { - typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old; - return try_cmpxchg_local(&l->a.counter, __old, new); + return try_cmpxchg_local(&l->a.counter, + (typeof(l->a.counter) *) old, new); } #define local_xchg(l, n) (atomic_long_xchg((&(l)->a), (n))) diff --git a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h index 9151dcd9d0d5..af9cea21c853 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h +++ b/arch/mips/include/asm/mach-cavium-octeon/cpu-feature-overrides.h @@ -58,8 +58,6 @@ #define cpu_has_rixi (cpu_data[0].cputype != CPU_CAVIUM_OCTEON) -#define ARCH_HAS_SPINLOCK_PREFETCH 1 -#define spin_lock_prefetch(x) prefetch(x) #define PREFETCH_STRIDE 128 #ifdef __OCTEON__ diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 1976317d4e8b..152034b8e0a0 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -390,3 +390,4 @@ 449 n32 futex_waitv sys_futex_waitv 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 n32 cachestat sys_cachestat +452 n32 fchmodat2 sys_fchmodat2 diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index cfda2511badf..cb5e757f6621 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -366,3 +366,4 @@ 449 n64 futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat +452 n64 fchmodat2 sys_fchmodat2 diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 7692234c3768..1a646813afdc 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -439,3 +439,4 @@ 449 o32 futex_waitv sys_futex_waitv 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 o32 cachestat sys_cachestat +452 o32 fchmodat2 sys_fchmodat2 diff --git a/arch/parisc/Kbuild b/arch/parisc/Kbuild index a6d3b280ba0c..749b195f2894 100644 --- a/arch/parisc/Kbuild +++ b/arch/parisc/Kbuild @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-y += mm/ kernel/ math-emu/ +obj-y += mm/ kernel/ math-emu/ net/ # for cleaning subdir- += boot diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 4cda9f0a3277..4fd36642a779 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -50,6 +50,9 @@ config PARISC select TTY # Needed for pdc_cons.c select HAS_IOPORT if PCI || EISA select HAVE_DEBUG_STACKOVERFLOW + select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT + select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT + select HAVE_ARCH_MMAP_RND_BITS select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_HASH select HAVE_ARCH_JUMP_LABEL @@ -57,6 +60,8 @@ config PARISC select HAVE_ARCH_KFENCE select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK + select HAVE_EBPF_JIT + select ARCH_WANT_DEFAULT_BPF_JIT select HAVE_REGS_AND_STACK_ACCESS_API select HOTPLUG_CORE_SYNC_DEAD if HOTPLUG_CPU select GENERIC_SCHED_CLOCK @@ -125,6 +130,20 @@ config TIME_LOW_RES depends on SMP default y +config ARCH_MMAP_RND_BITS_MIN + default 18 if 64BIT + default 8 + +config ARCH_MMAP_RND_COMPAT_BITS_MIN + default 8 + +config ARCH_MMAP_RND_BITS_MAX + default 24 if 64BIT + default 17 + +config ARCH_MMAP_RND_COMPAT_BITS_MAX + default 17 + # unless you want to implement ACPI on PA-RISC ... ;-) config PM bool diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index 1401e4c5fe5f..f4f164eb12df 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -2,7 +2,7 @@ # config LIGHTWEIGHT_SPINLOCK_CHECK bool "Enable lightweight spinlock checks" - depends on SMP && !DEBUG_SPINLOCK + depends on DEBUG_KERNEL && SMP && !DEBUG_SPINLOCK default y help Add checks with low performance impact to the spinlock functions @@ -13,7 +13,7 @@ config LIGHTWEIGHT_SPINLOCK_CHECK config TLB_PTLOCK bool "Use page table locks in TLB fault handler" - depends on SMP + depends on DEBUG_KERNEL && SMP default n help Select this option to enable page table locking in the TLB diff --git a/arch/parisc/boot/compressed/misc.c b/arch/parisc/boot/compressed/misc.c index 7ee49f5881d1..d389359e22ac 100644 --- a/arch/parisc/boot/compressed/misc.c +++ b/arch/parisc/boot/compressed/misc.c @@ -117,7 +117,7 @@ char *strchr(const char *s, int c) return NULL; } -int puts(const char *s) +static int puts(const char *s) { const char *nuline = s; @@ -172,7 +172,7 @@ static int print_num(unsigned long num, int base) return 0; } -int printf(const char *fmt, ...) +static int printf(const char *fmt, ...) { va_list args; int i = 0; @@ -204,13 +204,13 @@ void abort(void) } #undef malloc -void *malloc(size_t size) +static void *malloc(size_t size) { return malloc_gzip(size); } #undef free -void free(void *ptr) +static void free(void *ptr) { return free_gzip(ptr); } @@ -278,7 +278,7 @@ static void parse_elf(void *output) free(phdrs); } -unsigned long decompress_kernel(unsigned int started_wide, +asmlinkage unsigned long __visible decompress_kernel(unsigned int started_wide, unsigned int command_line, const unsigned int rd_start, const unsigned int rd_end) diff --git a/arch/parisc/include/asm/dma.h b/arch/parisc/include/asm/dma.h index 9e8c101de902..582fb5d1a5d5 100644 --- a/arch/parisc/include/asm/dma.h +++ b/arch/parisc/include/asm/dma.h @@ -14,6 +14,8 @@ #define dma_outb outb #define dma_inb inb +extern unsigned long pcxl_dma_start; + /* ** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up ** (or rather not merge) DMAs into manageable chunks. diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index cc426d365892..140eaa97bf21 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -163,8 +163,7 @@ typedef struct elf32_fdesc { /* Format for the Elf64 Function descriptor */ typedef struct elf64_fdesc { - __u64 dummy[2]; /* FIXME: nothing uses these, why waste - * the space */ + __u64 dummy[2]; /* used by 64-bit eBPF and tracing functions */ __u64 addr; __u64 gp; } Elf64_Fdesc; diff --git a/arch/parisc/include/asm/ftrace.h b/arch/parisc/include/asm/ftrace.h index a7cf0d05ccf4..f1cc1ee3a647 100644 --- a/arch/parisc/include/asm/ftrace.h +++ b/arch/parisc/include/asm/ftrace.h @@ -12,6 +12,10 @@ extern void mcount(void); extern unsigned long sys_call_table[]; extern unsigned long return_address(unsigned int); +struct ftrace_regs; +extern void ftrace_function_trampoline(unsigned long parent, + unsigned long self_addr, unsigned long org_sp_gr3, + struct ftrace_regs *fregs); #ifdef CONFIG_DYNAMIC_FTRACE extern void ftrace_caller(void); diff --git a/arch/parisc/include/asm/led.h b/arch/parisc/include/asm/led.h index 6de13d08a388..0aea47eff48d 100644 --- a/arch/parisc/include/asm/led.h +++ b/arch/parisc/include/asm/led.h @@ -11,8 +11,8 @@ #define LED1 0x02 #define LED0 0x01 /* bottom (or furthest left) LED */ -#define LED_LAN_TX LED0 /* for LAN transmit activity */ -#define LED_LAN_RCV LED1 /* for LAN receive activity */ +#define LED_LAN_RCV LED0 /* for LAN receive activity */ +#define LED_LAN_TX LED1 /* for LAN transmit activity */ #define LED_DISK_IO LED2 /* for disk activity */ #define LED_HEARTBEAT LED3 /* heartbeat */ @@ -25,19 +25,13 @@ #define LED_CMD_REG_NONE 0 /* NULL == no addr for the cmd register */ /* register_led_driver() */ -int __init register_led_driver(int model, unsigned long cmd_reg, unsigned long data_reg); - -/* registers the LED regions for procfs */ -void __init register_led_regions(void); +int register_led_driver(int model, unsigned long cmd_reg, unsigned long data_reg); #ifdef CONFIG_CHASSIS_LCD_LED /* writes a string to the LCD display (if possible on this h/w) */ -int lcd_print(const char *str); +void lcd_print(const char *str); #else -#define lcd_print(str) +#define lcd_print(str) do { } while (0) #endif -/* main LED initialization function (uses PDC) */ -int __init led_init(void); - #endif /* LED_H */ diff --git a/arch/parisc/include/asm/machdep.h b/arch/parisc/include/asm/machdep.h deleted file mode 100644 index 215d2c43989d..000000000000 --- a/arch/parisc/include/asm/machdep.h +++ /dev/null @@ -1,17 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _PARISC_MACHDEP_H -#define _PARISC_MACHDEP_H - -#include <linux/notifier.h> - -#define MACH_RESTART 1 -#define MACH_HALT 2 -#define MACH_POWER_ON 3 -#define MACH_POWER_OFF 4 - -extern struct notifier_block *mach_notifier; -extern void pa7300lc_init(void); - -extern void (*cpu_lpmc)(int, struct pt_regs *); - -#endif diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index e132b2819fc9..d77c43d32974 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -313,15 +313,7 @@ extern void collect_boot_cpu_data(void); extern int show_cpuinfo (struct seq_file *m, void *v); /* driver code in driver/parisc */ -extern void gsc_init(void); extern void processor_init(void); -extern void ccio_init(void); -extern void hppb_init(void); -extern void dino_init(void); -extern void iosapic_init(void); -extern void lba_init(void); -extern void sba_init(void); -extern void parisc_eisa_init(void); struct parisc_device; struct resource; extern void sba_distributed_lmmio(struct parisc_device *, struct resource *); diff --git a/arch/parisc/include/asm/ropes.h b/arch/parisc/include/asm/ropes.h index 8e51c775c80a..fd96706c7234 100644 --- a/arch/parisc/include/asm/ropes.h +++ b/arch/parisc/include/asm/ropes.h @@ -252,7 +252,7 @@ static inline int agp_mode_mercury(void __iomem *hpa) { ** fixup_irq is to initialize PCI IRQ line support and ** virtualize pcidev->irq value. To be called by pci_fixup_bus(). */ -extern void *iosapic_register(unsigned long hpa); +extern void *iosapic_register(unsigned long hpa, void __iomem *vaddr); extern int iosapic_fixup_irq(void *obj, struct pci_dev *pcidev); #define LBA_FUNC_ID 0x0000 /* function id */ diff --git a/arch/parisc/include/asm/runway.h b/arch/parisc/include/asm/runway.h index 5cf061376ddb..2837f0223d6d 100644 --- a/arch/parisc/include/asm/runway.h +++ b/arch/parisc/include/asm/runway.h @@ -2,9 +2,6 @@ #ifndef ASM_PARISC_RUNWAY_H #define ASM_PARISC_RUNWAY_H -/* declared in arch/parisc/kernel/setup.c */ -extern struct proc_dir_entry * proc_runway_root; - #define RUNWAY_STATUS 0x10 #define RUNWAY_DEBUG 0x40 diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index edfcb9858bcb..0b326e52255e 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -7,8 +7,6 @@ #include <asm/processor.h> #include <asm/spinlock_types.h> -#define SPINLOCK_BREAK_INSN 0x0000c006 /* break 6,6 */ - static inline void arch_spin_val_check(int lock_val) { if (IS_ENABLED(CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK)) diff --git a/arch/parisc/include/asm/spinlock_types.h b/arch/parisc/include/asm/spinlock_types.h index d65934079ebd..efd06a897c6a 100644 --- a/arch/parisc/include/asm/spinlock_types.h +++ b/arch/parisc/include/asm/spinlock_types.h @@ -4,6 +4,10 @@ #define __ARCH_SPIN_LOCK_UNLOCKED_VAL 0x1a46 +#define SPINLOCK_BREAK_INSN 0x0000c006 /* break 6,6 */ + +#ifndef __ASSEMBLY__ + typedef struct { #ifdef CONFIG_PA20 volatile unsigned int slock; @@ -27,6 +31,8 @@ typedef struct { volatile unsigned int counter; } arch_rwlock_t; +#endif /* __ASSEMBLY__ */ + #define __ARCH_RW_LOCK_UNLOCKED__ 0x01000000 #define __ARCH_RW_LOCK_UNLOCKED { .lock_mutex = __ARCH_SPIN_LOCK_UNLOCKED, \ .counter = __ARCH_RW_LOCK_UNLOCKED__ } diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 2d1478fc4aa5..5ab0467be70a 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -6,7 +6,7 @@ extra-y := vmlinux.lds obj-y := head.o cache.o pacache.o setup.o pdt.o traps.o time.o irq.o \ - pa7300lc.o syscall.o entry.o sys_parisc.o firmware.o \ + syscall.o entry.o sys_parisc.o firmware.o \ ptrace.o hardware.o inventory.o drivers.o alternative.o \ signal.o hpmc.o real2.o parisc_ksyms.o unaligned.o \ process.o processor.o pdc_cons.o pdc_chassis.o unwind.o \ diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 0e5ebfe8d9d2..ae03b8679696 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -25,6 +25,7 @@ #include <asm/traps.h> #include <asm/thread_info.h> #include <asm/alternative.h> +#include <asm/spinlock_types.h> #include <linux/linkage.h> #include <linux/pgtable.h> @@ -406,7 +407,7 @@ LDREG 0(\ptp),\pte bb,<,n \pte,_PAGE_PRESENT_BIT,3f b \fault - stw \spc,0(\tmp) + stw \tmp1,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif 2: LDREG 0(\ptp),\pte @@ -415,24 +416,22 @@ .endm /* Release page_table_lock without reloading lock address. - Note that the values in the register spc are limited to - NR_SPACE_IDS (262144). Thus, the stw instruction always - stores a nonzero value even when register spc is 64 bits. We use an ordered store to ensure all prior accesses are performed prior to releasing the lock. */ - .macro ptl_unlock0 spc,tmp + .macro ptl_unlock0 spc,tmp,tmp2 #ifdef CONFIG_TLB_PTLOCK -98: or,COND(=) %r0,\spc,%r0 - stw,ma \spc,0(\tmp) +98: ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmp2 + or,COND(=) %r0,\spc,%r0 + stw,ma \tmp2,0(\tmp) 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm /* Release page_table_lock. */ - .macro ptl_unlock1 spc,tmp + .macro ptl_unlock1 spc,tmp,tmp2 #ifdef CONFIG_TLB_PTLOCK 98: get_ptl \tmp - ptl_unlock0 \spc,\tmp + ptl_unlock0 \spc,\tmp,\tmp2 99: ALTERNATIVE(98b, 99b, ALT_COND_NO_SMP, INSN_NOP) #endif .endm @@ -1125,7 +1124,7 @@ dtlb_miss_20w: idtlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1151,7 +1150,7 @@ nadtlb_miss_20w: idtlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1185,7 +1184,7 @@ dtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1218,7 +1217,7 @@ nadtlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1247,7 +1246,7 @@ dtlb_miss_20: idtlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1275,7 +1274,7 @@ nadtlb_miss_20: idtlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1320,7 +1319,7 @@ itlb_miss_20w: iitlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1344,7 +1343,7 @@ naitlb_miss_20w: iitlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1378,7 +1377,7 @@ itlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1402,7 +1401,7 @@ naitlb_miss_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1432,7 +1431,7 @@ itlb_miss_20: iitlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1452,7 +1451,7 @@ naitlb_miss_20: iitlbt pte,prot - ptl_unlock1 spc,t0 + ptl_unlock1 spc,t0,t1 rfir nop @@ -1482,7 +1481,7 @@ dbit_trap_20w: idtlbt pte,prot - ptl_unlock0 spc,t0 + ptl_unlock0 spc,t0,t1 rfir nop #else @@ -1508,7 +1507,7 @@ dbit_trap_11: mtsp t1, %sr1 /* Restore sr1 */ - ptl_unlock0 spc,t0 + ptl_unlock0 spc,t0,t1 rfir nop @@ -1528,7 +1527,7 @@ dbit_trap_20: idtlbt pte,prot - ptl_unlock0 spc,t0 + ptl_unlock0 spc,t0,t1 rfir nop #endif diff --git a/arch/parisc/kernel/firmware.c b/arch/parisc/kernel/firmware.c index 6d1c781eb1db..8f37e75f2fb9 100644 --- a/arch/parisc/kernel/firmware.c +++ b/arch/parisc/kernel/firmware.c @@ -74,8 +74,8 @@ static DEFINE_SPINLOCK(pdc_lock); #endif -unsigned long pdc_result[NUM_PDC_RESULT] __aligned(8); -unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8); +static unsigned long pdc_result[NUM_PDC_RESULT] __aligned(8); +static unsigned long pdc_result2[NUM_PDC_RESULT] __aligned(8); #ifdef CONFIG_64BIT #define WIDE_FIRMWARE 0x1 @@ -334,7 +334,7 @@ int __pdc_cpu_rendezvous(void) /** * pdc_cpu_rendezvous_lock - Lock PDC while transitioning to rendezvous state */ -void pdc_cpu_rendezvous_lock(void) +void pdc_cpu_rendezvous_lock(void) __acquires(&pdc_lock) { spin_lock(&pdc_lock); } @@ -342,7 +342,7 @@ void pdc_cpu_rendezvous_lock(void) /** * pdc_cpu_rendezvous_unlock - Unlock PDC after reaching rendezvous state */ -void pdc_cpu_rendezvous_unlock(void) +void pdc_cpu_rendezvous_unlock(void) __releases(&pdc_lock) { spin_unlock(&pdc_lock); } diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 4d392e4ed358..d1defb9ede70 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -53,7 +53,7 @@ static void __hot prepare_ftrace_return(unsigned long *parent, static ftrace_func_t ftrace_func; -void notrace __hot ftrace_function_trampoline(unsigned long parent, +asmlinkage void notrace __hot ftrace_function_trampoline(unsigned long parent, unsigned long self_addr, unsigned long org_sp_gr3, struct ftrace_regs *fregs) diff --git a/arch/parisc/kernel/pa7300lc.c b/arch/parisc/kernel/pa7300lc.c deleted file mode 100644 index 0d770ac83f70..000000000000 --- a/arch/parisc/kernel/pa7300lc.c +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/arch/parisc/kernel/pa7300lc.c - * - PA7300LC-specific functions - * - * Copyright (C) 2000 Philipp Rumpf */ - -#include <linux/sched.h> -#include <linux/sched/debug.h> -#include <linux/smp.h> -#include <linux/kernel.h> -#include <asm/io.h> -#include <asm/ptrace.h> -#include <asm/machdep.h> - -/* CPU register indices */ - -#define MIOC_STATUS 0xf040 -#define MIOC_CONTROL 0xf080 -#define MDERRADD 0xf0e0 -#define DMAERR 0xf0e8 -#define DIOERR 0xf0ec -#define HIDMAMEM 0xf0f4 - -/* this returns the HPA of the CPU it was called on */ -static u32 cpu_hpa(void) -{ - return 0xfffb0000; -} - -static void pa7300lc_lpmc(int code, struct pt_regs *regs) -{ - u32 hpa; - printk(KERN_WARNING "LPMC on CPU %d\n", smp_processor_id()); - - show_regs(regs); - - hpa = cpu_hpa(); - printk(KERN_WARNING - "MIOC_CONTROL %08x\n" "MIOC_STATUS %08x\n" - "MDERRADD %08x\n" "DMAERR %08x\n" - "DIOERR %08x\n" "HIDMAMEM %08x\n", - gsc_readl(hpa+MIOC_CONTROL), gsc_readl(hpa+MIOC_STATUS), - gsc_readl(hpa+MDERRADD), gsc_readl(hpa+DMAERR), - gsc_readl(hpa+DIOERR), gsc_readl(hpa+HIDMAMEM)); -} - -void pa7300lc_init(void) -{ - cpu_lpmc = pa7300lc_lpmc; -} diff --git a/arch/parisc/kernel/parisc_ksyms.c b/arch/parisc/kernel/parisc_ksyms.c index 00297e8e1c88..6f0c92e8149d 100644 --- a/arch/parisc/kernel/parisc_ksyms.c +++ b/arch/parisc/kernel/parisc_ksyms.c @@ -14,6 +14,7 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/syscalls.h> +#include <linux/libgcc.h> #include <linux/string.h> EXPORT_SYMBOL(memset); @@ -92,12 +93,6 @@ EXPORT_SYMBOL($$divI_12); EXPORT_SYMBOL($$divI_14); EXPORT_SYMBOL($$divI_15); -extern void __ashrdi3(void); -extern void __ashldi3(void); -extern void __lshrdi3(void); -extern void __muldi3(void); -extern void __ucmpdi2(void); - EXPORT_SYMBOL(__ashrdi3); EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__lshrdi3); diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index d818ece23b4a..bf9f192c826e 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -39,7 +39,7 @@ static struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; static unsigned long pcxl_used_bytes __read_mostly; static unsigned long pcxl_used_pages __read_mostly; -extern unsigned long pcxl_dma_start; /* Start of pcxl dma mapping area */ +unsigned long pcxl_dma_start __ro_after_init; /* pcxl dma mapping area start */ static DEFINE_SPINLOCK(pcxl_res_lock); static char *pcxl_res_map; static int pcxl_res_hint; @@ -381,7 +381,7 @@ pcxl_dma_init(void) pcxl_res_map = (char *)__get_free_pages(GFP_KERNEL, get_order(pcxl_res_size)); memset(pcxl_res_map, 0, pcxl_res_size); - proc_gsc_root = proc_mkdir("gsc", NULL); + proc_gsc_root = proc_mkdir("bus/gsc", NULL); if (!proc_gsc_root) printk(KERN_WARNING "pcxl_dma_init: Unable to create gsc /proc dir entry\n"); @@ -417,14 +417,6 @@ void *arch_dma_alloc(struct device *dev, size_t size, map_uncached_pages(vaddr, size, paddr); *dma_handle = (dma_addr_t) paddr; -#if 0 -/* This probably isn't needed to support EISA cards. -** ISA cards will certainly only support 24-bit DMA addressing. -** Not clear if we can, want, or need to support ISA. -*/ - if (!dev || *dev->coherent_dma_mask < 0xffffffff) - gfp |= GFP_DMA; -#endif return (void *)vaddr; } diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c index 0a9d7008ef2a..d477d0177c2f 100644 --- a/arch/parisc/kernel/pdc_chassis.c +++ b/arch/parisc/kernel/pdc_chassis.c @@ -31,6 +31,7 @@ #include <asm/processor.h> #include <asm/pdc.h> #include <asm/pdcpat.h> +#include <asm/led.h> #define PDC_CHASSIS_VER "0.05" @@ -234,6 +235,11 @@ int pdc_chassis_send_status(int message) } else retval = -1; #endif /* CONFIG_64BIT */ } /* if (pdc_chassis_enabled) */ + + /* if system has LCD display, update current string */ + if (retval != -1 && IS_ENABLED(CONFIG_CHASSIS_LCD_LED)) + lcd_print(NULL); + #endif /* CONFIG_PDC_CHASSIS */ return retval; } diff --git a/arch/parisc/kernel/pdt.c b/arch/parisc/kernel/pdt.c index 0d24735bd918..0f9b3b5914cf 100644 --- a/arch/parisc/kernel/pdt.c +++ b/arch/parisc/kernel/pdt.c @@ -354,10 +354,8 @@ static int __init pdt_initcall(void) return -ENODEV; kpdtd_task = kthread_run(pdt_mainloop, NULL, "kpdtd"); - if (IS_ERR(kpdtd_task)) - return PTR_ERR(kpdtd_task); - return 0; + return PTR_ERR_OR_ZERO(kpdtd_task); } late_initcall(pdt_initcall); diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c index 90b04d8af212..b0f0816879df 100644 --- a/arch/parisc/kernel/perf.c +++ b/arch/parisc/kernel/perf.c @@ -57,7 +57,7 @@ struct rdr_tbl_ent { static int perf_processor_interface __read_mostly = UNKNOWN_INTF; static int perf_enabled __read_mostly; static DEFINE_SPINLOCK(perf_lock); -struct parisc_device *cpu_device __read_mostly; +static struct parisc_device *cpu_device __read_mostly; /* RDRs to write for PCX-W */ static const int perf_rdrs_W[] = diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index abdbf038d643..ed93bd8c1545 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -97,18 +97,12 @@ void machine_restart(char *cmd) } -void (*chassis_power_off)(void); - /* * This routine is called from sys_reboot to actually turn off the * machine */ void machine_power_off(void) { - /* If there is a registered power off handler, call it. */ - if (chassis_power_off) - chassis_power_off(); - /* Put the soft power button back under hardware control. * If the user had already pressed the power button, the * following call will immediately power off. */ @@ -284,17 +278,3 @@ __get_wchan(struct task_struct *p) } while (count++ < MAX_UNWIND_ENTRIES); return 0; } - -static inline unsigned long brk_rnd(void) -{ - return (get_random_u32() & BRK_RND_MASK) << PAGE_SHIFT; -} - -unsigned long arch_randomize_brk(struct mm_struct *mm) -{ - unsigned long ret = PAGE_ALIGN(mm->brk + brk_rnd()); - - if (ret < mm->brk) - return mm->brk; - return ret; -} diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 00b0df97afb1..a0e2d37c5b3b 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -26,6 +26,7 @@ #include <asm/processor.h> #include <asm/page.h> #include <asm/pdc.h> +#include <asm/smp.h> #include <asm/pdcpat.h> #include <asm/irq.h> /* for struct irq_region */ #include <asm/parisc-device.h> @@ -377,10 +378,18 @@ int show_cpuinfo (struct seq_file *m, void *v) { unsigned long cpu; + char cpu_name[60], *p; + + /* strip PA path from CPU name to not confuse lscpu */ + strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name)); + p = strrchr(cpu_name, '['); + if (p) + *(--p) = 0; for_each_online_cpu(cpu) { - const struct cpuinfo_parisc *cpuinfo = &per_cpu(cpu_data, cpu); #ifdef CONFIG_SMP + const struct cpuinfo_parisc *cpuinfo = &per_cpu(cpu_data, cpu); + if (0 == cpuinfo->hpa) continue; #endif @@ -425,8 +434,7 @@ show_cpuinfo (struct seq_file *m, void *v) seq_printf(m, "model\t\t: %s - %s\n", boot_cpu_data.pdc.sys_model_name, - cpuinfo->dev ? - cpuinfo->dev->name : "Unknown"); + cpu_name); seq_printf(m, "hversion\t: 0x%08x\n" "sversion\t: 0x%08x\n", diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 573f8303e2b0..2f434f2da185 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -31,7 +31,6 @@ #include <asm/sections.h> #include <asm/pdc.h> #include <asm/led.h> -#include <asm/machdep.h> /* for pa7300lc_init() proto */ #include <asm/pdc_chassis.h> #include <asm/io.h> #include <asm/setup.h> @@ -40,11 +39,6 @@ static char __initdata command_line[COMMAND_LINE_SIZE]; -/* Intended for ccio/sba/cpu statistics under /proc/bus/{runway|gsc} */ -struct proc_dir_entry * proc_runway_root __read_mostly = NULL; -struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; -struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL; - static void __init setup_cmdline(char **cmdline_p) { extern unsigned int boot_args[]; @@ -98,8 +92,6 @@ static void __init dma_ops_init(void) "the PA-RISC 1.1 or 2.0 architecture specification.\n"); case pcxl2: - pa7300lc_init(); - break; default: break; } @@ -151,11 +143,6 @@ void __init setup_arch(char **cmdline_p) parisc_cache_init(); paging_init(); -#ifdef CONFIG_CHASSIS_LCD_LED - /* initialize the LCD/LED after boot_cpu_data is available ! */ - led_init(); /* LCD/LED initialization */ -#endif - #ifdef CONFIG_PA11 dma_ops_init(); #endif @@ -196,48 +183,6 @@ const struct seq_operations cpuinfo_op = { .show = show_cpuinfo }; -static void __init parisc_proc_mkdir(void) -{ - /* - ** Can't call proc_mkdir() until after proc_root_init() has been - ** called by start_kernel(). In other words, this code can't - ** live in arch/.../setup.c because start_parisc() calls - ** start_kernel(). - */ - switch (boot_cpu_data.cpu_type) { - case pcxl: - case pcxl2: - if (NULL == proc_gsc_root) - { - proc_gsc_root = proc_mkdir("bus/gsc", NULL); - } - break; - case pcxt_: - case pcxu: - case pcxu_: - case pcxw: - case pcxw_: - case pcxw2: - if (NULL == proc_runway_root) - { - proc_runway_root = proc_mkdir("bus/runway", NULL); - } - break; - case mako: - case mako2: - if (NULL == proc_mckinley_root) - { - proc_mckinley_root = proc_mkdir("bus/mckinley", NULL); - } - break; - default: - /* FIXME: this was added to prevent the compiler - * complaining about missing pcx, pcxs and pcxt - * I'm assuming they have neither gsc nor runway */ - break; - } -} - static struct resource central_bus = { .name = "Central Bus", .start = F_EXTEND(0xfff80000), @@ -294,7 +239,6 @@ static int __init parisc_init(void) { u32 osid = (OS_ID_LINUX << 16); - parisc_proc_mkdir(); parisc_init_resources(); do_device_inventory(); /* probe for hardware */ @@ -329,47 +273,6 @@ static int __init parisc_init(void) apply_alternatives_all(); parisc_setup_cache_timing(); - - /* These are in a non-obvious order, will fix when we have an iotree */ -#if defined(CONFIG_IOSAPIC) - iosapic_init(); -#endif -#if defined(CONFIG_IOMMU_SBA) - sba_init(); -#endif -#if defined(CONFIG_PCI_LBA) - lba_init(); -#endif - - /* CCIO before any potential subdevices */ -#if defined(CONFIG_IOMMU_CCIO) - ccio_init(); -#endif - - /* - * Need to register Asp & Wax before the EISA adapters for the IRQ - * regions. EISA must come before PCI to be sure it gets IRQ region - * 0. - */ -#if defined(CONFIG_GSC_LASI) || defined(CONFIG_GSC_WAX) - gsc_init(); -#endif -#ifdef CONFIG_EISA - parisc_eisa_init(); -#endif - -#if defined(CONFIG_HPPB) - hppb_init(); -#endif - -#if defined(CONFIG_GSC_DINO) - dino_init(); -#endif - -#ifdef CONFIG_CHASSIS_LCD_LED - register_led_regions(); /* register LED port info in procfs */ -#endif - return 0; } arch_initcall(parisc_init); diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index f886ff0c75df..e8d27def6c52 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -423,7 +423,7 @@ static void check_syscallno_in_delay_branch(struct pt_regs *regs) regs->gr[31] -= 8; /* delayed branching */ /* Get assembler opcode of code in delay branch */ - uaddr = (unsigned int *) ((regs->gr[31] & ~3) + 4); + uaddr = (u32 __user *) ((regs->gr[31] & ~3) + 4); err = get_user(opcode, uaddr); if (err) return; diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ca2d537e25b1..ab896eff7a1d 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -27,17 +27,12 @@ #include <linux/elf-randomize.h> /* - * Construct an artificial page offset for the mapping based on the virtual + * Construct an artificial page offset for the mapping based on the physical * address of the kernel file mapping variable. - * If filp is zero the calculated pgoff value aliases the memory of the given - * address. This is useful for io_uring where the mapping shall alias a kernel - * address and a userspace adress where both the kernel and the userspace - * access the same memory region. */ -#define GET_FILP_PGOFF(filp, addr) \ - ((filp ? (((unsigned long) filp->f_mapping) >> 8) \ - & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL) \ - + (addr >> PAGE_SHIFT)) +#define GET_FILP_PGOFF(filp) \ + (filp ? (((unsigned long) filp->f_mapping) >> 8) \ + & ((SHM_COLOUR-1) >> PAGE_SHIFT) : 0UL) static unsigned long shared_align_offset(unsigned long filp_pgoff, unsigned long pgoff) @@ -117,7 +112,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, do_color_align = 0; if (filp || (flags & MAP_SHARED)) do_color_align = 1; - filp_pgoff = GET_FILP_PGOFF(filp, addr); + filp_pgoff = GET_FILP_PGOFF(filp); if (flags & MAP_FIXED) { /* Even MAP_FIXED mappings must reside within TASK_SIZE */ @@ -166,7 +161,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp, } info.flags = 0; - info.low_limit = mm->mmap_legacy_base; + info.low_limit = mm->mmap_base; info.high_limit = mmap_upper_limit(NULL); return vm_unmapped_area(&info); } @@ -186,58 +181,6 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp, addr, len, pgoff, flags, DOWN); } -static int mmap_is_legacy(void) -{ - if (current->personality & ADDR_COMPAT_LAYOUT) - return 1; - - /* parisc stack always grows up - so a unlimited stack should - * not be an indicator to use the legacy memory layout. - * if (rlimit(RLIMIT_STACK) == RLIM_INFINITY) - * return 1; - */ - - return sysctl_legacy_va_layout; -} - -static unsigned long mmap_rnd(void) -{ - unsigned long rnd = 0; - - if (current->flags & PF_RANDOMIZE) - rnd = get_random_u32() & MMAP_RND_MASK; - - return rnd << PAGE_SHIFT; -} - -unsigned long arch_mmap_rnd(void) -{ - return (get_random_u32() & MMAP_RND_MASK) << PAGE_SHIFT; -} - -static unsigned long mmap_legacy_base(void) -{ - return TASK_UNMAPPED_BASE + mmap_rnd(); -} - -/* - * This function, called very early during the creation of a new - * process VM image, sets up which VM layout function to use: - */ -void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) -{ - mm->mmap_legacy_base = mmap_legacy_base(); - mm->mmap_base = mmap_upper_limit(rlim_stack); - - if (mmap_is_legacy()) { - mm->mmap_base = mm->mmap_legacy_base; - mm->get_unmapped_area = arch_get_unmapped_area; - } else { - mm->get_unmapped_area = arch_get_unmapped_area_topdown; - } -} - - asmlinkage unsigned long sys_mmap2(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, unsigned long fd, unsigned long pgoff) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 1373e5129868..1f51aa9c8230 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -39,6 +39,7 @@ registers). #include <asm/assembly.h> #include <asm/processor.h> #include <asm/cache.h> +#include <asm/spinlock_types.h> #include <linux/linkage.h> @@ -66,6 +67,16 @@ registers). stw \reg1, 0(%sr2,\reg2) .endm + /* raise exception if spinlock content is not zero or + * __ARCH_SPIN_LOCK_UNLOCKED_VAL */ + .macro spinlock_check spin_val,tmpreg +#ifdef CONFIG_LIGHTWEIGHT_SPINLOCK_CHECK + ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, \tmpreg + andcm,= \spin_val, \tmpreg, %r0 + .word SPINLOCK_BREAK_INSN +#endif + .endm + .text .import syscall_exit,code @@ -508,7 +519,8 @@ lws_start: lws_exit_noerror: lws_pagefault_enable %r1,%r21 - stw,ma %r20, 0(%sr2,%r20) + ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21 + stw,ma %r21, 0(%sr2,%r20) ssm PSW_SM_I, %r0 b lws_exit copy %r0, %r21 @@ -521,7 +533,8 @@ lws_wouldblock: lws_pagefault: lws_pagefault_enable %r1,%r21 - stw,ma %r20, 0(%sr2,%r20) + ldi __ARCH_SPIN_LOCK_UNLOCKED_VAL, %r21 + stw,ma %r21, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 3(%r0),%r28 b lws_exit @@ -619,6 +632,7 @@ lws_compare_and_swap: /* Try to acquire the lock */ LDCW 0(%sr2,%r20), %r28 + spinlock_check %r28, %r21 comclr,<> %r0, %r28, %r0 b,n lws_wouldblock @@ -772,6 +786,7 @@ cas2_lock_start: /* Try to acquire the lock */ LDCW 0(%sr2,%r20), %r28 + spinlock_check %r28, %r21 comclr,<> %r0, %r28, %r0 b,n lws_wouldblock @@ -1001,6 +1016,7 @@ atomic_xchg_start: /* Try to acquire the lock */ LDCW 0(%sr2,%r20), %r28 + spinlock_check %r28, %r21 comclr,<> %r0, %r28, %r0 b,n lws_wouldblock @@ -1199,6 +1215,7 @@ atomic_store_start: /* Try to acquire the lock */ LDCW 0(%sr2,%r20), %r28 + spinlock_check %r28, %r21 comclr,<> %r0, %r28, %r0 b,n lws_wouldblock @@ -1330,7 +1347,7 @@ ENTRY(lws_lock_start) /* lws locks */ .rept 256 /* Keep locks aligned at 16-bytes */ - .word 1 + .word __ARCH_SPIN_LOCK_UNLOCKED_VAL .word 0 .word 0 .word 0 diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index a0a9145b6dd4..e97c175b56f9 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -450,3 +450,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 3b97944c7291..1107ca819ac8 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -335,9 +335,6 @@ static void default_trap(int code, struct pt_regs *regs) show_regs(regs); } -void (*cpu_lpmc) (int code, struct pt_regs *regs) __read_mostly = default_trap; - - static void transfer_pim_to_trap_frame(struct pt_regs *regs) { register int i; @@ -557,7 +554,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) flush_cache_all(); flush_tlb_all(); - cpu_lpmc(5, regs); + default_trap(code, regs); return; case PARISC_ITLB_TRAP: diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 033b9e50b44a..ce25acfe4889 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -11,6 +11,7 @@ #include <linux/signal.h> #include <linux/ratelimit.h> #include <linux/uaccess.h> +#include <linux/sysctl.h> #include <asm/unaligned.h> #include <asm/hardirq.h> #include <asm/traps.h> @@ -337,25 +338,24 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) : "r19", "r20", "r21", "r22", "r1" ); #else { - unsigned long valh=(val>>32),vall=(val&0xffffffffl); __asm__ __volatile__ ( -" mtsp %4, %%sr1\n" -" zdep %2, 29, 2, %%r19\n" -" dep %%r0, 31, 2, %3\n" +" mtsp %3, %%sr1\n" +" zdep %R1, 29, 2, %%r19\n" +" dep %%r0, 31, 2, %2\n" " mtsar %%r19\n" " zvdepi -2, 32, %%r19\n" -"1: ldw 0(%%sr1,%3),%%r20\n" -"2: ldw 8(%%sr1,%3),%%r21\n" -" vshd %1, %2, %%r1\n" +"1: ldw 0(%%sr1,%2),%%r20\n" +"2: ldw 8(%%sr1,%2),%%r21\n" +" vshd %1, %R1, %%r1\n" " vshd %%r0, %1, %1\n" -" vshd %2, %%r0, %2\n" +" vshd %R1, %%r0, %R1\n" " and %%r20, %%r19, %%r20\n" " andcm %%r21, %%r19, %%r21\n" " or %1, %%r20, %1\n" -" or %2, %%r21, %2\n" -"3: stw %1,0(%%sr1,%3)\n" -"4: stw %%r1,4(%%sr1,%3)\n" -"5: stw %2,8(%%sr1,%3)\n" +" or %R1, %%r21, %R1\n" +"3: stw %1,0(%%sr1,%2)\n" +"4: stw %%r1,4(%%sr1,%2)\n" +"5: stw %R1,8(%%sr1,%2)\n" "6: \n" ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) @@ -363,7 +363,7 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) : "+r" (ret) - : "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr) + : "r" (val), "r" (regs->ior), "r" (regs->isr) : "r19", "r20", "r21", "r1" ); } #endif @@ -473,7 +473,7 @@ void handle_unaligned(struct pt_regs *regs) case OPCODE_LDWA_I: case OPCODE_LDW_S: case OPCODE_LDWA_S: - ret = emulate_ldw(regs, R3(regs->iir),0); + ret = emulate_ldw(regs, R3(regs->iir), 0); break; case OPCODE_STH: @@ -482,7 +482,7 @@ void handle_unaligned(struct pt_regs *regs) case OPCODE_STW: case OPCODE_STWA: - ret = emulate_stw(regs, R2(regs->iir),0); + ret = emulate_stw(regs, R2(regs->iir), 0); break; #ifdef CONFIG_64BIT @@ -490,12 +490,12 @@ void handle_unaligned(struct pt_regs *regs) case OPCODE_LDDA_I: case OPCODE_LDD_S: case OPCODE_LDDA_S: - ret = emulate_ldd(regs, R3(regs->iir),0); + ret = emulate_ldd(regs, R3(regs->iir), 0); break; case OPCODE_STD: case OPCODE_STDA: - ret = emulate_std(regs, R2(regs->iir),0); + ret = emulate_std(regs, R2(regs->iir), 0); break; #endif @@ -503,24 +503,24 @@ void handle_unaligned(struct pt_regs *regs) case OPCODE_FLDWS: case OPCODE_FLDWXR: case OPCODE_FLDWSR: - ret = emulate_ldw(regs,FR3(regs->iir),1); + ret = emulate_ldw(regs, FR3(regs->iir), 1); break; case OPCODE_FLDDX: case OPCODE_FLDDS: - ret = emulate_ldd(regs,R3(regs->iir),1); + ret = emulate_ldd(regs, R3(regs->iir), 1); break; case OPCODE_FSTWX: case OPCODE_FSTWS: case OPCODE_FSTWXR: case OPCODE_FSTWSR: - ret = emulate_stw(regs,FR3(regs->iir),1); + ret = emulate_stw(regs, FR3(regs->iir), 1); break; case OPCODE_FSTDX: case OPCODE_FSTDS: - ret = emulate_std(regs,R3(regs->iir),1); + ret = emulate_std(regs, R3(regs->iir), 1); break; case OPCODE_LDCD_I: diff --git a/arch/parisc/lib/ucmpdi2.c b/arch/parisc/lib/ucmpdi2.c index 8e6014a142ef..9d8b4dbae273 100644 --- a/arch/parisc/lib/ucmpdi2.c +++ b/arch/parisc/lib/ucmpdi2.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> +#include <linux/libgcc.h> union ull_union { unsigned long long ull; @@ -9,7 +10,7 @@ union ull_union { } ui; }; -int __ucmpdi2(unsigned long long a, unsigned long long b) +word_type __ucmpdi2(unsigned long long a, unsigned long long b) { union ull_union au = {.ull = a}; union ull_union bu = {.ull = b}; diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index a4c7c7630f48..2fe5b44986e0 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -192,31 +192,31 @@ int fixup_exception(struct pt_regs *regs) * For implementation see handle_interruption() in traps.c */ static const char * const trap_description[] = { - [1] "High-priority machine check (HPMC)", - [2] "Power failure interrupt", - [3] "Recovery counter trap", - [5] "Low-priority machine check", - [6] "Instruction TLB miss fault", - [7] "Instruction access rights / protection trap", - [8] "Illegal instruction trap", - [9] "Break instruction trap", - [10] "Privileged operation trap", - [11] "Privileged register trap", - [12] "Overflow trap", - [13] "Conditional trap", - [14] "FP Assist Exception trap", - [15] "Data TLB miss fault", - [16] "Non-access ITLB miss fault", - [17] "Non-access DTLB miss fault", - [18] "Data memory protection/unaligned access trap", - [19] "Data memory break trap", - [20] "TLB dirty bit trap", - [21] "Page reference trap", - [22] "Assist emulation trap", - [25] "Taken branch trap", - [26] "Data memory access rights trap", - [27] "Data memory protection ID trap", - [28] "Unaligned data reference trap", + [1] = "High-priority machine check (HPMC)", + [2] = "Power failure interrupt", + [3] = "Recovery counter trap", + [5] = "Low-priority machine check", + [6] = "Instruction TLB miss fault", + [7] = "Instruction access rights / protection trap", + [8] = "Illegal instruction trap", + [9] = "Break instruction trap", + [10] = "Privileged operation trap", + [11] = "Privileged register trap", + [12] = "Overflow trap", + [13] = "Conditional trap", + [14] = "FP Assist Exception trap", + [15] = "Data TLB miss fault", + [16] = "Non-access ITLB miss fault", + [17] = "Non-access DTLB miss fault", + [18] = "Data memory protection/unaligned access trap", + [19] = "Data memory break trap", + [20] = "TLB dirty bit trap", + [21] = "Page reference trap", + [22] = "Assist emulation trap", + [25] = "Taken branch trap", + [26] = "Data memory access rights trap", + [27] = "Data memory protection ID trap", + [28] = "Unaligned data reference trap", }; const char *trap_name(unsigned long code) diff --git a/arch/parisc/mm/fixmap.c b/arch/parisc/mm/fixmap.c index cc15d737fda6..ae3493dae9dc 100644 --- a/arch/parisc/mm/fixmap.c +++ b/arch/parisc/mm/fixmap.c @@ -19,9 +19,6 @@ void notrace set_fixmap(enum fixed_addresses idx, phys_addr_t phys) pmd_t *pmd = pmd_offset(pud, vaddr); pte_t *pte; - if (pmd_none(*pmd)) - pte = pte_alloc_kernel(pmd, vaddr); - pte = pte_offset_kernel(pmd, vaddr); set_pte_at(&init_mm, vaddr, pte, __mk_pte(phys, PAGE_KERNEL_RWX)); flush_tlb_kernel_range(vaddr, vaddr + PAGE_SIZE); diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index 406c52fe23d5..a088c243edea 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -523,10 +523,6 @@ void mark_rodata_ro(void) void *parisc_vmalloc_start __ro_after_init; EXPORT_SYMBOL(parisc_vmalloc_start); -#ifdef CONFIG_PA11 -unsigned long pcxl_dma_start __ro_after_init; -#endif - void __init mem_init(void) { /* Do sanity checks on IPC (compat) structures */ @@ -669,6 +665,39 @@ static void __init gateway_init(void) PAGE_SIZE, PAGE_GATEWAY, 1); } +static void __init fixmap_init(void) +{ + unsigned long addr = FIXMAP_START; + unsigned long end = FIXMAP_START + FIXMAP_SIZE; + pgd_t *pgd = pgd_offset_k(addr); + p4d_t *p4d = p4d_offset(pgd, addr); + pud_t *pud = pud_offset(p4d, addr); + pmd_t *pmd; + + BUILD_BUG_ON(FIXMAP_SIZE > PMD_SIZE); + +#if CONFIG_PGTABLE_LEVELS == 3 + if (pud_none(*pud)) { + pmd = memblock_alloc(PAGE_SIZE << PMD_TABLE_ORDER, + PAGE_SIZE << PMD_TABLE_ORDER); + if (!pmd) + panic("fixmap: pmd allocation failed.\n"); + pud_populate(NULL, pud, pmd); + } +#endif + + pmd = pmd_offset(pud, addr); + do { + pte_t *pte = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + if (!pte) + panic("fixmap: pte allocation failed.\n"); + + pmd_populate_kernel(&init_mm, pmd, pte); + + addr += PAGE_SIZE; + } while (addr < end); +} + static void __init parisc_bootmem_free(void) { unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, }; @@ -683,6 +712,7 @@ void __init paging_init(void) setup_bootmem(); pagetable_init(); gateway_init(); + fixmap_init(); flush_cache_all_local(); /* start with known state */ flush_tlb_all_local(NULL); diff --git a/arch/parisc/net/Makefile b/arch/parisc/net/Makefile new file mode 100644 index 000000000000..22b12024d4c3 --- /dev/null +++ b/arch/parisc/net/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only + +obj-$(CONFIG_BPF_JIT) += bpf_jit_core.o + +ifeq ($(CONFIG_64BIT),y) + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o +else + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o +endif diff --git a/arch/parisc/net/bpf_jit.h b/arch/parisc/net/bpf_jit.h new file mode 100644 index 000000000000..8b8896959f04 --- /dev/null +++ b/arch/parisc/net/bpf_jit.h @@ -0,0 +1,479 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common functionality for PARISC32 and PARISC64 BPF JIT compilers + * + * Copyright (c) 2023 Helge Deller <deller@gmx.de> + * + */ + +#ifndef _BPF_JIT_H +#define _BPF_JIT_H + +#include <linux/bpf.h> +#include <linux/filter.h> +#include <asm/cacheflush.h> + +#define HPPA_JIT_DEBUG 0 +#define HPPA_JIT_REBOOT 0 +#define HPPA_JIT_DUMP 0 + +#define OPTIMIZE_HPPA 1 /* enable some asm optimizations */ +// echo 1 > /proc/sys/net/core/bpf_jit_enable + +#define HPPA_R(nr) nr /* use HPPA register #nr */ + +enum { + HPPA_REG_ZERO = 0, /* The constant value 0 */ + HPPA_REG_R1 = 1, /* used for addil */ + HPPA_REG_RP = 2, /* Return address */ + + HPPA_REG_ARG7 = 19, /* ARG4-7 used in 64-bit ABI */ + HPPA_REG_ARG6 = 20, + HPPA_REG_ARG5 = 21, + HPPA_REG_ARG4 = 22, + + HPPA_REG_ARG3 = 23, /* ARG0-3 in 32- and 64-bit ABI */ + HPPA_REG_ARG2 = 24, + HPPA_REG_ARG1 = 25, + HPPA_REG_ARG0 = 26, + HPPA_REG_GP = 27, /* Global pointer */ + HPPA_REG_RET0 = 28, /* Return value, HI in 32-bit */ + HPPA_REG_RET1 = 29, /* Return value, LOW in 32-bit */ + HPPA_REG_SP = 30, /* Stack pointer */ + HPPA_REG_R31 = 31, + +#ifdef CONFIG_64BIT + HPPA_REG_TCC = 3, + HPPA_REG_TCC_SAVED = 4, + HPPA_REG_TCC_IN_INIT = HPPA_REG_R31, +#else + HPPA_REG_TCC = 18, + HPPA_REG_TCC_SAVED = 17, + HPPA_REG_TCC_IN_INIT = HPPA_REG_R31, +#endif + + HPPA_REG_T0 = HPPA_REG_R1, /* Temporaries */ + HPPA_REG_T1 = HPPA_REG_R31, + HPPA_REG_T2 = HPPA_REG_ARG4, +#ifndef CONFIG_64BIT + HPPA_REG_T3 = HPPA_REG_ARG5, /* not used in 64-bit */ + HPPA_REG_T4 = HPPA_REG_ARG6, + HPPA_REG_T5 = HPPA_REG_ARG7, +#endif +}; + +struct hppa_jit_context { + struct bpf_prog *prog; + u32 *insns; /* HPPA insns */ + int ninsns; + int reg_seen_collect; + int reg_seen; + int body_len; + int epilogue_offset; + int prologue_len; + int *offset; /* BPF to HPPA */ +}; + +#define REG_SET_SEEN(ctx, nr) { if (ctx->reg_seen_collect) ctx->reg_seen |= BIT(nr); } +#define REG_SET_SEEN_ALL(ctx) { if (ctx->reg_seen_collect) ctx->reg_seen = -1; } +#define REG_FORCE_SEEN(ctx, nr) { ctx->reg_seen |= BIT(nr); } +#define REG_WAS_SEEN(ctx, nr) (ctx->reg_seen & BIT(nr)) +#define REG_ALL_SEEN(ctx) (ctx->reg_seen == -1) + +#define HPPA_INSN_SIZE 4 /* bytes per HPPA asm instruction */ +#define REG_SIZE REG_SZ /* bytes per native "long" word */ + +/* subtract hppa displacement on branches which is .+8 */ +#define HPPA_BRANCH_DISPLACEMENT 2 /* instructions */ + +/* asm statement indicator to execute delay slot */ +#define EXEC_NEXT_INSTR 0 +#define NOP_NEXT_INSTR 1 + +#define im11(val) (((u32)(val)) & 0x07ff) + +#define hppa_ldil(addr, reg) \ + hppa_t5_insn(0x08, reg, ((u32)(addr)) >> 11) /* ldil im21,reg */ +#define hppa_addil(addr, reg) \ + hppa_t5_insn(0x0a, reg, ((u32)(addr)) >> 11) /* addil im21,reg -> result in gr1 */ +#define hppa_ldo(im14, reg, target) \ + hppa_t1_insn(0x0d, reg, target, im14) /* ldo val14(reg),target */ +#define hppa_ldi(im14, reg) \ + hppa_ldo(im14, HPPA_REG_ZERO, reg) /* ldi val14,reg */ +#define hppa_or(reg1, reg2, target) \ + hppa_t6_insn(0x02, reg2, reg1, 0, 0, 0x09, target) /* or reg1,reg2,target */ +#define hppa_or_cond(reg1, reg2, cond, f, target) \ + hppa_t6_insn(0x02, reg2, reg1, cond, f, 0x09, target) +#define hppa_and(reg1, reg2, target) \ + hppa_t6_insn(0x02, reg2, reg1, 0, 0, 0x08, target) /* and reg1,reg2,target */ +#define hppa_and_cond(reg1, reg2, cond, f, target) \ + hppa_t6_insn(0x02, reg2, reg1, cond, f, 0x08, target) +#define hppa_xor(reg1, reg2, target) \ + hppa_t6_insn(0x02, reg2, reg1, 0, 0, 0x0a, target) /* xor reg1,reg2,target */ +#define hppa_add(reg1, reg2, target) \ + hppa_t6_insn(0x02, reg2, reg1, 0, 0, 0x18, target) /* add reg1,reg2,target */ +#define hppa_addc(reg1, reg2, target) \ + hppa_t6_insn(0x02, reg2, reg1, 0, 0, 0x1c, target) /* add,c reg1,reg2,target */ +#define hppa_sub(reg1, reg2, target) \ + hppa_t6_insn(0x02, reg2, reg1, 0, 0, 0x10, target) /* sub reg1,reg2,target */ +#define hppa_subb(reg1, reg2, target) \ + hppa_t6_insn(0x02, reg2, reg1, 0, 0, 0x14, target) /* sub,b reg1,reg2,target */ +#define hppa_nop() \ + hppa_or(0,0,0) /* nop: or 0,0,0 */ +#define hppa_addi(val11, reg, target) \ + hppa_t7_insn(0x2d, reg, target, val11) /* addi im11,reg,target */ +#define hppa_subi(val11, reg, target) \ + hppa_t7_insn(0x25, reg, target, val11) /* subi im11,reg,target */ +#define hppa_copy(reg, target) \ + hppa_or(reg, HPPA_REG_ZERO, target) /* copy reg,target */ +#define hppa_ldw(val14, reg, target) \ + hppa_t1_insn(0x12, reg, target, val14) /* ldw im14(reg),target */ +#define hppa_ldb(val14, reg, target) \ + hppa_t1_insn(0x10, reg, target, val14) /* ldb im14(reg),target */ +#define hppa_ldh(val14, reg, target) \ + hppa_t1_insn(0x11, reg, target, val14) /* ldh im14(reg),target */ +#define hppa_stw(reg, val14, base) \ + hppa_t1_insn(0x1a, base, reg, val14) /* stw reg,im14(base) */ +#define hppa_stb(reg, val14, base) \ + hppa_t1_insn(0x18, base, reg, val14) /* stb reg,im14(base) */ +#define hppa_sth(reg, val14, base) \ + hppa_t1_insn(0x19, base, reg, val14) /* sth reg,im14(base) */ +#define hppa_stwma(reg, val14, base) \ + hppa_t1_insn(0x1b, base, reg, val14) /* stw,ma reg,im14(base) */ +#define hppa_bv(reg, base, nop) \ + hppa_t11_insn(0x3a, base, reg, 0x06, 0, nop) /* bv(,n) reg(base) */ +#define hppa_be(offset, base) \ + hppa_t12_insn(0x38, base, offset, 0x00, 1) /* be,n offset(0,base) */ +#define hppa_be_l(offset, base, nop) \ + hppa_t12_insn(0x39, base, offset, 0x00, nop) /* ble(,nop) offset(0,base) */ +#define hppa_mtctl(reg, cr) \ + hppa_t21_insn(0x00, cr, reg, 0xc2, 0) /* mtctl reg,cr */ +#define hppa_mtsar(reg) \ + hppa_mtctl(reg, 11) /* mtsar reg */ +#define hppa_zdep(r, p, len, target) \ + hppa_t10_insn(0x35, target, r, 0, 2, p, len) /* zdep r,a,b,t */ +#define hppa_shl(r, len, target) \ + hppa_zdep(r, len, len, lo(rd)) +#define hppa_depwz(r, p, len, target) \ + hppa_t10_insn(0x35, target, r, 0, 3, 31-(p), 32-(len)) /* depw,z r,p,len,ret1 */ +#define hppa_depwz_sar(reg, target) \ + hppa_t1_insn(0x35, target, reg, 0) /* depw,z reg,sar,32,target */ +#define hppa_shrpw_sar(reg, target) \ + hppa_t10_insn(0x34, reg, 0, 0, 0, 0, target) /* shrpw r0,reg,sar,target */ +#define hppa_shrpw(r1, r2, p, target) \ + hppa_t10_insn(0x34, r2, r1, 0, 2, 31-(p), target) /* shrpw r1,r2,p,target */ +#define hppa_shd(r1, r2, p, target) \ + hppa_t10_insn(0x34, r2, r1, 0, 2, 31-(p), target) /* shrpw r1,r2,p,tarfer */ +#define hppa_extrws_sar(reg, target) \ + hppa_t10_insn(0x34, reg, target, 0, 5, 0, 0) /* extrw,s reg,sar,32,ret0 */ +#define hppa_extrws(reg, p, len, target) \ + hppa_t10_insn(0x34, reg, target, 0, 7, p, len) /* extrw,s reg,p,len,target */ +#define hppa_extru(r, p, len, target) \ + hppa_t10_insn(0x34, r, target, 0, 6, p, 32-(len)) +#define hppa_shr(r, len, target) \ + hppa_extru(r, 31-(len), 32-(len), target) +#define hppa_bl(imm17, rp) \ + hppa_t12_insn(0x3a, rp, imm17, 0x00, 1) /* bl,n target_addr,rp */ +#define hppa_sh2add(r1, r2, target) \ + hppa_t6_insn(0x02, r2, r1, 0, 0, 0x1a, target) /* sh2add r1,r2,target */ + +#define hppa_combt(r1, r2, target_addr, condition, nop) \ + hppa_t11_insn(IS_ENABLED(CONFIG_64BIT) ? 0x27 : 0x20, \ + r2, r1, condition, target_addr, nop) /* combt,cond,n r1,r2,addr */ +#define hppa_beq(r1, r2, target_addr) \ + hppa_combt(r1, r2, target_addr, 1, NOP_NEXT_INSTR) +#define hppa_blt(r1, r2, target_addr) \ + hppa_combt(r1, r2, target_addr, 2, NOP_NEXT_INSTR) +#define hppa_ble(r1, r2, target_addr) \ + hppa_combt(r1, r2, target_addr, 3, NOP_NEXT_INSTR) +#define hppa_bltu(r1, r2, target_addr) \ + hppa_combt(r1, r2, target_addr, 4, NOP_NEXT_INSTR) +#define hppa_bleu(r1, r2, target_addr) \ + hppa_combt(r1, r2, target_addr, 5, NOP_NEXT_INSTR) + +#define hppa_combf(r1, r2, target_addr, condition, nop) \ + hppa_t11_insn(IS_ENABLED(CONFIG_64BIT) ? 0x2f : 0x22, \ + r2, r1, condition, target_addr, nop) /* combf,cond,n r1,r2,addr */ +#define hppa_bne(r1, r2, target_addr) \ + hppa_combf(r1, r2, target_addr, 1, NOP_NEXT_INSTR) +#define hppa_bge(r1, r2, target_addr) \ + hppa_combf(r1, r2, target_addr, 2, NOP_NEXT_INSTR) +#define hppa_bgt(r1, r2, target_addr) \ + hppa_combf(r1, r2, target_addr, 3, NOP_NEXT_INSTR) +#define hppa_bgeu(r1, r2, target_addr) \ + hppa_combf(r1, r2, target_addr, 4, NOP_NEXT_INSTR) +#define hppa_bgtu(r1, r2, target_addr) \ + hppa_combf(r1, r2, target_addr, 5, NOP_NEXT_INSTR) + +/* 64-bit instructions */ +#ifdef CONFIG_64BIT +#define hppa64_ldd_reg(reg, b, target) \ + hppa_t10_insn(0x03, b, reg, 0, 0, 3<<1, target) +#define hppa64_ldd_im5(im5, b, target) \ + hppa_t10_insn(0x03, b, low_sign_unext(im5,5), 0, 1<<2, 3<<1, target) +#define hppa64_ldd_im16(im16, b, target) \ + hppa_t10_insn(0x14, b, target, 0, 0, 0, 0) | re_assemble_16(im16) +#define hppa64_std_im5(src, im5, b) \ + hppa_t10_insn(0x03, b, src, 0, 1<<2, 0xB<<1, low_sign_unext(im5,5)) +#define hppa64_std_im16(src, im16, b) \ + hppa_t10_insn(0x1c, b, src, 0, 0, 0, 0) | re_assemble_16(im16) +#define hppa64_bl_long(offs22) \ + hppa_t12_L_insn(0x3a, offs22, 1) +#define hppa64_mtsarcm(reg) \ + hppa_t21_insn(0x00, 11, reg, 0xc6, 0) +#define hppa64_shrpd_sar(reg, target) \ + hppa_t10_insn(0x34, reg, 0, 0, 0, 1<<4, target) +#define hppa64_shladd(r1, sa, r2, target) \ + hppa_t6_insn(0x02, r2, r1, 0, 0, 1<<4|1<<3|sa, target) +#define hppa64_depdz_sar(reg, target) \ + hppa_t21_insn(0x35, target, reg, 3<<3, 0) +#define hppa_extrd_sar(reg, target, se) \ + hppa_t10_insn(0x34, reg, target, 0, 0, 0, 0) | 2<<11 | (se&1)<<10 | 1<<9 | 1<<8 +#define hppa64_bve_l_rp(base) \ + (0x3a << 26) | (base << 21) | 0xf000 +#define hppa64_permh_3210(r, target) \ + (0x3e << 26) | (r << 21) | (r << 16) | (target) | 0x00006900 +#define hppa64_hshl(r, sa, target) \ + (0x3e << 26) | (0 << 21) | (r << 16) | (sa << 6) | (target) | 0x00008800 +#define hppa64_hshr_u(r, sa, target) \ + (0x3e << 26) | (r << 21) | (0 << 16) | (sa << 6) | (target) | 0x0000c800 +#endif + +struct hppa_jit_data { + struct bpf_binary_header *header; + u8 *image; + struct hppa_jit_context ctx; +}; + +static inline void bpf_fill_ill_insns(void *area, unsigned int size) +{ + memset(area, 0, size); +} + +static inline void bpf_flush_icache(void *start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +/* Emit a 4-byte HPPA instruction. */ +static inline void emit(const u32 insn, struct hppa_jit_context *ctx) +{ + if (ctx->insns) { + ctx->insns[ctx->ninsns] = insn; + } + + ctx->ninsns++; +} + +static inline int epilogue_offset(struct hppa_jit_context *ctx) +{ + int to = ctx->epilogue_offset, from = ctx->ninsns; + + return (to - from); +} + +/* Return -1 or inverted cond. */ +static inline int invert_bpf_cond(u8 cond) +{ + switch (cond) { + case BPF_JEQ: + return BPF_JNE; + case BPF_JGT: + return BPF_JLE; + case BPF_JLT: + return BPF_JGE; + case BPF_JGE: + return BPF_JLT; + case BPF_JLE: + return BPF_JGT; + case BPF_JNE: + return BPF_JEQ; + case BPF_JSGT: + return BPF_JSLE; + case BPF_JSLT: + return BPF_JSGE; + case BPF_JSGE: + return BPF_JSLT; + case BPF_JSLE: + return BPF_JSGT; + } + return -1; +} + + +static inline signed long hppa_offset(int insn, int off, struct hppa_jit_context *ctx) +{ + signed long from, to; + + off++; /* BPF branch is from PC+1 */ + from = (insn > 0) ? ctx->offset[insn - 1] : 0; + to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; + return (to - from); +} + +/* does the signed value fits into a given number of bits ? */ +static inline int check_bits_int(signed long val, int bits) +{ + return ((val >= 0) && ((val >> bits) == 0)) || + ((val < 0) && (((~((u32)val)) >> (bits-1)) == 0)); +} + +/* can the signed value be used in relative code ? */ +static inline int relative_bits_ok(signed long val, int bits) +{ + return ((val >= 0) && (val < (1UL << (bits-1)))) || /* XXX */ + ((val < 0) && (((~((unsigned long)val)) >> (bits-1)) == 0) + && (val & (1UL << (bits-1)))); +} + +/* can the signed value be used in relative branches ? */ +static inline int relative_branch_ok(signed long val, int bits) +{ + return ((val >= 0) && (val < (1UL << (bits-2)))) || /* XXX */ + ((val < 0) && (((~((unsigned long)val)) < (1UL << (bits-2)))) + && (val & (1UL << (bits-1)))); +} + + +#define is_5b_int(val) check_bits_int(val, 5) + +static inline unsigned sign_unext(unsigned x, unsigned len) +{ + unsigned len_ones; + + len_ones = (1 << len) - 1; + return x & len_ones; +} + +static inline unsigned low_sign_unext(unsigned x, unsigned len) +{ + unsigned temp; + unsigned sign; + + sign = (x >> (len-1)) & 1; + temp = sign_unext (x, len-1); + return (temp << 1) | sign; +} + +static inline unsigned re_assemble_12(unsigned as12) +{ + return (( (as12 & 0x800) >> 11) + | ((as12 & 0x400) >> (10 - 2)) + | ((as12 & 0x3ff) << (1 + 2))); +} + +static inline unsigned re_assemble_14(unsigned as14) +{ + return (( (as14 & 0x1fff) << 1) + | ((as14 & 0x2000) >> 13)); +} + +#ifdef CONFIG_64BIT +static inline unsigned re_assemble_16(unsigned as16) +{ + unsigned s, t; + + /* Unusual 16-bit encoding, for wide mode only. */ + t = (as16 << 1) & 0xffff; + s = (as16 & 0x8000); + return (t ^ s ^ (s >> 1)) | (s >> 15); +} +#endif + +static inline unsigned re_assemble_17(unsigned as17) +{ + return (( (as17 & 0x10000) >> 16) + | ((as17 & 0x0f800) << (16 - 11)) + | ((as17 & 0x00400) >> (10 - 2)) + | ((as17 & 0x003ff) << (1 + 2))); +} + +static inline unsigned re_assemble_21(unsigned as21) +{ + return (( (as21 & 0x100000) >> 20) + | ((as21 & 0x0ffe00) >> 8) + | ((as21 & 0x000180) << 7) + | ((as21 & 0x00007c) << 14) + | ((as21 & 0x000003) << 12)); +} + +static inline unsigned re_assemble_22(unsigned as22) +{ + return (( (as22 & 0x200000) >> 21) + | ((as22 & 0x1f0000) << (21 - 16)) + | ((as22 & 0x00f800) << (16 - 11)) + | ((as22 & 0x000400) >> (10 - 2)) + | ((as22 & 0x0003ff) << (1 + 2))); +} + +/* Various HPPA instruction formats. */ +/* see https://parisc.wiki.kernel.org/images-parisc/6/68/Pa11_acd.pdf, appendix C */ + +static inline u32 hppa_t1_insn(u8 opcode, u8 b, u8 r, s16 im14) +{ + return ((opcode << 26) | (b << 21) | (r << 16) | re_assemble_14(im14)); +} + +static inline u32 hppa_t5_insn(u8 opcode, u8 tr, u32 val21) +{ + return ((opcode << 26) | (tr << 21) | re_assemble_21(val21)); +} + +static inline u32 hppa_t6_insn(u8 opcode, u8 r2, u8 r1, u8 c, u8 f, u8 ext6, u16 t) +{ + return ((opcode << 26) | (r2 << 21) | (r1 << 16) | (c << 13) | (f << 12) | + (ext6 << 6) | t); +} + +/* 7. Arithmetic immediate */ +static inline u32 hppa_t7_insn(u8 opcode, u8 r, u8 t, u32 im11) +{ + return ((opcode << 26) | (r << 21) | (t << 16) | low_sign_unext(im11, 11)); +} + +/* 10. Shift instructions */ +static inline u32 hppa_t10_insn(u8 opcode, u8 r2, u8 r1, u8 c, u8 ext3, u8 cp, u8 t) +{ + return ((opcode << 26) | (r2 << 21) | (r1 << 16) | (c << 13) | + (ext3 << 10) | (cp << 5) | t); +} + +/* 11. Conditional branch instructions */ +static inline u32 hppa_t11_insn(u8 opcode, u8 r2, u8 r1, u8 c, u32 w, u8 nop) +{ + u32 ra = re_assemble_12(w); + // ra = low_sign_unext(w,11) | (w & (1<<10) + return ((opcode << 26) | (r2 << 21) | (r1 << 16) | (c << 13) | (nop << 1) | ra); +} + +/* 12. Branch instructions */ +static inline u32 hppa_t12_insn(u8 opcode, u8 rp, u32 w, u8 ext3, u8 nop) +{ + return ((opcode << 26) | (rp << 21) | (ext3 << 13) | (nop << 1) | re_assemble_17(w)); +} + +static inline u32 hppa_t12_L_insn(u8 opcode, u32 w, u8 nop) +{ + return ((opcode << 26) | (0x05 << 13) | (nop << 1) | re_assemble_22(w)); +} + +/* 21. Move to control register */ +static inline u32 hppa_t21_insn(u8 opcode, u8 r2, u8 r1, u8 ext8, u8 t) +{ + return ((opcode << 26) | (r2 << 21) | (r1 << 16) | (ext8 << 5) | t); +} + +/* Helper functions called by jit code on HPPA32 and HPPA64. */ + +u64 hppa_div64(u64 div, u64 divisor); +u64 hppa_div64_rem(u64 div, u64 divisor); + +/* Helper functions that emit HPPA instructions when possible. */ + +void bpf_jit_build_prologue(struct hppa_jit_context *ctx); +void bpf_jit_build_epilogue(struct hppa_jit_context *ctx); + +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx, + bool extra_pass); + +#endif /* _BPF_JIT_H */ diff --git a/arch/parisc/net/bpf_jit_comp32.c b/arch/parisc/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..5ff0cf925fe9 --- /dev/null +++ b/arch/parisc/net/bpf_jit_comp32.c @@ -0,0 +1,1615 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * BPF JIT compiler for PA-RISC (32-bit) + * + * Copyright (c) 2023 Helge Deller <deller@gmx.de> + * + * The code is based on the BPF JIT compiler for RV64 by Björn Töpel and + * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan. + */ + +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/libgcc.h> +#include "bpf_jit.h" + +/* + * Stack layout during BPF program execution (note: stack grows up): + * + * high + * HPPA32 sp => +----------+ <= HPPA32 fp + * | saved sp | + * | saved rp | + * | ... | HPPA32 callee-saved registers + * | curr args| + * | local var| + * +----------+ <= (sp - 4 * NR_SAVED_REGISTERS) + * | lo(R9) | + * | hi(R9) | + * | lo(FP) | JIT scratch space for BPF registers + * | hi(FP) | + * | ... | + * +----------+ <= (sp - 4 * NR_SAVED_REGISTERS + * | | - 4 * BPF_JIT_SCRATCH_REGS) + * | | + * | ... | BPF program stack + * | | + * | ... | Function call stack + * | | + * +----------+ + * low + */ + +enum { + /* Stack layout - these are offsets from top of JIT scratch space. */ + BPF_R8_HI, + BPF_R8_LO, + BPF_R9_HI, + BPF_R9_LO, + BPF_FP_HI, + BPF_FP_LO, + BPF_AX_HI, + BPF_AX_LO, + BPF_R0_TEMP_HI, + BPF_R0_TEMP_LO, + BPF_JIT_SCRATCH_REGS, +}; + +/* Number of callee-saved registers stored to stack: rp, r3-r18. */ +#define NR_SAVED_REGISTERS (18 - 3 + 1 + 8) + +/* Offset from fp for BPF registers stored on stack. */ +#define STACK_OFFSET(k) (- (NR_SAVED_REGISTERS + k + 1)) +#define STACK_ALIGN FRAME_SIZE + +#define EXIT_PTR_LOAD(reg) hppa_ldw(-0x08, HPPA_REG_SP, reg) +#define EXIT_PTR_STORE(reg) hppa_stw(reg, -0x08, HPPA_REG_SP) +#define EXIT_PTR_JUMP(reg, nop) hppa_bv(HPPA_REG_ZERO, reg, nop) + +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) +#define TMP_REG_R0 (MAX_BPF_JIT_REG + 2) + +static const s8 regmap[][2] = { + /* Return value from in-kernel function, and exit value from eBPF. */ + [BPF_REG_0] = {HPPA_REG_RET0, HPPA_REG_RET1}, /* HI/LOW */ + + /* Arguments from eBPF program to in-kernel function. */ + [BPF_REG_1] = {HPPA_R(3), HPPA_R(4)}, + [BPF_REG_2] = {HPPA_R(5), HPPA_R(6)}, + [BPF_REG_3] = {HPPA_R(7), HPPA_R(8)}, + [BPF_REG_4] = {HPPA_R(9), HPPA_R(10)}, + [BPF_REG_5] = {HPPA_R(11), HPPA_R(12)}, + + [BPF_REG_6] = {HPPA_R(13), HPPA_R(14)}, + [BPF_REG_7] = {HPPA_R(15), HPPA_R(16)}, + /* + * Callee-saved registers that in-kernel function will preserve. + * Stored on the stack. + */ + [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, + [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, + + /* Read-only frame pointer to access BPF stack. Not needed. */ + [BPF_REG_FP] = {STACK_OFFSET(BPF_FP_HI), STACK_OFFSET(BPF_FP_LO)}, + + /* Temporary register for blinding constants. Stored on the stack. */ + [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, + /* + * Temporary registers used by the JIT to operate on registers stored + * on the stack. Save t0 and t1 to be used as temporaries in generated + * code. + */ + [TMP_REG_1] = {HPPA_REG_T3, HPPA_REG_T2}, + [TMP_REG_2] = {HPPA_REG_T5, HPPA_REG_T4}, + + /* temporary space for BPF_R0 during libgcc and millicode calls */ + [TMP_REG_R0] = {STACK_OFFSET(BPF_R0_TEMP_HI), STACK_OFFSET(BPF_R0_TEMP_LO)}, +}; + +static s8 hi(const s8 *r) +{ + return r[0]; +} + +static s8 lo(const s8 *r) +{ + return r[1]; +} + +static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx) +{ + REG_SET_SEEN(ctx, rd); + if (OPTIMIZE_HPPA && (rs == rd)) + return; + REG_SET_SEEN(ctx, rs); + emit(hppa_copy(rs, rd), ctx); +} + +static void emit_hppa_xor(const s8 r1, const s8 r2, const s8 r3, struct hppa_jit_context *ctx) +{ + REG_SET_SEEN(ctx, r1); + REG_SET_SEEN(ctx, r2); + REG_SET_SEEN(ctx, r3); + if (OPTIMIZE_HPPA && (r1 == r2)) { + emit(hppa_copy(HPPA_REG_ZERO, r3), ctx); + } else { + emit(hppa_xor(r1, r2, r3), ctx); + } +} + +static void emit_imm(const s8 rd, s32 imm, struct hppa_jit_context *ctx) +{ + u32 lower = im11(imm); + + REG_SET_SEEN(ctx, rd); + if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) { + emit(hppa_ldi(imm, rd), ctx); + return; + } + emit(hppa_ldil(imm, rd), ctx); + if (OPTIMIZE_HPPA && (lower == 0)) + return; + emit(hppa_ldo(lower, rd, rd), ctx); +} + +static void emit_imm32(const s8 *rd, s32 imm, struct hppa_jit_context *ctx) +{ + /* Emit immediate into lower bits. */ + REG_SET_SEEN(ctx, lo(rd)); + emit_imm(lo(rd), imm, ctx); + + /* Sign-extend into upper bits. */ + REG_SET_SEEN(ctx, hi(rd)); + if (imm >= 0) + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + else + emit(hppa_ldi(-1, hi(rd)), ctx); +} + +static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo, + struct hppa_jit_context *ctx) +{ + emit_imm(hi(rd), imm_hi, ctx); + emit_imm(lo(rd), imm_lo, ctx); +} + +static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx) +{ + const s8 *r0 = regmap[BPF_REG_0]; + int i; + + if (is_tail_call) { + /* + * goto *(t0 + 4); + * Skips first instruction of prologue which initializes tail + * call counter. Assumes t0 contains address of target program, + * see emit_bpf_tail_call. + */ + emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx); + emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx); + /* in delay slot: */ + emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx); + + return; + } + + /* load epilogue function pointer and jump to it. */ + /* exit point is either directly below, or the outest TCC exit function */ + emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + + /* NOTE: we are 32-bit and big-endian, so return lower 32-bit value */ + emit_hppa_copy(lo(r0), HPPA_REG_RET0, ctx); + + /* Restore callee-saved registers. */ + for (i = 3; i <= 18; i++) { + if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i))) + continue; + emit(hppa_ldw(-REG_SIZE * (8 + (i-3)), HPPA_REG_SP, HPPA_R(i)), ctx); + } + + /* load original return pointer (stored by outest TCC function) */ + emit(hppa_ldw(-0x14, HPPA_REG_SP, HPPA_REG_RP), ctx); + emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx); + /* in delay slot: */ + emit(hppa_ldw(-0x04, HPPA_REG_SP, HPPA_REG_SP), ctx); +} + +static bool is_stacked(s8 reg) +{ + return reg < 0; +} + +static const s8 *bpf_get_reg64_offset(const s8 *reg, const s8 *tmp, + u16 offset_sp, struct hppa_jit_context *ctx) +{ + if (is_stacked(hi(reg))) { + emit(hppa_ldw(REG_SIZE * hi(reg) - offset_sp, HPPA_REG_SP, hi(tmp)), ctx); + emit(hppa_ldw(REG_SIZE * lo(reg) - offset_sp, HPPA_REG_SP, lo(tmp)), ctx); + reg = tmp; + } + REG_SET_SEEN(ctx, hi(reg)); + REG_SET_SEEN(ctx, lo(reg)); + return reg; +} + +static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp, + struct hppa_jit_context *ctx) +{ + return bpf_get_reg64_offset(reg, tmp, 0, ctx); +} + +static const s8 *bpf_get_reg64_ref(const s8 *reg, const s8 *tmp, + bool must_load, struct hppa_jit_context *ctx) +{ + if (!OPTIMIZE_HPPA) + return bpf_get_reg64(reg, tmp, ctx); + + if (is_stacked(hi(reg))) { + if (must_load) + emit(hppa_ldw(REG_SIZE * hi(reg), HPPA_REG_SP, hi(tmp)), ctx); + reg = tmp; + } + REG_SET_SEEN(ctx, hi(reg)); + REG_SET_SEEN(ctx, lo(reg)); + return reg; +} + + +static void bpf_put_reg64(const s8 *reg, const s8 *src, + struct hppa_jit_context *ctx) +{ + if (is_stacked(hi(reg))) { + emit(hppa_stw(hi(src), REG_SIZE * hi(reg), HPPA_REG_SP), ctx); + emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx); + } +} + +static void bpf_save_R0(struct hppa_jit_context *ctx) +{ + bpf_put_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx); +} + +static void bpf_restore_R0(struct hppa_jit_context *ctx) +{ + bpf_get_reg64(regmap[TMP_REG_R0], regmap[BPF_REG_0], ctx); +} + + +static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp, + struct hppa_jit_context *ctx) +{ + if (is_stacked(lo(reg))) { + emit(hppa_ldw(REG_SIZE * lo(reg), HPPA_REG_SP, lo(tmp)), ctx); + reg = tmp; + } + REG_SET_SEEN(ctx, lo(reg)); + return reg; +} + +static const s8 *bpf_get_reg32_ref(const s8 *reg, const s8 *tmp, + struct hppa_jit_context *ctx) +{ + if (!OPTIMIZE_HPPA) + return bpf_get_reg32(reg, tmp, ctx); + + if (is_stacked(hi(reg))) { + reg = tmp; + } + REG_SET_SEEN(ctx, lo(reg)); + return reg; +} + +static void bpf_put_reg32(const s8 *reg, const s8 *src, + struct hppa_jit_context *ctx) +{ + if (is_stacked(lo(reg))) { + REG_SET_SEEN(ctx, lo(src)); + emit(hppa_stw(lo(src), REG_SIZE * lo(reg), HPPA_REG_SP), ctx); + if (1 && !ctx->prog->aux->verifier_zext) { + REG_SET_SEEN(ctx, hi(reg)); + emit(hppa_stw(HPPA_REG_ZERO, REG_SIZE * hi(reg), HPPA_REG_SP), ctx); + } + } else if (1 && !ctx->prog->aux->verifier_zext) { + REG_SET_SEEN(ctx, hi(reg)); + emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx); + } +} + +/* extern hppa millicode functions */ +extern void $$mulI(void); +extern void $$divU(void); +extern void $$remU(void); + +static void emit_call_millicode(void *func, const s8 arg0, + const s8 arg1, u8 opcode, struct hppa_jit_context *ctx) +{ + u32 func_addr; + + emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx); + emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx); + + /* libcgcc overwrites HPPA_REG_RET0/1, save temp. in dest. */ + if (arg0 != HPPA_REG_RET1) + bpf_save_R0(ctx); + + func_addr = (uintptr_t) dereference_function_descriptor(func); + emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx); + /* skip the following be_l instruction if divisor is zero. */ + if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) { + if (BPF_OP(opcode) == BPF_DIV) + emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx); + else + emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx); + emit(hppa_or_cond(HPPA_REG_ARG1, HPPA_REG_ZERO, 1, 0, HPPA_REG_ZERO), ctx); + } + /* Note: millicode functions use r31 as return pointer instead of rp */ + emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); + emit(hppa_nop(), ctx); /* this nop is needed here for delay slot */ + + /* Note: millicode functions return result in RET1, not RET0 */ + emit_hppa_copy(HPPA_REG_RET1, arg0, ctx); + + /* restore HPPA_REG_RET0/1, temp. save in dest. */ + if (arg0 != HPPA_REG_RET1) + bpf_restore_R0(ctx); +} + +static void emit_call_libgcc_ll(void *func, const s8 *arg0, + const s8 *arg1, u8 opcode, struct hppa_jit_context *ctx) +{ + u32 func_addr; + + emit_hppa_copy(lo(arg0), HPPA_REG_ARG0, ctx); + emit_hppa_copy(hi(arg0), HPPA_REG_ARG1, ctx); + emit_hppa_copy(lo(arg1), HPPA_REG_ARG2, ctx); + emit_hppa_copy(hi(arg1), HPPA_REG_ARG3, ctx); + + /* libcgcc overwrites HPPA_REG_RET0/_RET1, so keep copy of R0 on stack */ + if (hi(arg0) != HPPA_REG_RET0) + bpf_save_R0(ctx); + + /* prepare stack */ + emit(hppa_ldo(2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx); + + func_addr = (uintptr_t) dereference_function_descriptor(func); + emit(hppa_ldil(func_addr, HPPA_REG_R31), ctx); + /* zero out the following be_l instruction if divisor is 0 (and set default values) */ + if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) { + emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx); + if (BPF_OP(opcode) == BPF_DIV) + emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET1, ctx); + else + emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET1, ctx); + emit(hppa_or_cond(HPPA_REG_ARG2, HPPA_REG_ARG3, 1, 0, HPPA_REG_ZERO), ctx); + } + emit(hppa_be_l(im11(func_addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx); + emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx); + + /* restore stack */ + emit(hppa_ldo(-2 * FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx); + + emit_hppa_copy(HPPA_REG_RET0, hi(arg0), ctx); + emit_hppa_copy(HPPA_REG_RET1, lo(arg0), ctx); + + /* restore HPPA_REG_RET0/_RET1 */ + if (hi(arg0) != HPPA_REG_RET0) + bpf_restore_R0(ctx); +} + +static void emit_jump(s32 paoff, bool force_far, + struct hppa_jit_context *ctx) +{ + unsigned long pc, addr; + + /* Note: allocate 2 instructions for jumps if force_far is set. */ + if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 17)) { + /* use BL,short branch followed by nop() */ + emit(hppa_bl(paoff - HPPA_BRANCH_DISPLACEMENT, HPPA_REG_ZERO), ctx); + if (force_far) + emit(hppa_nop(), ctx); + return; + } + + pc = (uintptr_t) &ctx->insns[ctx->ninsns]; + addr = pc + (paoff * HPPA_INSN_SIZE); + emit(hppa_ldil(addr, HPPA_REG_R31), ctx); + emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); // be,l,n addr(sr4,r31), %sr0, %r31 +} + +static void emit_alu_i64(const s8 *dst, s32 imm, + struct hppa_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *rd; + + if (0 && op == BPF_MOV) + rd = bpf_get_reg64_ref(dst, tmp1, false, ctx); + else + rd = bpf_get_reg64(dst, tmp1, ctx); + + /* dst = dst OP imm */ + switch (op) { + case BPF_MOV: + emit_imm32(rd, imm, ctx); + break; + case BPF_AND: + emit_imm(HPPA_REG_T0, imm, ctx); + emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx); + if (imm >= 0) + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + break; + case BPF_OR: + emit_imm(HPPA_REG_T0, imm, ctx); + emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx); + if (imm < 0) + emit_imm(hi(rd), -1, ctx); + break; + case BPF_XOR: + emit_imm(HPPA_REG_T0, imm, ctx); + emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx); + if (imm < 0) { + emit_imm(HPPA_REG_T0, -1, ctx); + emit_hppa_xor(hi(rd), HPPA_REG_T0, hi(rd), ctx); + } + break; + case BPF_LSH: + if (imm == 0) + break; + if (imm > 32) { + imm -= 32; + emit(hppa_zdep(lo(rd), imm, imm, hi(rd)), ctx); + emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx); + } else if (imm == 32) { + emit_hppa_copy(lo(rd), hi(rd), ctx); + emit_hppa_copy(HPPA_REG_ZERO, lo(rd), ctx); + } else { + emit(hppa_shd(hi(rd), lo(rd), 32 - imm, hi(rd)), ctx); + emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx); + } + break; + case BPF_RSH: + if (imm == 0) + break; + if (imm > 32) { + imm -= 32; + emit(hppa_shr(hi(rd), imm, lo(rd)), ctx); + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + } else if (imm == 32) { + emit_hppa_copy(hi(rd), lo(rd), ctx); + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + } else { + emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx); + emit(hppa_shr(hi(rd), imm, hi(rd)), ctx); + } + break; + case BPF_ARSH: + if (imm == 0) + break; + if (imm > 32) { + imm -= 32; + emit(hppa_extrws(hi(rd), 31 - imm, imm, lo(rd)), ctx); + emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx); + } else if (imm == 32) { + emit_hppa_copy(hi(rd), lo(rd), ctx); + emit(hppa_extrws(hi(rd), 0, 31, hi(rd)), ctx); + } else { + emit(hppa_shrpw(hi(rd), lo(rd), imm, lo(rd)), ctx); + emit(hppa_extrws(hi(rd), 31 - imm, imm, hi(rd)), ctx); + } + break; + default: + WARN_ON(1); + } + + bpf_put_reg64(dst, rd, ctx); +} + +static void emit_alu_i32(const s8 *dst, s32 imm, + struct hppa_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *rd = bpf_get_reg32(dst, tmp1, ctx); + + if (op == BPF_MOV) + rd = bpf_get_reg32_ref(dst, tmp1, ctx); + else + rd = bpf_get_reg32(dst, tmp1, ctx); + + /* dst = dst OP imm */ + switch (op) { + case BPF_MOV: + emit_imm(lo(rd), imm, ctx); + break; + case BPF_ADD: + emit_imm(HPPA_REG_T0, imm, ctx); + emit(hppa_add(lo(rd), HPPA_REG_T0, lo(rd)), ctx); + break; + case BPF_SUB: + emit_imm(HPPA_REG_T0, imm, ctx); + emit(hppa_sub(lo(rd), HPPA_REG_T0, lo(rd)), ctx); + break; + case BPF_AND: + emit_imm(HPPA_REG_T0, imm, ctx); + emit(hppa_and(lo(rd), HPPA_REG_T0, lo(rd)), ctx); + break; + case BPF_OR: + emit_imm(HPPA_REG_T0, imm, ctx); + emit(hppa_or(lo(rd), HPPA_REG_T0, lo(rd)), ctx); + break; + case BPF_XOR: + emit_imm(HPPA_REG_T0, imm, ctx); + emit_hppa_xor(lo(rd), HPPA_REG_T0, lo(rd), ctx); + break; + case BPF_LSH: + if (imm != 0) + emit(hppa_zdep(lo(rd), imm, imm, lo(rd)), ctx); + break; + case BPF_RSH: + if (imm != 0) + emit(hppa_shr(lo(rd), imm, lo(rd)), ctx); + break; + case BPF_ARSH: + if (imm != 0) + emit(hppa_extrws(lo(rd), 31 - imm, imm, lo(rd)), ctx); + break; + default: + WARN_ON(1); + } + + bpf_put_reg32(dst, rd, ctx); +} + +static void emit_alu_r64(const s8 *dst, const s8 *src, + struct hppa_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *tmp2 = regmap[TMP_REG_2]; + const s8 *rd; + const s8 *rs = bpf_get_reg64(src, tmp2, ctx); + + if (op == BPF_MOV) + rd = bpf_get_reg64_ref(dst, tmp1, false, ctx); + else + rd = bpf_get_reg64(dst, tmp1, ctx); + + /* dst = dst OP src */ + switch (op) { + case BPF_MOV: + emit_hppa_copy(lo(rs), lo(rd), ctx); + emit_hppa_copy(hi(rs), hi(rd), ctx); + break; + case BPF_ADD: + emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx); + emit(hppa_addc(hi(rd), hi(rs), hi(rd)), ctx); + break; + case BPF_SUB: + emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx); + emit(hppa_subb(hi(rd), hi(rs), hi(rd)), ctx); + break; + case BPF_AND: + emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx); + emit(hppa_and(hi(rd), hi(rs), hi(rd)), ctx); + break; + case BPF_OR: + emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx); + emit(hppa_or(hi(rd), hi(rs), hi(rd)), ctx); + break; + case BPF_XOR: + emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx); + emit_hppa_xor(hi(rd), hi(rs), hi(rd), ctx); + break; + case BPF_MUL: + emit_call_libgcc_ll(__muldi3, rd, rs, op, ctx); + break; + case BPF_DIV: + emit_call_libgcc_ll(&hppa_div64, rd, rs, op, ctx); + break; + case BPF_MOD: + emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, op, ctx); + break; + case BPF_LSH: + emit_call_libgcc_ll(__ashldi3, rd, rs, op, ctx); + break; + case BPF_RSH: + emit_call_libgcc_ll(__lshrdi3, rd, rs, op, ctx); + break; + case BPF_ARSH: + emit_call_libgcc_ll(__ashrdi3, rd, rs, op, ctx); + break; + case BPF_NEG: + emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx); + emit(hppa_subb(HPPA_REG_ZERO, hi(rd), hi(rd)), ctx); + break; + default: + WARN_ON(1); + } + + bpf_put_reg64(dst, rd, ctx); +} + +static void emit_alu_r32(const s8 *dst, const s8 *src, + struct hppa_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *tmp2 = regmap[TMP_REG_2]; + const s8 *rd; + const s8 *rs = bpf_get_reg32(src, tmp2, ctx); + + if (op == BPF_MOV) + rd = bpf_get_reg32_ref(dst, tmp1, ctx); + else + rd = bpf_get_reg32(dst, tmp1, ctx); + + /* dst = dst OP src */ + switch (op) { + case BPF_MOV: + emit_hppa_copy(lo(rs), lo(rd), ctx); + break; + case BPF_ADD: + emit(hppa_add(lo(rd), lo(rs), lo(rd)), ctx); + break; + case BPF_SUB: + emit(hppa_sub(lo(rd), lo(rs), lo(rd)), ctx); + break; + case BPF_AND: + emit(hppa_and(lo(rd), lo(rs), lo(rd)), ctx); + break; + case BPF_OR: + emit(hppa_or(lo(rd), lo(rs), lo(rd)), ctx); + break; + case BPF_XOR: + emit_hppa_xor(lo(rd), lo(rs), lo(rd), ctx); + break; + case BPF_MUL: + emit_call_millicode($$mulI, lo(rd), lo(rs), op, ctx); + break; + case BPF_DIV: + emit_call_millicode($$divU, lo(rd), lo(rs), op, ctx); + break; + case BPF_MOD: + emit_call_millicode($$remU, lo(rd), lo(rs), op, ctx); + break; + case BPF_LSH: + emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx); + emit(hppa_mtsar(HPPA_REG_T0), ctx); + emit(hppa_depwz_sar(lo(rd), lo(rd)), ctx); + break; + case BPF_RSH: + emit(hppa_mtsar(lo(rs)), ctx); + emit(hppa_shrpw_sar(lo(rd), lo(rd)), ctx); + break; + case BPF_ARSH: /* sign extending arithmetic shift right */ + // emit(hppa_beq(lo(rs), HPPA_REG_ZERO, 2), ctx); + emit(hppa_subi(0x1f, lo(rs), HPPA_REG_T0), ctx); + emit(hppa_mtsar(HPPA_REG_T0), ctx); + emit(hppa_extrws_sar(lo(rd), lo(rd)), ctx); + break; + case BPF_NEG: + emit(hppa_sub(HPPA_REG_ZERO, lo(rd), lo(rd)), ctx); // sub r0,rd,rd + break; + default: + WARN_ON(1); + } + + bpf_put_reg32(dst, rd, ctx); +} + +static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 paoff, + struct hppa_jit_context *ctx, const u8 op) +{ + int e, s = ctx->ninsns; + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *tmp2 = regmap[TMP_REG_2]; + + const s8 *rs1 = bpf_get_reg64(src1, tmp1, ctx); + const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx); + + /* + * NO_JUMP skips over the rest of the instructions and the + * emit_jump, meaning the BPF branch is not taken. + * JUMP skips directly to the emit_jump, meaning + * the BPF branch is taken. + * + * The fallthrough case results in the BPF branch being taken. + */ +#define NO_JUMP(idx) (2 + (idx) - 1) +#define JUMP(idx) (0 + (idx) - 1) + + switch (op) { + case BPF_JEQ: + emit(hppa_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JGT: + emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JLT: + emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JGE: + emit(hppa_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(hppa_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JLE: + emit(hppa_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(hppa_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JNE: + emit(hppa_bne(hi(rs1), hi(rs2), JUMP(1)), ctx); + emit(hppa_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSGT: + emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSLT: + emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSGE: + emit(hppa_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(hppa_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSLE: + emit(hppa_blt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(hppa_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(hppa_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSET: + emit(hppa_and(hi(rs1), hi(rs2), HPPA_REG_T0), ctx); + emit(hppa_and(lo(rs1), lo(rs2), HPPA_REG_T1), ctx); + emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, JUMP(1)), ctx); + emit(hppa_beq(HPPA_REG_T1, HPPA_REG_ZERO, NO_JUMP(0)), ctx); + break; + default: + WARN_ON(1); + } + +#undef NO_JUMP +#undef JUMP + + e = ctx->ninsns; + /* Adjust for extra insns. */ + paoff -= (e - s); + emit_jump(paoff, true, ctx); + return 0; +} + +static int emit_bcc(u8 op, u8 rd, u8 rs, int paoff, struct hppa_jit_context *ctx) +{ + int e, s; + bool far = false; + int off; + + if (op == BPF_JSET) { + /* + * BPF_JSET is a special case: it has no inverse so we always + * treat it as a far branch. + */ + emit(hppa_and(rd, rs, HPPA_REG_T0), ctx); + paoff -= 1; /* reduce offset due to hppa_and() above */ + rd = HPPA_REG_T0; + rs = HPPA_REG_ZERO; + op = BPF_JNE; + } + + s = ctx->ninsns; + + if (!relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 12)) { + op = invert_bpf_cond(op); + far = true; + } + + /* + * For a far branch, the condition is negated and we jump over the + * branch itself, and the three instructions from emit_jump. + * For a near branch, just use paoff. + */ + off = far ? (HPPA_BRANCH_DISPLACEMENT - 1) : paoff - HPPA_BRANCH_DISPLACEMENT; + + switch (op) { + /* IF (dst COND src) JUMP off */ + case BPF_JEQ: + emit(hppa_beq(rd, rs, off), ctx); + break; + case BPF_JGT: + emit(hppa_bgtu(rd, rs, off), ctx); + break; + case BPF_JLT: + emit(hppa_bltu(rd, rs, off), ctx); + break; + case BPF_JGE: + emit(hppa_bgeu(rd, rs, off), ctx); + break; + case BPF_JLE: + emit(hppa_bleu(rd, rs, off), ctx); + break; + case BPF_JNE: + emit(hppa_bne(rd, rs, off), ctx); + break; + case BPF_JSGT: + emit(hppa_bgt(rd, rs, off), ctx); + break; + case BPF_JSLT: + emit(hppa_blt(rd, rs, off), ctx); + break; + case BPF_JSGE: + emit(hppa_bge(rd, rs, off), ctx); + break; + case BPF_JSLE: + emit(hppa_ble(rd, rs, off), ctx); + break; + default: + WARN_ON(1); + } + + if (far) { + e = ctx->ninsns; + /* Adjust for extra insns. */ + paoff -= (e - s); + emit_jump(paoff, true, ctx); + } + return 0; +} + +static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 paoff, + struct hppa_jit_context *ctx, const u8 op) +{ + int e, s = ctx->ninsns; + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *tmp2 = regmap[TMP_REG_2]; + + const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx); + const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx); + + e = ctx->ninsns; + /* Adjust for extra insns. */ + paoff -= (e - s); + + if (emit_bcc(op, lo(rs1), lo(rs2), paoff, ctx)) + return -1; + + return 0; +} + +static void emit_call(bool fixed, u64 addr, struct hppa_jit_context *ctx) +{ + const s8 *tmp = regmap[TMP_REG_1]; + const s8 *r0 = regmap[BPF_REG_0]; + const s8 *reg; + const int offset_sp = 2 * STACK_ALIGN; + + /* prepare stack */ + emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx); + + /* load R1 & R2 in registers, R3-R5 to stack. */ + reg = bpf_get_reg64_offset(regmap[BPF_REG_5], tmp, offset_sp, ctx); + emit(hppa_stw(hi(reg), -0x48, HPPA_REG_SP), ctx); + emit(hppa_stw(lo(reg), -0x44, HPPA_REG_SP), ctx); + + reg = bpf_get_reg64_offset(regmap[BPF_REG_4], tmp, offset_sp, ctx); + emit(hppa_stw(hi(reg), -0x40, HPPA_REG_SP), ctx); + emit(hppa_stw(lo(reg), -0x3c, HPPA_REG_SP), ctx); + + reg = bpf_get_reg64_offset(regmap[BPF_REG_3], tmp, offset_sp, ctx); + emit(hppa_stw(hi(reg), -0x38, HPPA_REG_SP), ctx); + emit(hppa_stw(lo(reg), -0x34, HPPA_REG_SP), ctx); + + reg = bpf_get_reg64_offset(regmap[BPF_REG_2], tmp, offset_sp, ctx); + emit_hppa_copy(hi(reg), HPPA_REG_ARG3, ctx); + emit_hppa_copy(lo(reg), HPPA_REG_ARG2, ctx); + + reg = bpf_get_reg64_offset(regmap[BPF_REG_1], tmp, offset_sp, ctx); + emit_hppa_copy(hi(reg), HPPA_REG_ARG1, ctx); + emit_hppa_copy(lo(reg), HPPA_REG_ARG0, ctx); + + /* backup TCC */ + if (REG_WAS_SEEN(ctx, HPPA_REG_TCC)) + emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx); + + /* + * Use ldil() to load absolute address. Don't use emit_imm as the + * number of emitted instructions should not depend on the value of + * addr. + */ + emit(hppa_ldil(addr, HPPA_REG_R31), ctx); + emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, EXEC_NEXT_INSTR), ctx); + /* set return address in delay slot */ + emit_hppa_copy(HPPA_REG_R31, HPPA_REG_RP, ctx); + + /* restore TCC */ + if (REG_WAS_SEEN(ctx, HPPA_REG_TCC)) + emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx); + + /* restore stack */ + emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx); + + /* set return value. */ + emit_hppa_copy(HPPA_REG_RET0, hi(r0), ctx); + emit_hppa_copy(HPPA_REG_RET1, lo(r0), ctx); +} + +static int emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx) +{ + /* + * R1 -> &ctx + * R2 -> &array + * R3 -> index + */ + int off; + const s8 *arr_reg = regmap[BPF_REG_2]; + const s8 *idx_reg = regmap[BPF_REG_3]; + struct bpf_array bpfa; + struct bpf_prog bpfp; + + /* get address of TCC main exit function for error case into rp */ + emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx); + + /* max_entries = array->map.max_entries; */ + off = offsetof(struct bpf_array, map.max_entries); + BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4); + emit(hppa_ldw(off, lo(arr_reg), HPPA_REG_T1), ctx); + + /* + * if (index >= max_entries) + * goto out; + */ + emit(hppa_bltu(lo(idx_reg), HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + + /* + * if (--tcc < 0) + * goto out; + */ + REG_FORCE_SEEN(ctx, HPPA_REG_TCC); + emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx); + emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + + /* + * prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 4); + emit(hppa_sh2add(lo(idx_reg), lo(arr_reg), HPPA_REG_T0), ctx); + off = offsetof(struct bpf_array, ptrs); + BUILD_BUG_ON(!relative_bits_ok(off, 11)); + emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx); + emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + + /* + * tcc = temp_tcc; + * goto *(prog->bpf_func + 4); + */ + off = offsetof(struct bpf_prog, bpf_func); + BUILD_BUG_ON(!relative_bits_ok(off, 11)); + BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 4); + emit(hppa_ldw(off, HPPA_REG_T0, HPPA_REG_T0), ctx); + /* Epilogue jumps to *(t0 + 4). */ + __build_epilogue(true, ctx); + return 0; +} + +static int emit_load_r64(const s8 *dst, const s8 *src, s16 off, + struct hppa_jit_context *ctx, const u8 size) +{ + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *tmp2 = regmap[TMP_REG_2]; + const s8 *rd = bpf_get_reg64_ref(dst, tmp1, ctx->prog->aux->verifier_zext, ctx); + const s8 *rs = bpf_get_reg64(src, tmp2, ctx); + s8 srcreg; + + /* need to calculate address since offset does not fit in 14 bits? */ + if (relative_bits_ok(off, 14)) + srcreg = lo(rs); + else { + /* need to use R1 here, since addil puts result into R1 */ + srcreg = HPPA_REG_R1; + emit(hppa_addil(off, lo(rs)), ctx); + off = im11(off); + } + + /* LDX: dst = *(size *)(src + off) */ + switch (size) { + case BPF_B: + emit(hppa_ldb(off + 0, srcreg, lo(rd)), ctx); + if (!ctx->prog->aux->verifier_zext) + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + break; + case BPF_H: + emit(hppa_ldh(off + 0, srcreg, lo(rd)), ctx); + if (!ctx->prog->aux->verifier_zext) + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + break; + case BPF_W: + emit(hppa_ldw(off + 0, srcreg, lo(rd)), ctx); + if (!ctx->prog->aux->verifier_zext) + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + break; + case BPF_DW: + emit(hppa_ldw(off + 0, srcreg, hi(rd)), ctx); + emit(hppa_ldw(off + 4, srcreg, lo(rd)), ctx); + break; + } + + bpf_put_reg64(dst, rd, ctx); + return 0; +} + +static int emit_store_r64(const s8 *dst, const s8 *src, s16 off, + struct hppa_jit_context *ctx, const u8 size, + const u8 mode) +{ + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *tmp2 = regmap[TMP_REG_2]; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + const s8 *rs = bpf_get_reg64(src, tmp2, ctx); + s8 dstreg; + + /* need to calculate address since offset does not fit in 14 bits? */ + if (relative_bits_ok(off, 14)) + dstreg = lo(rd); + else { + /* need to use R1 here, since addil puts result into R1 */ + dstreg = HPPA_REG_R1; + emit(hppa_addil(off, lo(rd)), ctx); + off = im11(off); + } + + /* ST: *(size *)(dst + off) = imm */ + switch (size) { + case BPF_B: + emit(hppa_stb(lo(rs), off + 0, dstreg), ctx); + break; + case BPF_H: + emit(hppa_sth(lo(rs), off + 0, dstreg), ctx); + break; + case BPF_W: + emit(hppa_stw(lo(rs), off + 0, dstreg), ctx); + break; + case BPF_DW: + emit(hppa_stw(hi(rs), off + 0, dstreg), ctx); + emit(hppa_stw(lo(rs), off + 4, dstreg), ctx); + break; + } + + return 0; +} + +static void emit_rev16(const s8 rd, struct hppa_jit_context *ctx) +{ + emit(hppa_extru(rd, 23, 8, HPPA_REG_T1), ctx); + emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx); + emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx); +} + +static void emit_rev32(const s8 rs, const s8 rd, struct hppa_jit_context *ctx) +{ + emit(hppa_shrpw(rs, rs, 16, HPPA_REG_T1), ctx); + emit(hppa_depwz(HPPA_REG_T1, 15, 8, HPPA_REG_T1), ctx); + emit(hppa_shrpw(rs, HPPA_REG_T1, 8, rd), ctx); +} + +static void emit_zext64(const s8 *dst, struct hppa_jit_context *ctx) +{ + const s8 *rd; + const s8 *tmp1 = regmap[TMP_REG_1]; + + rd = bpf_get_reg64(dst, tmp1, ctx); + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + bpf_put_reg64(dst, rd, ctx); +} + +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx, + bool extra_pass) +{ + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || + BPF_CLASS(insn->code) == BPF_JMP; + int s, e, paoff, i = insn - ctx->prog->insnsi; + u8 code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + + const s8 *dst = regmap[insn->dst_reg]; + const s8 *src = regmap[insn->src_reg]; + const s8 *tmp1 = regmap[TMP_REG_1]; + const s8 *tmp2 = regmap[TMP_REG_2]; + + if (0) printk("CLASS %03d CODE %#02x ALU64:%d BPF_SIZE %#02x " + "BPF_CODE %#02x src_reg %d dst_reg %d\n", + BPF_CLASS(code), code, (code & BPF_ALU64) ? 1:0, BPF_SIZE(code), + BPF_OP(code), insn->src_reg, insn->dst_reg); + + switch (code) { + /* dst = src */ + case BPF_ALU64 | BPF_MOV | BPF_X: + + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_K: + + case BPF_ALU64 | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_K: + + case BPF_ALU64 | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + + case BPF_ALU64 | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_K: + + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_K: + + case BPF_ALU64 | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_K: + + case BPF_ALU64 | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + if (BPF_SRC(code) == BPF_K) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + emit_alu_r64(dst, src, ctx, BPF_OP(code)); + break; + + /* dst = -dst */ + case BPF_ALU64 | BPF_NEG: + emit_alu_r64(dst, tmp2, ctx, BPF_OP(code)); + break; + + case BPF_ALU64 | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_alu_i64(dst, imm, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext. */ + emit_zext64(dst, ctx); + break; + } + fallthrough; + /* dst = dst OP src */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_MUL | BPF_K: + + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_DIV | BPF_K: + + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_ARSH | BPF_X: + if (BPF_SRC(code) == BPF_K) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + emit_alu_r32(dst, src, ctx, BPF_OP(code)); + break; + + /* dst = dst OP imm */ + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + /* + * mul,div,mod are handled in the BPF_X case. + */ + emit_alu_i32(dst, imm, ctx, BPF_OP(code)); + break; + + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + /* + * src is ignored---choose tmp2 as a dummy register since it + * is not on the stack. + */ + emit_alu_r32(dst, tmp2, ctx, BPF_OP(code)); + break; + + /* dst = BSWAP##imm(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_BE: + { + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit(hppa_extru(lo(rd), 31, 16, lo(rd)), ctx); + fallthrough; + case 32: + /* zero-extend 32 bits into 64 bits */ + if (!ctx->prog->aux->verifier_zext) + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + break; + case 64: + /* Do nothing. */ + break; + default: + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); + return -1; + } + + bpf_put_reg64(dst, rd, ctx); + break; + } + + case BPF_ALU | BPF_END | BPF_FROM_LE: + { + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + switch (imm) { + case 16: + emit_rev16(lo(rd), ctx); + if (!ctx->prog->aux->verifier_zext) + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + break; + case 32: + emit_rev32(lo(rd), lo(rd), ctx); + if (!ctx->prog->aux->verifier_zext) + emit_hppa_copy(HPPA_REG_ZERO, hi(rd), ctx); + break; + case 64: + /* Swap upper and lower halves, then each half. */ + emit_hppa_copy(hi(rd), HPPA_REG_T0, ctx); + emit_rev32(lo(rd), hi(rd), ctx); + emit_rev32(HPPA_REG_T0, lo(rd), ctx); + break; + default: + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); + return -1; + } + + bpf_put_reg64(dst, rd, ctx); + break; + } + /* JUMP off */ + case BPF_JMP | BPF_JA: + paoff = hppa_offset(i, off, ctx); + emit_jump(paoff, false, ctx); + break; + /* function call */ + case BPF_JMP | BPF_CALL: + { + bool fixed; + int ret; + u64 addr; + + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, + &fixed); + if (ret < 0) + return ret; + emit_call(fixed, addr, ctx); + break; + } + /* tail call */ + case BPF_JMP | BPF_TAIL_CALL: + REG_SET_SEEN_ALL(ctx); + if (emit_bpf_tail_call(i, ctx)) + return -1; + break; + /* IF (dst COND imm) JUMP off */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + paoff = hppa_offset(i, off, ctx); + if (BPF_SRC(code) == BPF_K) { + s = ctx->ninsns; + emit_imm32(tmp2, imm, ctx); + src = tmp2; + e = ctx->ninsns; + paoff -= (e - s); + } + if (is64) + emit_branch_r64(dst, src, paoff, ctx, BPF_OP(code)); + else + emit_branch_r32(dst, src, paoff, ctx, BPF_OP(code)); + break; + /* function return */ + case BPF_JMP | BPF_EXIT: + if (i == ctx->prog->len - 1) + break; + /* load epilogue function pointer and jump to it. */ + emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + break; + + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + { + struct bpf_insn insn1 = insn[1]; + u32 upper = insn1.imm; + u32 lower = imm; + const s8 *rd = bpf_get_reg64_ref(dst, tmp1, false, ctx); + + if (0 && bpf_pseudo_func(insn)) { + WARN_ON(upper); /* we are 32-bit! */ + upper = 0; + lower = (uintptr_t) dereference_function_descriptor(lower); + } + + emit_imm64(rd, upper, lower, ctx); + bpf_put_reg64(dst, rd, ctx); + return 1; + } + + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code))) + return -1; + break; + + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + + /* ST: *(size *)(dst + off) = imm */ + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_DW: + + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_DW: + if (BPF_CLASS(code) == BPF_ST) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + + if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code), + BPF_MODE(code))) + return -1; + break; + + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + pr_info_once( + "bpf-jit: not supported: atomic operation %02x ***\n", + insn->imm); + return -EFAULT; + + default: + pr_err("bpf-jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; +} + +void bpf_jit_build_prologue(struct hppa_jit_context *ctx) +{ + const s8 *tmp = regmap[TMP_REG_1]; + const s8 *dst, *reg; + int stack_adjust = 0; + int i; + unsigned long addr; + int bpf_stack_adjust; + + /* + * stack on hppa grows up, so if tail calls are used we need to + * allocate the maximum stack size + */ + if (REG_ALL_SEEN(ctx)) + bpf_stack_adjust = MAX_BPF_STACK; + else + bpf_stack_adjust = ctx->prog->aux->stack_depth; + bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN); + + /* make space for callee-saved registers. */ + stack_adjust += NR_SAVED_REGISTERS * REG_SIZE; + /* make space for BPF registers on stack. */ + stack_adjust += BPF_JIT_SCRATCH_REGS * REG_SIZE; + /* make space for BPF stack. */ + stack_adjust += bpf_stack_adjust; + /* round up for stack alignment. */ + stack_adjust = round_up(stack_adjust, STACK_ALIGN); + + /* + * The first instruction sets the tail-call-counter (TCC) register. + * This instruction is skipped by tail calls. + * Use a temporary register instead of a caller-saved register initially. + */ + emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx); + + /* + * skip all initializations when called as BPF TAIL call. + */ + emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx); + emit(hppa_bne(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, ctx->prologue_len - 2 - HPPA_BRANCH_DISPLACEMENT), ctx); + + /* set up hppa stack frame. */ + emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx); // copy sp,r1 (=prev_sp) + emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx); // ldo stack_adjust(sp),sp (increase stack) + emit(hppa_stw(HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx); // stw prev_sp,-0x04(sp) + emit(hppa_stw(HPPA_REG_RP, -0x14, HPPA_REG_SP), ctx); // stw rp,-0x14(sp) + + REG_FORCE_SEEN(ctx, HPPA_REG_T0); + REG_FORCE_SEEN(ctx, HPPA_REG_T1); + REG_FORCE_SEEN(ctx, HPPA_REG_T2); + REG_FORCE_SEEN(ctx, HPPA_REG_T3); + REG_FORCE_SEEN(ctx, HPPA_REG_T4); + REG_FORCE_SEEN(ctx, HPPA_REG_T5); + + /* save callee-save registers. */ + for (i = 3; i <= 18; i++) { + if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i))) + continue; + emit(hppa_stw(HPPA_R(i), -REG_SIZE * (8 + (i-3)), HPPA_REG_SP), ctx); // stw ri,-save_area(sp) + } + + /* + * now really set the tail call counter (TCC) register. + */ + if (REG_WAS_SEEN(ctx, HPPA_REG_TCC)) + emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx); + + /* + * save epilogue function pointer for outer TCC call chain. + * The main TCC call stores the final RP on stack. + */ + addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset]; + /* skip first two instructions of exit function, which jump to exit */ + addr += 2 * HPPA_INSN_SIZE; + emit(hppa_ldil(addr, HPPA_REG_T2), ctx); + emit(hppa_ldo(im11(addr), HPPA_REG_T2, HPPA_REG_T2), ctx); + emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx); + + /* load R1 & R2 from registers, R3-R5 from stack. */ + /* use HPPA_REG_R1 which holds the old stack value */ + dst = regmap[BPF_REG_5]; + reg = bpf_get_reg64_ref(dst, tmp, false, ctx); + if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) { + if (REG_WAS_SEEN(ctx, hi(reg))) + emit(hppa_ldw(-0x48, HPPA_REG_R1, hi(reg)), ctx); + if (REG_WAS_SEEN(ctx, lo(reg))) + emit(hppa_ldw(-0x44, HPPA_REG_R1, lo(reg)), ctx); + bpf_put_reg64(dst, tmp, ctx); + } + + dst = regmap[BPF_REG_4]; + reg = bpf_get_reg64_ref(dst, tmp, false, ctx); + if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) { + if (REG_WAS_SEEN(ctx, hi(reg))) + emit(hppa_ldw(-0x40, HPPA_REG_R1, hi(reg)), ctx); + if (REG_WAS_SEEN(ctx, lo(reg))) + emit(hppa_ldw(-0x3c, HPPA_REG_R1, lo(reg)), ctx); + bpf_put_reg64(dst, tmp, ctx); + } + + dst = regmap[BPF_REG_3]; + reg = bpf_get_reg64_ref(dst, tmp, false, ctx); + if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) { + if (REG_WAS_SEEN(ctx, hi(reg))) + emit(hppa_ldw(-0x38, HPPA_REG_R1, hi(reg)), ctx); + if (REG_WAS_SEEN(ctx, lo(reg))) + emit(hppa_ldw(-0x34, HPPA_REG_R1, lo(reg)), ctx); + bpf_put_reg64(dst, tmp, ctx); + } + + dst = regmap[BPF_REG_2]; + reg = bpf_get_reg64_ref(dst, tmp, false, ctx); + if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) { + if (REG_WAS_SEEN(ctx, hi(reg))) + emit_hppa_copy(HPPA_REG_ARG3, hi(reg), ctx); + if (REG_WAS_SEEN(ctx, lo(reg))) + emit_hppa_copy(HPPA_REG_ARG2, lo(reg), ctx); + bpf_put_reg64(dst, tmp, ctx); + } + + dst = regmap[BPF_REG_1]; + reg = bpf_get_reg64_ref(dst, tmp, false, ctx); + if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) { + if (REG_WAS_SEEN(ctx, hi(reg))) + emit_hppa_copy(HPPA_REG_ARG1, hi(reg), ctx); + if (REG_WAS_SEEN(ctx, lo(reg))) + emit_hppa_copy(HPPA_REG_ARG0, lo(reg), ctx); + bpf_put_reg64(dst, tmp, ctx); + } + + /* Set up BPF frame pointer. */ + dst = regmap[BPF_REG_FP]; + reg = bpf_get_reg64_ref(dst, tmp, false, ctx); + if (REG_WAS_SEEN(ctx, lo(reg)) | REG_WAS_SEEN(ctx, hi(reg))) { + if (REG_WAS_SEEN(ctx, lo(reg))) + emit(hppa_ldo(-REG_SIZE * (NR_SAVED_REGISTERS + BPF_JIT_SCRATCH_REGS), + HPPA_REG_SP, lo(reg)), ctx); + if (REG_WAS_SEEN(ctx, hi(reg))) + emit_hppa_copy(HPPA_REG_ZERO, hi(reg), ctx); + bpf_put_reg64(dst, tmp, ctx); + } + + emit(hppa_nop(), ctx); +} + +void bpf_jit_build_epilogue(struct hppa_jit_context *ctx) +{ + __build_epilogue(false, ctx); +} diff --git a/arch/parisc/net/bpf_jit_comp64.c b/arch/parisc/net/bpf_jit_comp64.c new file mode 100644 index 000000000000..54b0d5e25e02 --- /dev/null +++ b/arch/parisc/net/bpf_jit_comp64.c @@ -0,0 +1,1209 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * BPF JIT compiler for PA-RISC (64-bit) + * + * Copyright(c) 2023 Helge Deller <deller@gmx.de> + * + * The code is based on the BPF JIT compiler for RV64 by Björn Töpel. + * + * TODO: + * - check if bpf_jit_needs_zext() is needed (currently enabled) + * - implement arch_prepare_bpf_trampoline(), poke(), ... + */ + +#include <linux/bitfield.h> +#include <linux/bpf.h> +#include <linux/filter.h> +#include <linux/libgcc.h> +#include "bpf_jit.h" + +static const int regmap[] = { + [BPF_REG_0] = HPPA_REG_RET0, + [BPF_REG_1] = HPPA_R(5), + [BPF_REG_2] = HPPA_R(6), + [BPF_REG_3] = HPPA_R(7), + [BPF_REG_4] = HPPA_R(8), + [BPF_REG_5] = HPPA_R(9), + [BPF_REG_6] = HPPA_R(10), + [BPF_REG_7] = HPPA_R(11), + [BPF_REG_8] = HPPA_R(12), + [BPF_REG_9] = HPPA_R(13), + [BPF_REG_FP] = HPPA_R(14), + [BPF_REG_AX] = HPPA_R(15), +}; + +/* + * Stack layout during BPF program execution (note: stack grows up): + * + * high + * HPPA64 sp => +----------+ <= HPPA64 fp + * | saved sp | + * | saved rp | + * | ... | HPPA64 callee-saved registers + * | curr args| + * | local var| + * +----------+ <= (BPF FP) + * | | + * | ... | BPF program stack + * | | + * | ... | Function call stack + * | | + * +----------+ + * low + */ + +/* Offset from fp for BPF registers stored on stack. */ +#define STACK_ALIGN FRAME_SIZE + +#define EXIT_PTR_LOAD(reg) hppa64_ldd_im16(-FRAME_SIZE, HPPA_REG_SP, reg) +#define EXIT_PTR_STORE(reg) hppa64_std_im16(reg, -FRAME_SIZE, HPPA_REG_SP) +#define EXIT_PTR_JUMP(reg, nop) hppa_bv(HPPA_REG_ZERO, reg, nop) + +static u8 bpf_to_hppa_reg(int bpf_reg, struct hppa_jit_context *ctx) +{ + u8 reg = regmap[bpf_reg]; + + REG_SET_SEEN(ctx, reg); + return reg; +}; + +static void emit_hppa_copy(const s8 rs, const s8 rd, struct hppa_jit_context *ctx) +{ + REG_SET_SEEN(ctx, rd); + if (OPTIMIZE_HPPA && (rs == rd)) + return; + REG_SET_SEEN(ctx, rs); + emit(hppa_copy(rs, rd), ctx); +} + +static void emit_hppa64_depd(u8 src, u8 pos, u8 len, u8 target, bool no_zero, struct hppa_jit_context *ctx) +{ + int c; + + pos &= (BITS_PER_LONG - 1); + pos = 63 - pos; + len = 64 - len; + c = (len < 32) ? 0x4 : 0; + c |= (pos >= 32) ? 0x2 : 0; + c |= (no_zero) ? 0x1 : 0; + emit(hppa_t10_insn(0x3c, target, src, 0, c, pos & 0x1f, len & 0x1f), ctx); +} + +static void emit_hppa64_shld(u8 src, int num, u8 target, struct hppa_jit_context *ctx) +{ + emit_hppa64_depd(src, 63-num, 64-num, target, 0, ctx); +} + +static void emit_hppa64_extrd(u8 src, u8 pos, u8 len, u8 target, bool signed_op, struct hppa_jit_context *ctx) +{ + int c; + + pos &= (BITS_PER_LONG - 1); + len = 64 - len; + c = (len < 32) ? 0x4 : 0; + c |= (pos >= 32) ? 0x2 : 0; + c |= signed_op ? 0x1 : 0; + emit(hppa_t10_insn(0x36, src, target, 0, c, pos & 0x1f, len & 0x1f), ctx); +} + +static void emit_hppa64_extrw(u8 src, u8 pos, u8 len, u8 target, bool signed_op, struct hppa_jit_context *ctx) +{ + int c; + + pos &= (32 - 1); + len = 32 - len; + c = 0x06 | (signed_op ? 1 : 0); + emit(hppa_t10_insn(0x34, src, target, 0, c, pos, len), ctx); +} + +#define emit_hppa64_zext32(r, target, ctx) \ + emit_hppa64_extrd(r, 63, 32, target, false, ctx) +#define emit_hppa64_sext32(r, target, ctx) \ + emit_hppa64_extrd(r, 63, 32, target, true, ctx) + +static void emit_hppa64_shrd(u8 src, int num, u8 target, bool signed_op, struct hppa_jit_context *ctx) +{ + emit_hppa64_extrd(src, 63-num, 64-num, target, signed_op, ctx); +} + +static void emit_hppa64_shrw(u8 src, int num, u8 target, bool signed_op, struct hppa_jit_context *ctx) +{ + emit_hppa64_extrw(src, 31-num, 32-num, target, signed_op, ctx); +} + +/* Emit variable-length instructions for 32-bit imm */ +static void emit_imm32(u8 rd, s32 imm, struct hppa_jit_context *ctx) +{ + u32 lower = im11(imm); + + REG_SET_SEEN(ctx, rd); + if (OPTIMIZE_HPPA && relative_bits_ok(imm, 14)) { + emit(hppa_ldi(imm, rd), ctx); + return; + } + if (OPTIMIZE_HPPA && lower == imm) { + emit(hppa_ldo(lower, HPPA_REG_ZERO, rd), ctx); + return; + } + emit(hppa_ldil(imm, rd), ctx); + if (OPTIMIZE_HPPA && (lower == 0)) + return; + emit(hppa_ldo(lower, rd, rd), ctx); +} + +static bool is_32b_int(s64 val) +{ + return val == (s32) val; +} + +/* Emit variable-length instructions for 64-bit imm */ +static void emit_imm(u8 rd, s64 imm, u8 tmpreg, struct hppa_jit_context *ctx) +{ + u32 upper32; + + /* get lower 32-bits into rd, sign extended */ + emit_imm32(rd, imm, ctx); + + /* do we have upper 32-bits too ? */ + if (OPTIMIZE_HPPA && is_32b_int(imm)) + return; + + /* load upper 32-bits into lower tmpreg and deposit into rd */ + upper32 = imm >> 32; + if (upper32 || !OPTIMIZE_HPPA) { + emit_imm32(tmpreg, upper32, ctx); + emit_hppa64_depd(tmpreg, 31, 32, rd, 1, ctx); + } else + emit_hppa64_depd(HPPA_REG_ZERO, 31, 32, rd, 1, ctx); + +} + +static int emit_jump(signed long paoff, bool force_far, + struct hppa_jit_context *ctx) +{ + unsigned long pc, addr; + + /* Note: Use 2 instructions for jumps if force_far is set. */ + if (relative_bits_ok(paoff - HPPA_BRANCH_DISPLACEMENT, 22)) { + /* use BL,long branch followed by nop() */ + emit(hppa64_bl_long(paoff - HPPA_BRANCH_DISPLACEMENT), ctx); + if (force_far) + emit(hppa_nop(), ctx); + return 0; + } + + pc = (uintptr_t) &ctx->insns[ctx->ninsns]; + addr = pc + (paoff * HPPA_INSN_SIZE); + /* even the 64-bit kernel runs in memory below 4GB */ + if (WARN_ON_ONCE(addr >> 32)) + return -E2BIG; + emit(hppa_ldil(addr, HPPA_REG_R31), ctx); + emit(hppa_be_l(im11(addr) >> 2, HPPA_REG_R31, NOP_NEXT_INSTR), ctx); + return 0; +} + +static void __build_epilogue(bool is_tail_call, struct hppa_jit_context *ctx) +{ + int i; + + if (is_tail_call) { + /* + * goto *(t0 + 4); + * Skips first instruction of prologue which initializes tail + * call counter. Assumes t0 contains address of target program, + * see emit_bpf_tail_call. + */ + emit(hppa_ldo(1 * HPPA_INSN_SIZE, HPPA_REG_T0, HPPA_REG_T0), ctx); + emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_T0, EXEC_NEXT_INSTR), ctx); + /* in delay slot: */ + emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_IN_INIT), ctx); + + return; + } + + /* load epilogue function pointer and jump to it. */ + /* exit point is either at next instruction, or the outest TCC exit function */ + emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + + /* NOTE: we are 64-bit and big-endian, so return lower sign-extended 32-bit value */ + emit_hppa64_sext32(regmap[BPF_REG_0], HPPA_REG_RET0, ctx); + + /* Restore callee-saved registers. */ + for (i = 3; i <= 15; i++) { + if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i))) + continue; + emit(hppa64_ldd_im16(-REG_SIZE * i, HPPA_REG_SP, HPPA_R(i)), ctx); + } + + /* load original return pointer (stored by outest TCC function) */ + emit(hppa64_ldd_im16(-2*REG_SIZE, HPPA_REG_SP, HPPA_REG_RP), ctx); + emit(hppa_bv(HPPA_REG_ZERO, HPPA_REG_RP, EXEC_NEXT_INSTR), ctx); + /* in delay slot: */ + emit(hppa64_ldd_im5(-REG_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx); + + emit(hppa_nop(), ctx); // XXX WARUM einer zu wenig ?? +} + +static int emit_branch(u8 op, u8 rd, u8 rs, signed long paoff, + struct hppa_jit_context *ctx) +{ + int e, s; + bool far = false; + int off; + + if (op == BPF_JSET) { + /* + * BPF_JSET is a special case: it has no inverse so translate + * to and() function and compare against zero + */ + emit(hppa_and(rd, rs, HPPA_REG_T0), ctx); + paoff -= 1; /* reduce offset due to hppa_and() above */ + rd = HPPA_REG_T0; + rs = HPPA_REG_ZERO; + op = BPF_JNE; + } + + /* set start after BPF_JSET */ + s = ctx->ninsns; + + if (!relative_branch_ok(paoff - HPPA_BRANCH_DISPLACEMENT + 1, 12)) { + op = invert_bpf_cond(op); + far = true; + } + + /* + * For a far branch, the condition is negated and we jump over the + * branch itself, and the two instructions from emit_jump. + * For a near branch, just use paoff. + */ + off = far ? (2 - HPPA_BRANCH_DISPLACEMENT) : paoff - HPPA_BRANCH_DISPLACEMENT; + + switch (op) { + /* IF (dst COND src) JUMP off */ + case BPF_JEQ: + emit(hppa_beq(rd, rs, off), ctx); + break; + case BPF_JGT: + emit(hppa_bgtu(rd, rs, off), ctx); + break; + case BPF_JLT: + emit(hppa_bltu(rd, rs, off), ctx); + break; + case BPF_JGE: + emit(hppa_bgeu(rd, rs, off), ctx); + break; + case BPF_JLE: + emit(hppa_bleu(rd, rs, off), ctx); + break; + case BPF_JNE: + emit(hppa_bne(rd, rs, off), ctx); + break; + case BPF_JSGT: + emit(hppa_bgt(rd, rs, off), ctx); + break; + case BPF_JSLT: + emit(hppa_blt(rd, rs, off), ctx); + break; + case BPF_JSGE: + emit(hppa_bge(rd, rs, off), ctx); + break; + case BPF_JSLE: + emit(hppa_ble(rd, rs, off), ctx); + break; + default: + WARN_ON(1); + } + + if (far) { + int ret; + e = ctx->ninsns; + /* Adjust for extra insns. */ + paoff -= (e - s); + ret = emit_jump(paoff, true, ctx); + if (ret) + return ret; + } else { + /* + * always allocate 2 nops instead of the far branch to + * reduce translation loops + */ + emit(hppa_nop(), ctx); + emit(hppa_nop(), ctx); + } + return 0; +} + +static void emit_zext_32(u8 reg, struct hppa_jit_context *ctx) +{ + emit_hppa64_zext32(reg, reg, ctx); +} + +static void emit_bpf_tail_call(int insn, struct hppa_jit_context *ctx) +{ + /* + * R1 -> &ctx + * R2 -> &array + * R3 -> index + */ + int off; + const s8 arr_reg = regmap[BPF_REG_2]; + const s8 idx_reg = regmap[BPF_REG_3]; + struct bpf_array bpfa; + struct bpf_prog bpfp; + + /* if there is any tail call, we need to save & restore all registers */ + REG_SET_SEEN_ALL(ctx); + + /* get address of TCC main exit function for error case into rp */ + emit(EXIT_PTR_LOAD(HPPA_REG_RP), ctx); + + /* max_entries = array->map.max_entries; */ + off = offsetof(struct bpf_array, map.max_entries); + BUILD_BUG_ON(sizeof(bpfa.map.max_entries) != 4); + emit(hppa_ldw(off, arr_reg, HPPA_REG_T1), ctx); + + /* + * if (index >= max_entries) + * goto out; + */ + emit(hppa_bltu(idx_reg, HPPA_REG_T1, 2 - HPPA_BRANCH_DISPLACEMENT), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + + /* + * if (--tcc < 0) + * goto out; + */ + REG_FORCE_SEEN(ctx, HPPA_REG_TCC); + emit(hppa_ldo(-1, HPPA_REG_TCC, HPPA_REG_TCC), ctx); + emit(hppa_bge(HPPA_REG_TCC, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + + /* + * prog = array->ptrs[index]; + * if (!prog) + * goto out; + */ + BUILD_BUG_ON(sizeof(bpfa.ptrs[0]) != 8); + emit(hppa64_shladd(idx_reg, 3, arr_reg, HPPA_REG_T0), ctx); + off = offsetof(struct bpf_array, ptrs); + BUILD_BUG_ON(off < 16); + emit(hppa64_ldd_im16(off, HPPA_REG_T0, HPPA_REG_T0), ctx); + emit(hppa_bne(HPPA_REG_T0, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx); + emit(EXIT_PTR_JUMP(HPPA_REG_RP, NOP_NEXT_INSTR), ctx); + + /* + * tcc = temp_tcc; + * goto *(prog->bpf_func + 4); + */ + off = offsetof(struct bpf_prog, bpf_func); + BUILD_BUG_ON(off < 16); + BUILD_BUG_ON(sizeof(bpfp.bpf_func) != 8); + emit(hppa64_ldd_im16(off, HPPA_REG_T0, HPPA_REG_T0), ctx); + /* Epilogue jumps to *(t0 + 4). */ + __build_epilogue(true, ctx); +} + +static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn, + struct hppa_jit_context *ctx) +{ + u8 code = insn->code; + + switch (code) { + case BPF_JMP | BPF_JA: + case BPF_JMP | BPF_CALL: + case BPF_JMP | BPF_EXIT: + case BPF_JMP | BPF_TAIL_CALL: + break; + default: + *rd = bpf_to_hppa_reg(insn->dst_reg, ctx); + } + + if (code & (BPF_ALU | BPF_X) || code & (BPF_ALU64 | BPF_X) || + code & (BPF_JMP | BPF_X) || code & (BPF_JMP32 | BPF_X) || + code & BPF_LDX || code & BPF_STX) + *rs = bpf_to_hppa_reg(insn->src_reg, ctx); +} + +static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct hppa_jit_context *ctx) +{ + emit_hppa64_zext32(*rd, HPPA_REG_T2, ctx); + *rd = HPPA_REG_T2; + emit_hppa64_zext32(*rs, HPPA_REG_T1, ctx); + *rs = HPPA_REG_T1; +} + +static void emit_sext_32_rd_rs(u8 *rd, u8 *rs, struct hppa_jit_context *ctx) +{ + emit_hppa64_sext32(*rd, HPPA_REG_T2, ctx); + *rd = HPPA_REG_T2; + emit_hppa64_sext32(*rs, HPPA_REG_T1, ctx); + *rs = HPPA_REG_T1; +} + +static void emit_zext_32_rd_t1(u8 *rd, struct hppa_jit_context *ctx) +{ + emit_hppa64_zext32(*rd, HPPA_REG_T2, ctx); + *rd = HPPA_REG_T2; + emit_zext_32(HPPA_REG_T1, ctx); +} + +static void emit_sext_32_rd(u8 *rd, struct hppa_jit_context *ctx) +{ + emit_hppa64_sext32(*rd, HPPA_REG_T2, ctx); + *rd = HPPA_REG_T2; +} + +static bool is_signed_bpf_cond(u8 cond) +{ + return cond == BPF_JSGT || cond == BPF_JSLT || + cond == BPF_JSGE || cond == BPF_JSLE; +} + +static void emit_call(u64 addr, bool fixed, struct hppa_jit_context *ctx) +{ + const int offset_sp = 2*FRAME_SIZE; + + emit(hppa_ldo(offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx); + + emit_hppa_copy(regmap[BPF_REG_1], HPPA_REG_ARG0, ctx); + emit_hppa_copy(regmap[BPF_REG_2], HPPA_REG_ARG1, ctx); + emit_hppa_copy(regmap[BPF_REG_3], HPPA_REG_ARG2, ctx); + emit_hppa_copy(regmap[BPF_REG_4], HPPA_REG_ARG3, ctx); + emit_hppa_copy(regmap[BPF_REG_5], HPPA_REG_ARG4, ctx); + + /* Backup TCC. */ + REG_FORCE_SEEN(ctx, HPPA_REG_TCC_SAVED); + if (REG_WAS_SEEN(ctx, HPPA_REG_TCC)) + emit(hppa_copy(HPPA_REG_TCC, HPPA_REG_TCC_SAVED), ctx); + + /* + * Use ldil() to load absolute address. Don't use emit_imm as the + * number of emitted instructions should not depend on the value of + * addr. + */ + WARN_ON(addr >> 32); + /* load function address and gp from Elf64_Fdesc descriptor */ + emit(hppa_ldil(addr, HPPA_REG_R31), ctx); + emit(hppa_ldo(im11(addr), HPPA_REG_R31, HPPA_REG_R31), ctx); + emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, addr), + HPPA_REG_R31, HPPA_REG_RP), ctx); + emit(hppa64_bve_l_rp(HPPA_REG_RP), ctx); + emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, gp), + HPPA_REG_R31, HPPA_REG_GP), ctx); + + /* Restore TCC. */ + if (REG_WAS_SEEN(ctx, HPPA_REG_TCC)) + emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_TCC), ctx); + + emit(hppa_ldo(-offset_sp, HPPA_REG_SP, HPPA_REG_SP), ctx); + + /* Set return value. */ + emit_hppa_copy(HPPA_REG_RET0, regmap[BPF_REG_0], ctx); +} + +static void emit_call_libgcc_ll(void *func, const s8 arg0, + const s8 arg1, u8 opcode, struct hppa_jit_context *ctx) +{ + u64 func_addr; + + if (BPF_CLASS(opcode) == BPF_ALU) { + emit_hppa64_zext32(arg0, HPPA_REG_ARG0, ctx); + emit_hppa64_zext32(arg1, HPPA_REG_ARG1, ctx); + } else { + emit_hppa_copy(arg0, HPPA_REG_ARG0, ctx); + emit_hppa_copy(arg1, HPPA_REG_ARG1, ctx); + } + + /* libcgcc overwrites HPPA_REG_RET0, so keep copy in HPPA_REG_TCC_SAVED */ + if (arg0 != HPPA_REG_RET0) { + REG_SET_SEEN(ctx, HPPA_REG_TCC_SAVED); + emit(hppa_copy(HPPA_REG_RET0, HPPA_REG_TCC_SAVED), ctx); + } + + /* set up stack */ + emit(hppa_ldo(FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx); + + func_addr = (uintptr_t) func; + /* load function func_address and gp from Elf64_Fdesc descriptor */ + emit_imm(HPPA_REG_R31, func_addr, arg0, ctx); + emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, addr), + HPPA_REG_R31, HPPA_REG_RP), ctx); + /* skip the following bve_l instruction if divisor is 0. */ + if (BPF_OP(opcode) == BPF_DIV || BPF_OP(opcode) == BPF_MOD) { + if (BPF_OP(opcode) == BPF_DIV) + emit_hppa_copy(HPPA_REG_ZERO, HPPA_REG_RET0, ctx); + else { + emit_hppa_copy(HPPA_REG_ARG0, HPPA_REG_RET0, ctx); + } + emit(hppa_beq(HPPA_REG_ARG1, HPPA_REG_ZERO, 2 - HPPA_BRANCH_DISPLACEMENT), ctx); + } + emit(hppa64_bve_l_rp(HPPA_REG_RP), ctx); + emit(hppa64_ldd_im16(offsetof(struct elf64_fdesc, gp), + HPPA_REG_R31, HPPA_REG_GP), ctx); + + emit(hppa_ldo(-FRAME_SIZE, HPPA_REG_SP, HPPA_REG_SP), ctx); + + emit_hppa_copy(HPPA_REG_RET0, arg0, ctx); + + /* restore HPPA_REG_RET0 */ + if (arg0 != HPPA_REG_RET0) + emit(hppa_copy(HPPA_REG_TCC_SAVED, HPPA_REG_RET0), ctx); +} + +static void emit_store(const s8 rd, const s8 rs, s16 off, + struct hppa_jit_context *ctx, const u8 size, + const u8 mode) +{ + s8 dstreg; + + /* need to calculate address since offset does not fit in 14 bits? */ + if (relative_bits_ok(off, 14)) + dstreg = rd; + else { + /* need to use R1 here, since addil puts result into R1 */ + dstreg = HPPA_REG_R1; + emit(hppa_addil(off, rd), ctx); + off = im11(off); + } + + switch (size) { + case BPF_B: + emit(hppa_stb(rs, off, dstreg), ctx); + break; + case BPF_H: + emit(hppa_sth(rs, off, dstreg), ctx); + break; + case BPF_W: + emit(hppa_stw(rs, off, dstreg), ctx); + break; + case BPF_DW: + if (off & 7) { + emit(hppa_ldo(off, dstreg, HPPA_REG_R1), ctx); + emit(hppa64_std_im5(rs, 0, HPPA_REG_R1), ctx); + } else if (off >= -16 && off <= 15) + emit(hppa64_std_im5(rs, off, dstreg), ctx); + else + emit(hppa64_std_im16(rs, off, dstreg), ctx); + break; + } +} + +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct hppa_jit_context *ctx, + bool extra_pass) +{ + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || + BPF_CLASS(insn->code) == BPF_JMP; + int s, e, ret, i = insn - ctx->prog->insnsi; + s64 paoff; + struct bpf_prog_aux *aux = ctx->prog->aux; + u8 rd = -1, rs = -1, code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + + init_regs(&rd, &rs, insn, ctx); + + switch (code) { + /* dst = src */ + case BPF_ALU | BPF_MOV | BPF_X: + case BPF_ALU64 | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext */ + emit_zext_32(rd, ctx); + break; + } + if (!is64 && !aux->verifier_zext) + emit_hppa64_zext32(rs, rd, ctx); + else + emit_hppa_copy(rs, rd, ctx); + break; + + /* dst = dst OP src */ + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_X: + emit(hppa_add(rd, rs, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_X: + emit(hppa_sub(rd, rs, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU64 | BPF_AND | BPF_X: + emit(hppa_and(rd, rs, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + emit(hppa_or(rd, rs, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_XOR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + emit(hppa_xor(rd, rs, rd), ctx); + if (!is64 && !aux->verifier_zext && rs != rd) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_MUL | BPF_K: + case BPF_ALU64 | BPF_MUL | BPF_K: + emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx); + rs = HPPA_REG_T1; + fallthrough; + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_X: + emit_call_libgcc_ll(__muldi3, rd, rs, code, ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_DIV | BPF_K: + emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx); + rs = HPPA_REG_T1; + fallthrough; + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_X: + emit_call_libgcc_ll(&hppa_div64, rd, rs, code, ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_MOD | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_K: + emit_imm(HPPA_REG_T1, is64 ? (s64)(s32)imm : (u32)imm, HPPA_REG_T2, ctx); + rs = HPPA_REG_T1; + fallthrough; + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_X: + emit_call_libgcc_ll(&hppa_div64_rem, rd, rs, code, ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_LSH | BPF_X: + emit_hppa64_sext32(rs, HPPA_REG_T0, ctx); + emit(hppa64_mtsarcm(HPPA_REG_T0), ctx); + if (is64) + emit(hppa64_depdz_sar(rd, rd), ctx); + else + emit(hppa_depwz_sar(rd, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + emit(hppa_mtsar(rs), ctx); + if (is64) + emit(hppa64_shrpd_sar(rd, rd), ctx); + else + emit(hppa_shrpw_sar(rd, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_ARSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + emit_hppa64_sext32(rs, HPPA_REG_T0, ctx); + emit(hppa64_mtsarcm(HPPA_REG_T0), ctx); + if (is64) + emit(hppa_extrd_sar(rd, rd, 1), ctx); + else + emit(hppa_extrws_sar(rd, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + + /* dst = -dst */ + case BPF_ALU | BPF_NEG: + case BPF_ALU64 | BPF_NEG: + emit(hppa_sub(HPPA_REG_ZERO, rd, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + + /* dst = BSWAP##imm(dst) */ + case BPF_ALU | BPF_END | BPF_FROM_BE: + switch (imm) { + case 16: + /* zero-extend 16 bits into 64 bits */ + emit_hppa64_depd(HPPA_REG_ZERO, 63-16, 64-16, rd, 1, ctx); + break; + case 32: + if (!aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case 64: + /* Do nothing */ + break; + } + break; + + case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm) { + case 16: + emit(hppa_extru(rd, 31 - 8, 8, HPPA_REG_T1), ctx); + emit(hppa_depwz(rd, 23, 8, HPPA_REG_T1), ctx); + emit(hppa_extru(HPPA_REG_T1, 31, 16, rd), ctx); + emit_hppa64_extrd(HPPA_REG_T1, 63, 16, rd, 0, ctx); + break; + case 32: + emit(hppa_shrpw(rd, rd, 16, HPPA_REG_T1), ctx); + emit_hppa64_depd(HPPA_REG_T1, 63-16, 8, HPPA_REG_T1, 1, ctx); + emit(hppa_shrpw(rd, HPPA_REG_T1, 8, HPPA_REG_T1), ctx); + emit_hppa64_extrd(HPPA_REG_T1, 63, 32, rd, 0, ctx); + break; + case 64: + emit(hppa64_permh_3210(rd, HPPA_REG_T1), ctx); + emit(hppa64_hshl(HPPA_REG_T1, 8, HPPA_REG_T2), ctx); + emit(hppa64_hshr_u(HPPA_REG_T1, 8, HPPA_REG_T1), ctx); + emit(hppa_or(HPPA_REG_T2, HPPA_REG_T1, rd), ctx); + break; + default: + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); + return -1; + } + break; + + /* dst = imm */ + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_MOV | BPF_K: + emit_imm(rd, imm, HPPA_REG_T2, ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + + /* dst = dst OP imm */ + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU64 | BPF_ADD | BPF_K: + if (relative_bits_ok(imm, 14)) { + emit(hppa_ldo(imm, rd, rd), ctx); + } else { + emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx); + emit(hppa_add(rd, HPPA_REG_T1, rd), ctx); + } + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU64 | BPF_SUB | BPF_K: + if (relative_bits_ok(-imm, 14)) { + emit(hppa_ldo(-imm, rd, rd), ctx); + } else { + emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx); + emit(hppa_sub(rd, HPPA_REG_T1, rd), ctx); + } + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx); + emit(hppa_and(rd, HPPA_REG_T1, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx); + emit(hppa_or(rd, HPPA_REG_T1, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx); + emit(hppa_xor(rd, HPPA_REG_T1, rd), ctx); + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + if (imm != 0) { + emit_hppa64_shld(rd, imm, rd, ctx); + } + + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + if (imm != 0) { + if (is64) + emit_hppa64_shrd(rd, imm, rd, false, ctx); + else + emit_hppa64_shrw(rd, imm, rd, false, ctx); + } + + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + case BPF_ALU | BPF_ARSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + if (imm != 0) { + if (is64) + emit_hppa64_shrd(rd, imm, rd, true, ctx); + else + emit_hppa64_shrw(rd, imm, rd, true, ctx); + } + + if (!is64 && !aux->verifier_zext) + emit_zext_32(rd, ctx); + break; + + /* JUMP off */ + case BPF_JMP | BPF_JA: + paoff = hppa_offset(i, off, ctx); + ret = emit_jump(paoff, false, ctx); + if (ret) + return ret; + break; + + /* IF (dst COND src) JUMP off */ + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_X: + paoff = hppa_offset(i, off, ctx); + if (!is64) { + s = ctx->ninsns; + if (is_signed_bpf_cond(BPF_OP(code))) + emit_sext_32_rd_rs(&rd, &rs, ctx); + else + emit_zext_32_rd_rs(&rd, &rs, ctx); + e = ctx->ninsns; + + /* Adjust for extra insns */ + paoff -= (e - s); + } + if (BPF_OP(code) == BPF_JSET) { + /* Adjust for and */ + paoff -= 1; + emit(hppa_and(rs, rd, HPPA_REG_T1), ctx); + emit_branch(BPF_JNE, HPPA_REG_T1, HPPA_REG_ZERO, paoff, + ctx); + } else { + emit_branch(BPF_OP(code), rd, rs, paoff, ctx); + } + break; + + /* IF (dst COND imm) JUMP off */ + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_K: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_K: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_K: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP32 | BPF_JGE | BPF_K: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_K: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_K: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_K: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_K: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_K: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_K: + paoff = hppa_offset(i, off, ctx); + s = ctx->ninsns; + if (imm) { + emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx); + rs = HPPA_REG_T1; + } else { + rs = HPPA_REG_ZERO; + } + if (!is64) { + if (is_signed_bpf_cond(BPF_OP(code))) + emit_sext_32_rd(&rd, ctx); + else + emit_zext_32_rd_t1(&rd, ctx); + } + e = ctx->ninsns; + + /* Adjust for extra insns */ + paoff -= (e - s); + emit_branch(BPF_OP(code), rd, rs, paoff, ctx); + break; + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_K: + paoff = hppa_offset(i, off, ctx); + s = ctx->ninsns; + emit_imm(HPPA_REG_T1, imm, HPPA_REG_T2, ctx); + emit(hppa_and(HPPA_REG_T1, rd, HPPA_REG_T1), ctx); + /* For jset32, we should clear the upper 32 bits of t1, but + * sign-extension is sufficient here and saves one instruction, + * as t1 is used only in comparison against zero. + */ + if (!is64 && imm < 0) + emit_hppa64_sext32(HPPA_REG_T1, HPPA_REG_T1, ctx); + e = ctx->ninsns; + paoff -= (e - s); + emit_branch(BPF_JNE, HPPA_REG_T1, HPPA_REG_ZERO, paoff, ctx); + break; + /* function call */ + case BPF_JMP | BPF_CALL: + { + bool fixed_addr; + u64 addr; + + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, + &addr, &fixed_addr); + if (ret < 0) + return ret; + + REG_SET_SEEN_ALL(ctx); + emit_call(addr, fixed_addr, ctx); + break; + } + /* tail call */ + case BPF_JMP | BPF_TAIL_CALL: + emit_bpf_tail_call(i, ctx); + break; + + /* function return */ + case BPF_JMP | BPF_EXIT: + if (i == ctx->prog->len - 1) + break; + + paoff = epilogue_offset(ctx); + ret = emit_jump(paoff, false, ctx); + if (ret) + return ret; + break; + + /* dst = imm64 */ + case BPF_LD | BPF_IMM | BPF_DW: + { + struct bpf_insn insn1 = insn[1]; + u64 imm64 = (u64)insn1.imm << 32 | (u32)imm; + if (bpf_pseudo_func(insn)) + imm64 = (uintptr_t)dereference_function_descriptor((void*)imm64); + emit_imm(rd, imm64, HPPA_REG_T2, ctx); + + return 1; + } + + /* LDX: dst = *(size *)(src + off) */ + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + case BPF_LDX | BPF_PROBE_MEM | BPF_B: + case BPF_LDX | BPF_PROBE_MEM | BPF_H: + case BPF_LDX | BPF_PROBE_MEM | BPF_W: + case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + { + u8 srcreg; + + /* need to calculate address since offset does not fit in 14 bits? */ + if (relative_bits_ok(off, 14)) + srcreg = rs; + else { + /* need to use R1 here, since addil puts result into R1 */ + srcreg = HPPA_REG_R1; + BUG_ON(rs == HPPA_REG_R1); + BUG_ON(rd == HPPA_REG_R1); + emit(hppa_addil(off, rs), ctx); + off = im11(off); + } + + switch (BPF_SIZE(code)) { + case BPF_B: + emit(hppa_ldb(off, srcreg, rd), ctx); + if (insn_is_zext(&insn[1])) + return 1; + break; + case BPF_H: + emit(hppa_ldh(off, srcreg, rd), ctx); + if (insn_is_zext(&insn[1])) + return 1; + break; + case BPF_W: + emit(hppa_ldw(off, srcreg, rd), ctx); + if (insn_is_zext(&insn[1])) + return 1; + break; + case BPF_DW: + if (off & 7) { + emit(hppa_ldo(off, srcreg, HPPA_REG_R1), ctx); + emit(hppa64_ldd_reg(HPPA_REG_ZERO, HPPA_REG_R1, rd), ctx); + } else if (off >= -16 && off <= 15) + emit(hppa64_ldd_im5(off, srcreg, rd), ctx); + else + emit(hppa64_ldd_im16(off, srcreg, rd), ctx); + break; + } + break; + } + /* speculation barrier */ + case BPF_ST | BPF_NOSPEC: + break; + + /* ST: *(size *)(dst + off) = imm */ + /* STX: *(size *)(dst + off) = src */ + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | BPF_MEM | BPF_DW: + + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_DW: + if (BPF_CLASS(code) == BPF_ST) { + emit_imm(HPPA_REG_T2, imm, HPPA_REG_T1, ctx); + rs = HPPA_REG_T2; + } + + emit_store(rd, rs, off, ctx, BPF_SIZE(code), BPF_MODE(code)); + break; + + case BPF_STX | BPF_ATOMIC | BPF_W: + case BPF_STX | BPF_ATOMIC | BPF_DW: + pr_info_once( + "bpf-jit: not supported: atomic operation %02x ***\n", + insn->imm); + return -EFAULT; + + default: + pr_err("bpf-jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; +} + +void bpf_jit_build_prologue(struct hppa_jit_context *ctx) +{ + int bpf_stack_adjust, stack_adjust, i; + unsigned long addr; + s8 reg; + + /* + * stack on hppa grows up, so if tail calls are used we need to + * allocate the maximum stack size + */ + if (REG_ALL_SEEN(ctx)) + bpf_stack_adjust = MAX_BPF_STACK; + else + bpf_stack_adjust = ctx->prog->aux->stack_depth; + bpf_stack_adjust = round_up(bpf_stack_adjust, STACK_ALIGN); + + stack_adjust = FRAME_SIZE + bpf_stack_adjust; + stack_adjust = round_up(stack_adjust, STACK_ALIGN); + + /* + * NOTE: We construct an Elf64_Fdesc descriptor here. + * The first 4 words initialize the TCC and compares them. + * Then follows the virtual address of the eBPF function, + * and the gp for this function. + * + * The first instruction sets the tail-call-counter (TCC) register. + * This instruction is skipped by tail calls. + * Use a temporary register instead of a caller-saved register initially. + */ + REG_FORCE_SEEN(ctx, HPPA_REG_TCC_IN_INIT); + emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC_IN_INIT), ctx); + + /* + * Skip all initializations when called as BPF TAIL call. + */ + emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_R1), ctx); + emit(hppa_beq(HPPA_REG_TCC_IN_INIT, HPPA_REG_R1, 6 - HPPA_BRANCH_DISPLACEMENT), ctx); + emit(hppa64_bl_long(ctx->prologue_len - 3 - HPPA_BRANCH_DISPLACEMENT), ctx); + + /* store entry address of this eBPF function */ + addr = (uintptr_t) &ctx->insns[0]; + emit(addr >> 32, ctx); + emit(addr & 0xffffffff, ctx); + + /* store gp of this eBPF function */ + asm("copy %%r27,%0" : "=r" (addr) ); + emit(addr >> 32, ctx); + emit(addr & 0xffffffff, ctx); + + /* Set up hppa stack frame. */ + emit_hppa_copy(HPPA_REG_SP, HPPA_REG_R1, ctx); + emit(hppa_ldo(stack_adjust, HPPA_REG_SP, HPPA_REG_SP), ctx); + emit(hppa64_std_im5 (HPPA_REG_R1, -REG_SIZE, HPPA_REG_SP), ctx); + emit(hppa64_std_im16(HPPA_REG_RP, -2*REG_SIZE, HPPA_REG_SP), ctx); + + /* Save callee-save registers. */ + for (i = 3; i <= 15; i++) { + if (OPTIMIZE_HPPA && !REG_WAS_SEEN(ctx, HPPA_R(i))) + continue; + emit(hppa64_std_im16(HPPA_R(i), -REG_SIZE * i, HPPA_REG_SP), ctx); + } + + /* load function parameters; load all if we use tail functions */ + #define LOAD_PARAM(arg, dst) \ + if (REG_WAS_SEEN(ctx, regmap[dst]) || \ + REG_WAS_SEEN(ctx, HPPA_REG_TCC)) \ + emit_hppa_copy(arg, regmap[dst], ctx) + LOAD_PARAM(HPPA_REG_ARG0, BPF_REG_1); + LOAD_PARAM(HPPA_REG_ARG1, BPF_REG_2); + LOAD_PARAM(HPPA_REG_ARG2, BPF_REG_3); + LOAD_PARAM(HPPA_REG_ARG3, BPF_REG_4); + LOAD_PARAM(HPPA_REG_ARG4, BPF_REG_5); + #undef LOAD_PARAM + + REG_FORCE_SEEN(ctx, HPPA_REG_T0); + REG_FORCE_SEEN(ctx, HPPA_REG_T1); + REG_FORCE_SEEN(ctx, HPPA_REG_T2); + + /* + * Now really set the tail call counter (TCC) register. + */ + if (REG_WAS_SEEN(ctx, HPPA_REG_TCC)) + emit(hppa_ldi(MAX_TAIL_CALL_CNT, HPPA_REG_TCC), ctx); + + /* + * Save epilogue function pointer for outer TCC call chain. + * The main TCC call stores the final RP on stack. + */ + addr = (uintptr_t) &ctx->insns[ctx->epilogue_offset]; + /* skip first two instructions which jump to exit */ + addr += 2 * HPPA_INSN_SIZE; + emit_imm(HPPA_REG_T2, addr, HPPA_REG_T1, ctx); + emit(EXIT_PTR_STORE(HPPA_REG_T2), ctx); + + /* Set up BPF frame pointer. */ + reg = regmap[BPF_REG_FP]; /* -> HPPA_REG_FP */ + if (REG_WAS_SEEN(ctx, reg)) { + emit(hppa_ldo(-FRAME_SIZE, HPPA_REG_SP, reg), ctx); + } +} + +void bpf_jit_build_epilogue(struct hppa_jit_context *ctx) +{ + __build_epilogue(false, ctx); +} + +bool bpf_jit_supports_kfunc_call(void) +{ + return true; +} diff --git a/arch/parisc/net/bpf_jit_core.c b/arch/parisc/net/bpf_jit_core.c new file mode 100644 index 000000000000..d6ee2fd45550 --- /dev/null +++ b/arch/parisc/net/bpf_jit_core.c @@ -0,0 +1,201 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common functionality for HPPA32 and HPPA64 BPF JIT compilers + * + * Copyright (c) 2023 Helge Deller <deller@gmx.de> + * + */ + +#include <linux/bpf.h> +#include <linux/filter.h> +#include "bpf_jit.h" + +/* Number of iterations to try until offsets converge. */ +#define NR_JIT_ITERATIONS 35 + +static int build_body(struct hppa_jit_context *ctx, bool extra_pass, int *offset) +{ + const struct bpf_prog *prog = ctx->prog; + int i; + + ctx->reg_seen_collect = true; + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + ret = bpf_jit_emit_insn(insn, ctx, extra_pass); + /* BPF_LD | BPF_IMM | BPF_DW: skip the next instruction. */ + if (ret > 0) + i++; + if (offset) + offset[i] = ctx->ninsns; + if (ret < 0) + return ret; + } + ctx->reg_seen_collect = false; + return 0; +} + +bool bpf_jit_needs_zext(void) +{ + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + unsigned int prog_size = 0, extable_size = 0; + bool tmp_blinded = false, extra_pass = false; + struct bpf_prog *tmp, *orig_prog = prog; + int pass = 0, prev_ninsns = 0, prologue_len, i; + struct hppa_jit_data *jit_data; + struct hppa_jit_context *ctx; + + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + + ctx = &jit_data->ctx; + + if (ctx->offset) { + extra_pass = true; + prog_size = sizeof(*ctx->insns) * ctx->ninsns; + goto skip_init_ctx; + } + + ctx->prog = prog; + ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + if (!ctx->offset) { + prog = orig_prog; + goto out_offset; + } + for (i = 0; i < prog->len; i++) { + prev_ninsns += 20; + ctx->offset[i] = prev_ninsns; + } + + for (i = 0; i < NR_JIT_ITERATIONS; i++) { + pass++; + ctx->ninsns = 0; + if (build_body(ctx, extra_pass, ctx->offset)) { + prog = orig_prog; + goto out_offset; + } + ctx->body_len = ctx->ninsns; + bpf_jit_build_prologue(ctx); + ctx->prologue_len = ctx->ninsns - ctx->body_len; + ctx->epilogue_offset = ctx->ninsns; + bpf_jit_build_epilogue(ctx); + + if (ctx->ninsns == prev_ninsns) { + if (jit_data->header) + break; + /* obtain the actual image size */ + extable_size = prog->aux->num_exentries * + sizeof(struct exception_table_entry); + prog_size = sizeof(*ctx->insns) * ctx->ninsns; + + jit_data->header = + bpf_jit_binary_alloc(prog_size + extable_size, + &jit_data->image, + sizeof(u32), + bpf_fill_ill_insns); + if (!jit_data->header) { + prog = orig_prog; + goto out_offset; + } + + ctx->insns = (u32 *)jit_data->image; + /* + * Now, when the image is allocated, the image can + * potentially shrink more (auipc/jalr -> jal). + */ + } + prev_ninsns = ctx->ninsns; + } + + if (i == NR_JIT_ITERATIONS) { + pr_err("bpf-jit: image did not converge in <%d passes!\n", i); + if (jit_data->header) + bpf_jit_binary_free(jit_data->header); + prog = orig_prog; + goto out_offset; + } + + if (extable_size) + prog->aux->extable = (void *)ctx->insns + prog_size; + +skip_init_ctx: + pass++; + ctx->ninsns = 0; + + bpf_jit_build_prologue(ctx); + if (build_body(ctx, extra_pass, NULL)) { + bpf_jit_binary_free(jit_data->header); + prog = orig_prog; + goto out_offset; + } + bpf_jit_build_epilogue(ctx); + + if (HPPA_JIT_DEBUG || bpf_jit_enable > 1) { + if (HPPA_JIT_DUMP) + bpf_jit_dump(prog->len, prog_size, pass, ctx->insns); + if (HPPA_JIT_REBOOT) + { extern int machine_restart(char *); machine_restart(""); } + } + + prog->bpf_func = (void *)ctx->insns; + prog->jited = 1; + prog->jited_len = prog_size; + + bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); + + if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(jit_data->header); + prologue_len = ctx->epilogue_offset - ctx->body_len; + for (i = 0; i < prog->len; i++) + ctx->offset[i] += prologue_len; + bpf_prog_fill_jited_linfo(prog, ctx->offset); +out_offset: + kfree(ctx->offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } +out: + if (HPPA_JIT_REBOOT) + { extern int machine_restart(char *); machine_restart(""); } + + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + return prog; +} + +u64 hppa_div64(u64 div, u64 divisor) +{ + div = div64_u64(div, divisor); + return div; +} + +u64 hppa_div64_rem(u64 div, u64 divisor) +{ + u64 rem; + div64_u64_rem(div, divisor, &rem); + return rem; +} diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig index ad1872518992..f25024afdda5 100644 --- a/arch/powerpc/crypto/Kconfig +++ b/arch/powerpc/crypto/Kconfig @@ -111,4 +111,30 @@ config CRYPTO_AES_GCM_P10 Support for cryptographic acceleration instructions on Power10 or later CPU. This module supports stitched acceleration for AES/GCM. +config CRYPTO_CHACHA20_P10 + tristate "Ciphers: ChaCha20, XChacha20, XChacha12 (P10 or later)" + depends on PPC64 && CPU_LITTLE_ENDIAN + select CRYPTO_SKCIPHER + select CRYPTO_LIB_CHACHA_GENERIC + select CRYPTO_ARCH_HAVE_LIB_CHACHA + help + Length-preserving ciphers: ChaCha20, XChaCha20, and XChaCha12 + stream cipher algorithms + + Architecture: PowerPC64 + - Power10 or later + - Little-endian + +config CRYPTO_POLY1305_P10 + tristate "Hash functions: Poly1305 (P10 or later)" + depends on PPC64 && CPU_LITTLE_ENDIAN + select CRYPTO_HASH + select CRYPTO_LIB_POLY1305_GENERIC + help + Poly1305 authenticator algorithm (RFC7539) + + Architecture: PowerPC64 + - Power10 or later + - Little-endian + endmenu diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile index 7b4f516abec1..ebdac1b9eb9a 100644 --- a/arch/powerpc/crypto/Makefile +++ b/arch/powerpc/crypto/Makefile @@ -14,6 +14,8 @@ obj-$(CONFIG_CRYPTO_CRC32C_VPMSUM) += crc32c-vpmsum.o obj-$(CONFIG_CRYPTO_CRCT10DIF_VPMSUM) += crct10dif-vpmsum.o obj-$(CONFIG_CRYPTO_VPMSUM_TESTER) += crc-vpmsum_test.o obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o +obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o +obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o aes-spe-glue.o md5-ppc-y := md5-asm.o md5-glue.o @@ -23,6 +25,8 @@ sha256-ppc-spe-y := sha256-spe-asm.o sha256-spe-glue.o crc32c-vpmsum-y := crc32c-vpmsum_asm.o crc32c-vpmsum_glue.o crct10dif-vpmsum-y := crct10dif-vpmsum_asm.o crct10dif-vpmsum_glue.o aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o ghashp10-ppc.o aesp10-ppc.o +chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o +poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o quiet_cmd_perl = PERL $@ cmd_perl = $(PERL) $< $(if $(CONFIG_CPU_LITTLE_ENDIAN), linux-ppc64le, linux-ppc64) > $@ diff --git a/arch/powerpc/crypto/chacha-p10-glue.c b/arch/powerpc/crypto/chacha-p10-glue.c new file mode 100644 index 000000000000..74fb86b0d209 --- /dev/null +++ b/arch/powerpc/crypto/chacha-p10-glue.c @@ -0,0 +1,221 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * PowerPC P10 (ppc64le) accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright 2023- IBM Corp. All rights reserved. + */ + +#include <crypto/algapi.h> +#include <crypto/internal/chacha.h> +#include <crypto/internal/simd.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/cpufeature.h> +#include <linux/sizes.h> +#include <asm/simd.h> +#include <asm/switch_to.h> + +asmlinkage void chacha_p10le_8x(u32 *state, u8 *dst, const u8 *src, + unsigned int len, int nrounds); + +static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_p10); + +static void vsx_begin(void) +{ + preempt_disable(); + enable_kernel_vsx(); +} + +static void vsx_end(void) +{ + disable_kernel_vsx(); + preempt_enable(); +} + +static void chacha_p10_do_8x(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds) +{ + unsigned int l = bytes & ~0x0FF; + + if (l > 0) { + chacha_p10le_8x(state, dst, src, l, nrounds); + bytes -= l; + src += l; + dst += l; + state[12] += l / CHACHA_BLOCK_SIZE; + } + + if (bytes > 0) + chacha_crypt_generic(state, dst, src, bytes, nrounds); +} + +void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds) +{ + hchacha_block_generic(state, stream, nrounds); +} +EXPORT_SYMBOL(hchacha_block_arch); + +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + +void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, unsigned int bytes, + int nrounds) +{ + if (!static_branch_likely(&have_p10) || bytes <= CHACHA_BLOCK_SIZE || + !crypto_simd_usable()) + return chacha_crypt_generic(state, dst, src, bytes, nrounds); + + do { + unsigned int todo = min_t(unsigned int, bytes, SZ_4K); + + vsx_begin(); + chacha_p10_do_8x(state, dst, src, todo, nrounds); + vsx_end(); + + bytes -= todo; + src += todo; + dst += todo; + } while (bytes); +} +EXPORT_SYMBOL(chacha_crypt_arch); + +static int chacha_p10_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + if (err) + return err; + + chacha_init_generic(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = rounddown(nbytes, walk.stride); + + if (!crypto_simd_usable()) { + chacha_crypt_generic(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, + ctx->nrounds); + } else { + vsx_begin(); + chacha_p10_do_8x(state, walk.dst.virt.addr, + walk.src.virt.addr, nbytes, ctx->nrounds); + vsx_end(); + } + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + if (err) + break; + } + + return err; +} + +static int chacha_p10(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_p10_stream_xor(req, ctx, req->iv); +} + +static int xchacha_p10(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + chacha_init_generic(state, ctx->key, req->iv); + hchacha_block_arch(state, subctx.key, ctx->nrounds); + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_p10_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-p10", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_p10, + .decrypt = chacha_p10, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-p10", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_p10, + .decrypt = xchacha_p10, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-p10", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_p10, + .decrypt = xchacha_p10, + } +}; + +static int __init chacha_p10_init(void) +{ + static_branch_enable(&have_p10); + + return crypto_register_skciphers(algs, ARRAY_SIZE(algs)); +} + +static void __exit chacha_p10_exit(void) +{ + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_cpu_feature_match(PPC_MODULE_FEATURE_P10, chacha_p10_init); +module_exit(chacha_p10_exit); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (P10 accelerated)"); +MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-p10"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-p10"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-p10"); diff --git a/arch/powerpc/crypto/chacha-p10le-8x.S b/arch/powerpc/crypto/chacha-p10le-8x.S new file mode 100644 index 000000000000..17bedb66b822 --- /dev/null +++ b/arch/powerpc/crypto/chacha-p10le-8x.S @@ -0,0 +1,842 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +# +# Accelerated chacha20 implementation for ppc64le. +# +# Copyright 2023- IBM Corp. All rights reserved +# +#=================================================================================== +# Written by Danny Tsen <dtsen@us.ibm.com> +# +# chacha_p10le_8x(u32 *state, byte *dst, const byte *src, +# size_t len, int nrounds); +# +# do rounds, 8 quarter rounds +# 1. a += b; d ^= a; d <<<= 16; +# 2. c += d; b ^= c; b <<<= 12; +# 3. a += b; d ^= a; d <<<= 8; +# 4. c += d; b ^= c; b <<<= 7 +# +# row1 = (row1 + row2), row4 = row1 xor row4, row4 rotate each word by 16 +# row3 = (row3 + row4), row2 = row3 xor row2, row2 rotate each word by 12 +# row1 = (row1 + row2), row4 = row1 xor row4, row4 rotate each word by 8 +# row3 = (row3 + row4), row2 = row3 xor row2, row2 rotate each word by 7 +# +# 4 blocks (a b c d) +# +# a0 b0 c0 d0 +# a1 b1 c1 d1 +# ... +# a4 b4 c4 d4 +# ... +# a8 b8 c8 d8 +# ... +# a12 b12 c12 d12 +# a13 ... +# a14 ... +# a15 b15 c15 d15 +# +# Column round (v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15) +# Diagnal round (v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14) +# + +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/asm-compat.h> +#include <linux/linkage.h> + +.machine "any" +.text + +.macro SAVE_GPR GPR OFFSET FRAME + std \GPR,\OFFSET(\FRAME) +.endm + +.macro SAVE_VRS VRS OFFSET FRAME + li 16, \OFFSET + stvx \VRS, 16, \FRAME +.endm + +.macro SAVE_VSX VSX OFFSET FRAME + li 16, \OFFSET + stxvx \VSX, 16, \FRAME +.endm + +.macro RESTORE_GPR GPR OFFSET FRAME + ld \GPR,\OFFSET(\FRAME) +.endm + +.macro RESTORE_VRS VRS OFFSET FRAME + li 16, \OFFSET + lvx \VRS, 16, \FRAME +.endm + +.macro RESTORE_VSX VSX OFFSET FRAME + li 16, \OFFSET + lxvx \VSX, 16, \FRAME +.endm + +.macro SAVE_REGS + mflr 0 + std 0, 16(1) + stdu 1,-752(1) + + SAVE_GPR 14, 112, 1 + SAVE_GPR 15, 120, 1 + SAVE_GPR 16, 128, 1 + SAVE_GPR 17, 136, 1 + SAVE_GPR 18, 144, 1 + SAVE_GPR 19, 152, 1 + SAVE_GPR 20, 160, 1 + SAVE_GPR 21, 168, 1 + SAVE_GPR 22, 176, 1 + SAVE_GPR 23, 184, 1 + SAVE_GPR 24, 192, 1 + SAVE_GPR 25, 200, 1 + SAVE_GPR 26, 208, 1 + SAVE_GPR 27, 216, 1 + SAVE_GPR 28, 224, 1 + SAVE_GPR 29, 232, 1 + SAVE_GPR 30, 240, 1 + SAVE_GPR 31, 248, 1 + + addi 9, 1, 256 + SAVE_VRS 20, 0, 9 + SAVE_VRS 21, 16, 9 + SAVE_VRS 22, 32, 9 + SAVE_VRS 23, 48, 9 + SAVE_VRS 24, 64, 9 + SAVE_VRS 25, 80, 9 + SAVE_VRS 26, 96, 9 + SAVE_VRS 27, 112, 9 + SAVE_VRS 28, 128, 9 + SAVE_VRS 29, 144, 9 + SAVE_VRS 30, 160, 9 + SAVE_VRS 31, 176, 9 + + SAVE_VSX 14, 192, 9 + SAVE_VSX 15, 208, 9 + SAVE_VSX 16, 224, 9 + SAVE_VSX 17, 240, 9 + SAVE_VSX 18, 256, 9 + SAVE_VSX 19, 272, 9 + SAVE_VSX 20, 288, 9 + SAVE_VSX 21, 304, 9 + SAVE_VSX 22, 320, 9 + SAVE_VSX 23, 336, 9 + SAVE_VSX 24, 352, 9 + SAVE_VSX 25, 368, 9 + SAVE_VSX 26, 384, 9 + SAVE_VSX 27, 400, 9 + SAVE_VSX 28, 416, 9 + SAVE_VSX 29, 432, 9 + SAVE_VSX 30, 448, 9 + SAVE_VSX 31, 464, 9 +.endm # SAVE_REGS + +.macro RESTORE_REGS + addi 9, 1, 256 + RESTORE_VRS 20, 0, 9 + RESTORE_VRS 21, 16, 9 + RESTORE_VRS 22, 32, 9 + RESTORE_VRS 23, 48, 9 + RESTORE_VRS 24, 64, 9 + RESTORE_VRS 25, 80, 9 + RESTORE_VRS 26, 96, 9 + RESTORE_VRS 27, 112, 9 + RESTORE_VRS 28, 128, 9 + RESTORE_VRS 29, 144, 9 + RESTORE_VRS 30, 160, 9 + RESTORE_VRS 31, 176, 9 + + RESTORE_VSX 14, 192, 9 + RESTORE_VSX 15, 208, 9 + RESTORE_VSX 16, 224, 9 + RESTORE_VSX 17, 240, 9 + RESTORE_VSX 18, 256, 9 + RESTORE_VSX 19, 272, 9 + RESTORE_VSX 20, 288, 9 + RESTORE_VSX 21, 304, 9 + RESTORE_VSX 22, 320, 9 + RESTORE_VSX 23, 336, 9 + RESTORE_VSX 24, 352, 9 + RESTORE_VSX 25, 368, 9 + RESTORE_VSX 26, 384, 9 + RESTORE_VSX 27, 400, 9 + RESTORE_VSX 28, 416, 9 + RESTORE_VSX 29, 432, 9 + RESTORE_VSX 30, 448, 9 + RESTORE_VSX 31, 464, 9 + + RESTORE_GPR 14, 112, 1 + RESTORE_GPR 15, 120, 1 + RESTORE_GPR 16, 128, 1 + RESTORE_GPR 17, 136, 1 + RESTORE_GPR 18, 144, 1 + RESTORE_GPR 19, 152, 1 + RESTORE_GPR 20, 160, 1 + RESTORE_GPR 21, 168, 1 + RESTORE_GPR 22, 176, 1 + RESTORE_GPR 23, 184, 1 + RESTORE_GPR 24, 192, 1 + RESTORE_GPR 25, 200, 1 + RESTORE_GPR 26, 208, 1 + RESTORE_GPR 27, 216, 1 + RESTORE_GPR 28, 224, 1 + RESTORE_GPR 29, 232, 1 + RESTORE_GPR 30, 240, 1 + RESTORE_GPR 31, 248, 1 + + addi 1, 1, 752 + ld 0, 16(1) + mtlr 0 +.endm # RESTORE_REGS + +.macro QT_loop_8x + # QR(v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15) + xxlor 0, 32+25, 32+25 + xxlor 32+25, 20, 20 + vadduwm 0, 0, 4 + vadduwm 1, 1, 5 + vadduwm 2, 2, 6 + vadduwm 3, 3, 7 + vadduwm 16, 16, 20 + vadduwm 17, 17, 21 + vadduwm 18, 18, 22 + vadduwm 19, 19, 23 + + vpermxor 12, 12, 0, 25 + vpermxor 13, 13, 1, 25 + vpermxor 14, 14, 2, 25 + vpermxor 15, 15, 3, 25 + vpermxor 28, 28, 16, 25 + vpermxor 29, 29, 17, 25 + vpermxor 30, 30, 18, 25 + vpermxor 31, 31, 19, 25 + xxlor 32+25, 0, 0 + vadduwm 8, 8, 12 + vadduwm 9, 9, 13 + vadduwm 10, 10, 14 + vadduwm 11, 11, 15 + vadduwm 24, 24, 28 + vadduwm 25, 25, 29 + vadduwm 26, 26, 30 + vadduwm 27, 27, 31 + vxor 4, 4, 8 + vxor 5, 5, 9 + vxor 6, 6, 10 + vxor 7, 7, 11 + vxor 20, 20, 24 + vxor 21, 21, 25 + vxor 22, 22, 26 + vxor 23, 23, 27 + + xxlor 0, 32+25, 32+25 + xxlor 32+25, 21, 21 + vrlw 4, 4, 25 # + vrlw 5, 5, 25 + vrlw 6, 6, 25 + vrlw 7, 7, 25 + vrlw 20, 20, 25 # + vrlw 21, 21, 25 + vrlw 22, 22, 25 + vrlw 23, 23, 25 + xxlor 32+25, 0, 0 + vadduwm 0, 0, 4 + vadduwm 1, 1, 5 + vadduwm 2, 2, 6 + vadduwm 3, 3, 7 + vadduwm 16, 16, 20 + vadduwm 17, 17, 21 + vadduwm 18, 18, 22 + vadduwm 19, 19, 23 + + xxlor 0, 32+25, 32+25 + xxlor 32+25, 22, 22 + vpermxor 12, 12, 0, 25 + vpermxor 13, 13, 1, 25 + vpermxor 14, 14, 2, 25 + vpermxor 15, 15, 3, 25 + vpermxor 28, 28, 16, 25 + vpermxor 29, 29, 17, 25 + vpermxor 30, 30, 18, 25 + vpermxor 31, 31, 19, 25 + xxlor 32+25, 0, 0 + vadduwm 8, 8, 12 + vadduwm 9, 9, 13 + vadduwm 10, 10, 14 + vadduwm 11, 11, 15 + vadduwm 24, 24, 28 + vadduwm 25, 25, 29 + vadduwm 26, 26, 30 + vadduwm 27, 27, 31 + xxlor 0, 32+28, 32+28 + xxlor 32+28, 23, 23 + vxor 4, 4, 8 + vxor 5, 5, 9 + vxor 6, 6, 10 + vxor 7, 7, 11 + vxor 20, 20, 24 + vxor 21, 21, 25 + vxor 22, 22, 26 + vxor 23, 23, 27 + vrlw 4, 4, 28 # + vrlw 5, 5, 28 + vrlw 6, 6, 28 + vrlw 7, 7, 28 + vrlw 20, 20, 28 # + vrlw 21, 21, 28 + vrlw 22, 22, 28 + vrlw 23, 23, 28 + xxlor 32+28, 0, 0 + + # QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14) + xxlor 0, 32+25, 32+25 + xxlor 32+25, 20, 20 + vadduwm 0, 0, 5 + vadduwm 1, 1, 6 + vadduwm 2, 2, 7 + vadduwm 3, 3, 4 + vadduwm 16, 16, 21 + vadduwm 17, 17, 22 + vadduwm 18, 18, 23 + vadduwm 19, 19, 20 + + vpermxor 15, 15, 0, 25 + vpermxor 12, 12, 1, 25 + vpermxor 13, 13, 2, 25 + vpermxor 14, 14, 3, 25 + vpermxor 31, 31, 16, 25 + vpermxor 28, 28, 17, 25 + vpermxor 29, 29, 18, 25 + vpermxor 30, 30, 19, 25 + + xxlor 32+25, 0, 0 + vadduwm 10, 10, 15 + vadduwm 11, 11, 12 + vadduwm 8, 8, 13 + vadduwm 9, 9, 14 + vadduwm 26, 26, 31 + vadduwm 27, 27, 28 + vadduwm 24, 24, 29 + vadduwm 25, 25, 30 + vxor 5, 5, 10 + vxor 6, 6, 11 + vxor 7, 7, 8 + vxor 4, 4, 9 + vxor 21, 21, 26 + vxor 22, 22, 27 + vxor 23, 23, 24 + vxor 20, 20, 25 + + xxlor 0, 32+25, 32+25 + xxlor 32+25, 21, 21 + vrlw 5, 5, 25 + vrlw 6, 6, 25 + vrlw 7, 7, 25 + vrlw 4, 4, 25 + vrlw 21, 21, 25 + vrlw 22, 22, 25 + vrlw 23, 23, 25 + vrlw 20, 20, 25 + xxlor 32+25, 0, 0 + + vadduwm 0, 0, 5 + vadduwm 1, 1, 6 + vadduwm 2, 2, 7 + vadduwm 3, 3, 4 + vadduwm 16, 16, 21 + vadduwm 17, 17, 22 + vadduwm 18, 18, 23 + vadduwm 19, 19, 20 + + xxlor 0, 32+25, 32+25 + xxlor 32+25, 22, 22 + vpermxor 15, 15, 0, 25 + vpermxor 12, 12, 1, 25 + vpermxor 13, 13, 2, 25 + vpermxor 14, 14, 3, 25 + vpermxor 31, 31, 16, 25 + vpermxor 28, 28, 17, 25 + vpermxor 29, 29, 18, 25 + vpermxor 30, 30, 19, 25 + xxlor 32+25, 0, 0 + + vadduwm 10, 10, 15 + vadduwm 11, 11, 12 + vadduwm 8, 8, 13 + vadduwm 9, 9, 14 + vadduwm 26, 26, 31 + vadduwm 27, 27, 28 + vadduwm 24, 24, 29 + vadduwm 25, 25, 30 + + xxlor 0, 32+28, 32+28 + xxlor 32+28, 23, 23 + vxor 5, 5, 10 + vxor 6, 6, 11 + vxor 7, 7, 8 + vxor 4, 4, 9 + vxor 21, 21, 26 + vxor 22, 22, 27 + vxor 23, 23, 24 + vxor 20, 20, 25 + vrlw 5, 5, 28 + vrlw 6, 6, 28 + vrlw 7, 7, 28 + vrlw 4, 4, 28 + vrlw 21, 21, 28 + vrlw 22, 22, 28 + vrlw 23, 23, 28 + vrlw 20, 20, 28 + xxlor 32+28, 0, 0 +.endm + +.macro QT_loop_4x + # QR(v0, v4, v8, v12, v1, v5, v9, v13, v2, v6, v10, v14, v3, v7, v11, v15) + vadduwm 0, 0, 4 + vadduwm 1, 1, 5 + vadduwm 2, 2, 6 + vadduwm 3, 3, 7 + vpermxor 12, 12, 0, 20 + vpermxor 13, 13, 1, 20 + vpermxor 14, 14, 2, 20 + vpermxor 15, 15, 3, 20 + vadduwm 8, 8, 12 + vadduwm 9, 9, 13 + vadduwm 10, 10, 14 + vadduwm 11, 11, 15 + vxor 4, 4, 8 + vxor 5, 5, 9 + vxor 6, 6, 10 + vxor 7, 7, 11 + vrlw 4, 4, 21 + vrlw 5, 5, 21 + vrlw 6, 6, 21 + vrlw 7, 7, 21 + vadduwm 0, 0, 4 + vadduwm 1, 1, 5 + vadduwm 2, 2, 6 + vadduwm 3, 3, 7 + vpermxor 12, 12, 0, 22 + vpermxor 13, 13, 1, 22 + vpermxor 14, 14, 2, 22 + vpermxor 15, 15, 3, 22 + vadduwm 8, 8, 12 + vadduwm 9, 9, 13 + vadduwm 10, 10, 14 + vadduwm 11, 11, 15 + vxor 4, 4, 8 + vxor 5, 5, 9 + vxor 6, 6, 10 + vxor 7, 7, 11 + vrlw 4, 4, 23 + vrlw 5, 5, 23 + vrlw 6, 6, 23 + vrlw 7, 7, 23 + + # QR(v0, v5, v10, v15, v1, v6, v11, v12, v2, v7, v8, v13, v3, v4, v9, v14) + vadduwm 0, 0, 5 + vadduwm 1, 1, 6 + vadduwm 2, 2, 7 + vadduwm 3, 3, 4 + vpermxor 15, 15, 0, 20 + vpermxor 12, 12, 1, 20 + vpermxor 13, 13, 2, 20 + vpermxor 14, 14, 3, 20 + vadduwm 10, 10, 15 + vadduwm 11, 11, 12 + vadduwm 8, 8, 13 + vadduwm 9, 9, 14 + vxor 5, 5, 10 + vxor 6, 6, 11 + vxor 7, 7, 8 + vxor 4, 4, 9 + vrlw 5, 5, 21 + vrlw 6, 6, 21 + vrlw 7, 7, 21 + vrlw 4, 4, 21 + vadduwm 0, 0, 5 + vadduwm 1, 1, 6 + vadduwm 2, 2, 7 + vadduwm 3, 3, 4 + vpermxor 15, 15, 0, 22 + vpermxor 12, 12, 1, 22 + vpermxor 13, 13, 2, 22 + vpermxor 14, 14, 3, 22 + vadduwm 10, 10, 15 + vadduwm 11, 11, 12 + vadduwm 8, 8, 13 + vadduwm 9, 9, 14 + vxor 5, 5, 10 + vxor 6, 6, 11 + vxor 7, 7, 8 + vxor 4, 4, 9 + vrlw 5, 5, 23 + vrlw 6, 6, 23 + vrlw 7, 7, 23 + vrlw 4, 4, 23 +.endm + +# Transpose +.macro TP_4x a0 a1 a2 a3 + xxmrghw 10, 32+\a0, 32+\a1 # a0, a1, b0, b1 + xxmrghw 11, 32+\a2, 32+\a3 # a2, a3, b2, b3 + xxmrglw 12, 32+\a0, 32+\a1 # c0, c1, d0, d1 + xxmrglw 13, 32+\a2, 32+\a3 # c2, c3, d2, d3 + xxpermdi 32+\a0, 10, 11, 0 # a0, a1, a2, a3 + xxpermdi 32+\a1, 10, 11, 3 # b0, b1, b2, b3 + xxpermdi 32+\a2, 12, 13, 0 # c0, c1, c2, c3 + xxpermdi 32+\a3, 12, 13, 3 # d0, d1, d2, d3 +.endm + +# key stream = working state + state +.macro Add_state S + vadduwm \S+0, \S+0, 16-\S + vadduwm \S+4, \S+4, 17-\S + vadduwm \S+8, \S+8, 18-\S + vadduwm \S+12, \S+12, 19-\S + + vadduwm \S+1, \S+1, 16-\S + vadduwm \S+5, \S+5, 17-\S + vadduwm \S+9, \S+9, 18-\S + vadduwm \S+13, \S+13, 19-\S + + vadduwm \S+2, \S+2, 16-\S + vadduwm \S+6, \S+6, 17-\S + vadduwm \S+10, \S+10, 18-\S + vadduwm \S+14, \S+14, 19-\S + + vadduwm \S+3, \S+3, 16-\S + vadduwm \S+7, \S+7, 17-\S + vadduwm \S+11, \S+11, 18-\S + vadduwm \S+15, \S+15, 19-\S +.endm + +# +# write 256 bytes +# +.macro Write_256 S + add 9, 14, 5 + add 16, 14, 4 + lxvw4x 0, 0, 9 + lxvw4x 1, 17, 9 + lxvw4x 2, 18, 9 + lxvw4x 3, 19, 9 + lxvw4x 4, 20, 9 + lxvw4x 5, 21, 9 + lxvw4x 6, 22, 9 + lxvw4x 7, 23, 9 + lxvw4x 8, 24, 9 + lxvw4x 9, 25, 9 + lxvw4x 10, 26, 9 + lxvw4x 11, 27, 9 + lxvw4x 12, 28, 9 + lxvw4x 13, 29, 9 + lxvw4x 14, 30, 9 + lxvw4x 15, 31, 9 + + xxlxor \S+32, \S+32, 0 + xxlxor \S+36, \S+36, 1 + xxlxor \S+40, \S+40, 2 + xxlxor \S+44, \S+44, 3 + xxlxor \S+33, \S+33, 4 + xxlxor \S+37, \S+37, 5 + xxlxor \S+41, \S+41, 6 + xxlxor \S+45, \S+45, 7 + xxlxor \S+34, \S+34, 8 + xxlxor \S+38, \S+38, 9 + xxlxor \S+42, \S+42, 10 + xxlxor \S+46, \S+46, 11 + xxlxor \S+35, \S+35, 12 + xxlxor \S+39, \S+39, 13 + xxlxor \S+43, \S+43, 14 + xxlxor \S+47, \S+47, 15 + + stxvw4x \S+32, 0, 16 + stxvw4x \S+36, 17, 16 + stxvw4x \S+40, 18, 16 + stxvw4x \S+44, 19, 16 + + stxvw4x \S+33, 20, 16 + stxvw4x \S+37, 21, 16 + stxvw4x \S+41, 22, 16 + stxvw4x \S+45, 23, 16 + + stxvw4x \S+34, 24, 16 + stxvw4x \S+38, 25, 16 + stxvw4x \S+42, 26, 16 + stxvw4x \S+46, 27, 16 + + stxvw4x \S+35, 28, 16 + stxvw4x \S+39, 29, 16 + stxvw4x \S+43, 30, 16 + stxvw4x \S+47, 31, 16 + +.endm + +# +# chacha20_p10le_8x(u32 *state, byte *dst, const byte *src, size_t len, int nrounds); +# +SYM_FUNC_START(chacha_p10le_8x) +.align 5 + cmpdi 6, 0 + ble Out_no_chacha + + SAVE_REGS + + # r17 - r31 mainly for Write_256 macro. + li 17, 16 + li 18, 32 + li 19, 48 + li 20, 64 + li 21, 80 + li 22, 96 + li 23, 112 + li 24, 128 + li 25, 144 + li 26, 160 + li 27, 176 + li 28, 192 + li 29, 208 + li 30, 224 + li 31, 240 + + mr 15, 6 # len + li 14, 0 # offset to inp and outp + + lxvw4x 48, 0, 3 # vr16, constants + lxvw4x 49, 17, 3 # vr17, key 1 + lxvw4x 50, 18, 3 # vr18, key 2 + lxvw4x 51, 19, 3 # vr19, counter, nonce + + # create (0, 1, 2, 3) counters + vspltisw 0, 0 + vspltisw 1, 1 + vspltisw 2, 2 + vspltisw 3, 3 + vmrghw 4, 0, 1 + vmrglw 5, 2, 3 + vsldoi 30, 4, 5, 8 # vr30 counter, 4 (0, 1, 2, 3) + + vspltisw 21, 12 + vspltisw 23, 7 + + addis 11, 2, permx@toc@ha + addi 11, 11, permx@toc@l + lxvw4x 32+20, 0, 11 + lxvw4x 32+22, 17, 11 + + sradi 8, 7, 1 + + mtctr 8 + + # save constants to vsx + xxlor 16, 48, 48 + xxlor 17, 49, 49 + xxlor 18, 50, 50 + xxlor 19, 51, 51 + + vspltisw 25, 4 + vspltisw 26, 8 + + xxlor 25, 32+26, 32+26 + xxlor 24, 32+25, 32+25 + + vadduwm 31, 30, 25 # counter = (0, 1, 2, 3) + (4, 4, 4, 4) + xxlor 30, 32+30, 32+30 + xxlor 31, 32+31, 32+31 + + xxlor 20, 32+20, 32+20 + xxlor 21, 32+21, 32+21 + xxlor 22, 32+22, 32+22 + xxlor 23, 32+23, 32+23 + + cmpdi 6, 512 + blt Loop_last + +Loop_8x: + xxspltw 32+0, 16, 0 + xxspltw 32+1, 16, 1 + xxspltw 32+2, 16, 2 + xxspltw 32+3, 16, 3 + + xxspltw 32+4, 17, 0 + xxspltw 32+5, 17, 1 + xxspltw 32+6, 17, 2 + xxspltw 32+7, 17, 3 + xxspltw 32+8, 18, 0 + xxspltw 32+9, 18, 1 + xxspltw 32+10, 18, 2 + xxspltw 32+11, 18, 3 + xxspltw 32+12, 19, 0 + xxspltw 32+13, 19, 1 + xxspltw 32+14, 19, 2 + xxspltw 32+15, 19, 3 + vadduwm 12, 12, 30 # increase counter + + xxspltw 32+16, 16, 0 + xxspltw 32+17, 16, 1 + xxspltw 32+18, 16, 2 + xxspltw 32+19, 16, 3 + + xxspltw 32+20, 17, 0 + xxspltw 32+21, 17, 1 + xxspltw 32+22, 17, 2 + xxspltw 32+23, 17, 3 + xxspltw 32+24, 18, 0 + xxspltw 32+25, 18, 1 + xxspltw 32+26, 18, 2 + xxspltw 32+27, 18, 3 + xxspltw 32+28, 19, 0 + xxspltw 32+29, 19, 1 + vadduwm 28, 28, 31 # increase counter + xxspltw 32+30, 19, 2 + xxspltw 32+31, 19, 3 + +.align 5 +quarter_loop_8x: + QT_loop_8x + + bdnz quarter_loop_8x + + xxlor 0, 32+30, 32+30 + xxlor 32+30, 30, 30 + vadduwm 12, 12, 30 + xxlor 32+30, 0, 0 + TP_4x 0, 1, 2, 3 + TP_4x 4, 5, 6, 7 + TP_4x 8, 9, 10, 11 + TP_4x 12, 13, 14, 15 + + xxlor 0, 48, 48 + xxlor 1, 49, 49 + xxlor 2, 50, 50 + xxlor 3, 51, 51 + xxlor 48, 16, 16 + xxlor 49, 17, 17 + xxlor 50, 18, 18 + xxlor 51, 19, 19 + Add_state 0 + xxlor 48, 0, 0 + xxlor 49, 1, 1 + xxlor 50, 2, 2 + xxlor 51, 3, 3 + Write_256 0 + addi 14, 14, 256 # offset +=256 + addi 15, 15, -256 # len -=256 + + xxlor 5, 32+31, 32+31 + xxlor 32+31, 31, 31 + vadduwm 28, 28, 31 + xxlor 32+31, 5, 5 + TP_4x 16+0, 16+1, 16+2, 16+3 + TP_4x 16+4, 16+5, 16+6, 16+7 + TP_4x 16+8, 16+9, 16+10, 16+11 + TP_4x 16+12, 16+13, 16+14, 16+15 + + xxlor 32, 16, 16 + xxlor 33, 17, 17 + xxlor 34, 18, 18 + xxlor 35, 19, 19 + Add_state 16 + Write_256 16 + addi 14, 14, 256 # offset +=256 + addi 15, 15, -256 # len +=256 + + xxlor 32+24, 24, 24 + xxlor 32+25, 25, 25 + xxlor 32+30, 30, 30 + vadduwm 30, 30, 25 + vadduwm 31, 30, 24 + xxlor 30, 32+30, 32+30 + xxlor 31, 32+31, 32+31 + + cmpdi 15, 0 + beq Out_loop + + cmpdi 15, 512 + blt Loop_last + + mtctr 8 + b Loop_8x + +Loop_last: + lxvw4x 48, 0, 3 # vr16, constants + lxvw4x 49, 17, 3 # vr17, key 1 + lxvw4x 50, 18, 3 # vr18, key 2 + lxvw4x 51, 19, 3 # vr19, counter, nonce + + vspltisw 21, 12 + vspltisw 23, 7 + addis 11, 2, permx@toc@ha + addi 11, 11, permx@toc@l + lxvw4x 32+20, 0, 11 + lxvw4x 32+22, 17, 11 + + sradi 8, 7, 1 + mtctr 8 + +Loop_4x: + vspltw 0, 16, 0 + vspltw 1, 16, 1 + vspltw 2, 16, 2 + vspltw 3, 16, 3 + + vspltw 4, 17, 0 + vspltw 5, 17, 1 + vspltw 6, 17, 2 + vspltw 7, 17, 3 + vspltw 8, 18, 0 + vspltw 9, 18, 1 + vspltw 10, 18, 2 + vspltw 11, 18, 3 + vspltw 12, 19, 0 + vadduwm 12, 12, 30 # increase counter + vspltw 13, 19, 1 + vspltw 14, 19, 2 + vspltw 15, 19, 3 + +.align 5 +quarter_loop: + QT_loop_4x + + bdnz quarter_loop + + vadduwm 12, 12, 30 + TP_4x 0, 1, 2, 3 + TP_4x 4, 5, 6, 7 + TP_4x 8, 9, 10, 11 + TP_4x 12, 13, 14, 15 + + Add_state 0 + Write_256 0 + addi 14, 14, 256 # offset += 256 + addi 15, 15, -256 # len += 256 + + # Update state counter + vspltisw 25, 4 + vadduwm 30, 30, 25 + + cmpdi 15, 0 + beq Out_loop + cmpdi 15, 256 + blt Out_loop + + mtctr 8 + b Loop_4x + +Out_loop: + RESTORE_REGS + blr + +Out_no_chacha: + li 3, 0 + blr +SYM_FUNC_END(chacha_p10le_8x) + +SYM_DATA_START_LOCAL(PERMX) +.align 5 +permx: +.long 0x22330011, 0x66774455, 0xaabb8899, 0xeeffccdd +.long 0x11223300, 0x55667744, 0x99aabb88, 0xddeeffcc +SYM_DATA_END(PERMX) diff --git a/arch/powerpc/crypto/poly1305-p10-glue.c b/arch/powerpc/crypto/poly1305-p10-glue.c new file mode 100644 index 000000000000..95dd708573ee --- /dev/null +++ b/arch/powerpc/crypto/poly1305-p10-glue.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Poly1305 authenticator algorithm, RFC7539. + * + * Copyright 2023- IBM Corp. All rights reserved. + */ + +#include <crypto/algapi.h> +#include <linux/crypto.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/jump_label.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/poly1305.h> +#include <crypto/internal/simd.h> +#include <linux/cpufeature.h> +#include <asm/unaligned.h> +#include <asm/simd.h> +#include <asm/switch_to.h> + +asmlinkage void poly1305_p10le_4blocks(void *h, const u8 *m, u32 mlen); +asmlinkage void poly1305_64s(void *h, const u8 *m, u32 mlen, int highbit); +asmlinkage void poly1305_emit_64(void *h, void *s, u8 *dst); + +static void vsx_begin(void) +{ + preempt_disable(); + enable_kernel_vsx(); +} + +static void vsx_end(void) +{ + disable_kernel_vsx(); + preempt_enable(); +} + +static int crypto_poly1305_p10_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + poly1305_core_init(&dctx->h); + dctx->buflen = 0; + dctx->rset = 0; + dctx->sset = false; + + return 0; +} + +static unsigned int crypto_poly1305_setdctxkey(struct poly1305_desc_ctx *dctx, + const u8 *inp, unsigned int len) +{ + unsigned int acc = 0; + + if (unlikely(!dctx->sset)) { + if (!dctx->rset && len >= POLY1305_BLOCK_SIZE) { + struct poly1305_core_key *key = &dctx->core_r; + + key->key.r64[0] = get_unaligned_le64(&inp[0]); + key->key.r64[1] = get_unaligned_le64(&inp[8]); + inp += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + acc += POLY1305_BLOCK_SIZE; + dctx->rset = 1; + } + if (len >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(&inp[0]); + dctx->s[1] = get_unaligned_le32(&inp[4]); + dctx->s[2] = get_unaligned_le32(&inp[8]); + dctx->s[3] = get_unaligned_le32(&inp[12]); + acc += POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + } + return acc; +} + +static int crypto_poly1305_p10_update(struct shash_desc *desc, + const u8 *src, unsigned int srclen) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + unsigned int bytes, used; + + if (unlikely(dctx->buflen)) { + bytes = min(srclen, POLY1305_BLOCK_SIZE - dctx->buflen); + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + srclen -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + if (likely(!crypto_poly1305_setdctxkey(dctx, dctx->buf, + POLY1305_BLOCK_SIZE))) { + vsx_begin(); + poly1305_64s(&dctx->h, dctx->buf, + POLY1305_BLOCK_SIZE, 1); + vsx_end(); + } + dctx->buflen = 0; + } + } + + if (likely(srclen >= POLY1305_BLOCK_SIZE)) { + bytes = round_down(srclen, POLY1305_BLOCK_SIZE); + used = crypto_poly1305_setdctxkey(dctx, src, bytes); + if (likely(used)) { + srclen -= used; + src += used; + } + if (crypto_simd_usable() && (srclen >= POLY1305_BLOCK_SIZE*4)) { + vsx_begin(); + poly1305_p10le_4blocks(&dctx->h, src, srclen); + vsx_end(); + src += srclen - (srclen % (POLY1305_BLOCK_SIZE * 4)); + srclen %= POLY1305_BLOCK_SIZE * 4; + } + while (srclen >= POLY1305_BLOCK_SIZE) { + vsx_begin(); + poly1305_64s(&dctx->h, src, POLY1305_BLOCK_SIZE, 1); + vsx_end(); + srclen -= POLY1305_BLOCK_SIZE; + src += POLY1305_BLOCK_SIZE; + } + } + + if (unlikely(srclen)) { + dctx->buflen = srclen; + memcpy(dctx->buf, src, srclen); + } + + return 0; +} + +static int crypto_poly1305_p10_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + if ((dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + vsx_begin(); + poly1305_64s(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + vsx_end(); + dctx->buflen = 0; + } + + poly1305_emit_64(&dctx->h, &dctx->s, dst); + return 0; +} + +static struct shash_alg poly1305_alg = { + .digestsize = POLY1305_DIGEST_SIZE, + .init = crypto_poly1305_p10_init, + .update = crypto_poly1305_p10_update, + .final = crypto_poly1305_p10_final, + .descsize = sizeof(struct poly1305_desc_ctx), + .base = { + .cra_name = "poly1305", + .cra_driver_name = "poly1305-p10", + .cra_priority = 300, + .cra_blocksize = POLY1305_BLOCK_SIZE, + .cra_module = THIS_MODULE, + }, +}; + +static int __init poly1305_p10_init(void) +{ + return crypto_register_shash(&poly1305_alg); +} + +static void __exit poly1305_p10_exit(void) +{ + crypto_unregister_shash(&poly1305_alg); +} + +module_cpu_feature_match(PPC_MODULE_FEATURE_P10, poly1305_p10_init); +module_exit(poly1305_p10_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Danny Tsen <dtsen@linux.ibm.com>"); +MODULE_DESCRIPTION("Optimized Poly1305 for P10"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-p10"); diff --git a/arch/powerpc/crypto/poly1305-p10le_64.S b/arch/powerpc/crypto/poly1305-p10le_64.S new file mode 100644 index 000000000000..a3c1987f1ecd --- /dev/null +++ b/arch/powerpc/crypto/poly1305-p10le_64.S @@ -0,0 +1,1075 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +# +# Accelerated poly1305 implementation for ppc64le. +# +# Copyright 2023- IBM Corp. All rights reserved +# +#=================================================================================== +# Written by Danny Tsen <dtsen@us.ibm.com> +# +# Poly1305 - this version mainly using vector/VSX/Scalar +# - 26 bits limbs +# - Handle multiple 64 byte blcok. +# +# Block size 16 bytes +# key = (r, s) +# clamp r &= 0x0FFFFFFC0FFFFFFC 0x0FFFFFFC0FFFFFFF +# p = 2^130 - 5 +# a += m +# a = (r + a) % p +# a += s +# +# Improve performance by breaking down polynominal to the sum of products with +# h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r +# +# 07/22/21 - this revison based on the above sum of products. Setup r^4, r^3, r^2, r and s3, s2, s1, s0 +# to 9 vectors for multiplications. +# +# setup r^4, r^3, r^2, r vectors +# vs [r^1, r^3, r^2, r^4] +# vs0 = [r0,.....] +# vs1 = [r1,.....] +# vs2 = [r2,.....] +# vs3 = [r3,.....] +# vs4 = [r4,.....] +# vs5 = [r1*5,...] +# vs6 = [r2*5,...] +# vs7 = [r2*5,...] +# vs8 = [r4*5,...] +# +# Each word in a vector consists a member of a "r/s" in [a * r/s]. +# +# r0, r4*5, r3*5, r2*5, r1*5; +# r1, r0, r4*5, r3*5, r2*5; +# r2, r1, r0, r4*5, r3*5; +# r3, r2, r1, r0, r4*5; +# r4, r3, r2, r1, r0 ; +# +# +# poly1305_p10le_4blocks( uint8_t *k, uint32_t mlen, uint8_t *m) +# k = 32 bytes key +# r3 = k (r, s) +# r4 = mlen +# r5 = m +# +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> +#include <asm/asm-compat.h> +#include <linux/linkage.h> + +.machine "any" + +.text + +.macro SAVE_GPR GPR OFFSET FRAME + std \GPR,\OFFSET(\FRAME) +.endm + +.macro SAVE_VRS VRS OFFSET FRAME + li 16, \OFFSET + stvx \VRS, 16, \FRAME +.endm + +.macro SAVE_VSX VSX OFFSET FRAME + li 16, \OFFSET + stxvx \VSX, 16, \FRAME +.endm + +.macro RESTORE_GPR GPR OFFSET FRAME + ld \GPR,\OFFSET(\FRAME) +.endm + +.macro RESTORE_VRS VRS OFFSET FRAME + li 16, \OFFSET + lvx \VRS, 16, \FRAME +.endm + +.macro RESTORE_VSX VSX OFFSET FRAME + li 16, \OFFSET + lxvx \VSX, 16, \FRAME +.endm + +.macro SAVE_REGS + mflr 0 + std 0, 16(1) + stdu 1,-752(1) + + SAVE_GPR 14, 112, 1 + SAVE_GPR 15, 120, 1 + SAVE_GPR 16, 128, 1 + SAVE_GPR 17, 136, 1 + SAVE_GPR 18, 144, 1 + SAVE_GPR 19, 152, 1 + SAVE_GPR 20, 160, 1 + SAVE_GPR 21, 168, 1 + SAVE_GPR 22, 176, 1 + SAVE_GPR 23, 184, 1 + SAVE_GPR 24, 192, 1 + SAVE_GPR 25, 200, 1 + SAVE_GPR 26, 208, 1 + SAVE_GPR 27, 216, 1 + SAVE_GPR 28, 224, 1 + SAVE_GPR 29, 232, 1 + SAVE_GPR 30, 240, 1 + SAVE_GPR 31, 248, 1 + + addi 9, 1, 256 + SAVE_VRS 20, 0, 9 + SAVE_VRS 21, 16, 9 + SAVE_VRS 22, 32, 9 + SAVE_VRS 23, 48, 9 + SAVE_VRS 24, 64, 9 + SAVE_VRS 25, 80, 9 + SAVE_VRS 26, 96, 9 + SAVE_VRS 27, 112, 9 + SAVE_VRS 28, 128, 9 + SAVE_VRS 29, 144, 9 + SAVE_VRS 30, 160, 9 + SAVE_VRS 31, 176, 9 + + SAVE_VSX 14, 192, 9 + SAVE_VSX 15, 208, 9 + SAVE_VSX 16, 224, 9 + SAVE_VSX 17, 240, 9 + SAVE_VSX 18, 256, 9 + SAVE_VSX 19, 272, 9 + SAVE_VSX 20, 288, 9 + SAVE_VSX 21, 304, 9 + SAVE_VSX 22, 320, 9 + SAVE_VSX 23, 336, 9 + SAVE_VSX 24, 352, 9 + SAVE_VSX 25, 368, 9 + SAVE_VSX 26, 384, 9 + SAVE_VSX 27, 400, 9 + SAVE_VSX 28, 416, 9 + SAVE_VSX 29, 432, 9 + SAVE_VSX 30, 448, 9 + SAVE_VSX 31, 464, 9 +.endm # SAVE_REGS + +.macro RESTORE_REGS + addi 9, 1, 256 + RESTORE_VRS 20, 0, 9 + RESTORE_VRS 21, 16, 9 + RESTORE_VRS 22, 32, 9 + RESTORE_VRS 23, 48, 9 + RESTORE_VRS 24, 64, 9 + RESTORE_VRS 25, 80, 9 + RESTORE_VRS 26, 96, 9 + RESTORE_VRS 27, 112, 9 + RESTORE_VRS 28, 128, 9 + RESTORE_VRS 29, 144, 9 + RESTORE_VRS 30, 160, 9 + RESTORE_VRS 31, 176, 9 + + RESTORE_VSX 14, 192, 9 + RESTORE_VSX 15, 208, 9 + RESTORE_VSX 16, 224, 9 + RESTORE_VSX 17, 240, 9 + RESTORE_VSX 18, 256, 9 + RESTORE_VSX 19, 272, 9 + RESTORE_VSX 20, 288, 9 + RESTORE_VSX 21, 304, 9 + RESTORE_VSX 22, 320, 9 + RESTORE_VSX 23, 336, 9 + RESTORE_VSX 24, 352, 9 + RESTORE_VSX 25, 368, 9 + RESTORE_VSX 26, 384, 9 + RESTORE_VSX 27, 400, 9 + RESTORE_VSX 28, 416, 9 + RESTORE_VSX 29, 432, 9 + RESTORE_VSX 30, 448, 9 + RESTORE_VSX 31, 464, 9 + + RESTORE_GPR 14, 112, 1 + RESTORE_GPR 15, 120, 1 + RESTORE_GPR 16, 128, 1 + RESTORE_GPR 17, 136, 1 + RESTORE_GPR 18, 144, 1 + RESTORE_GPR 19, 152, 1 + RESTORE_GPR 20, 160, 1 + RESTORE_GPR 21, 168, 1 + RESTORE_GPR 22, 176, 1 + RESTORE_GPR 23, 184, 1 + RESTORE_GPR 24, 192, 1 + RESTORE_GPR 25, 200, 1 + RESTORE_GPR 26, 208, 1 + RESTORE_GPR 27, 216, 1 + RESTORE_GPR 28, 224, 1 + RESTORE_GPR 29, 232, 1 + RESTORE_GPR 30, 240, 1 + RESTORE_GPR 31, 248, 1 + + addi 1, 1, 752 + ld 0, 16(1) + mtlr 0 +.endm # RESTORE_REGS + +# +# p[0] = a0*r0 + a1*r4*5 + a2*r3*5 + a3*r2*5 + a4*r1*5; +# p[1] = a0*r1 + a1*r0 + a2*r4*5 + a3*r3*5 + a4*r2*5; +# p[2] = a0*r2 + a1*r1 + a2*r0 + a3*r4*5 + a4*r3*5; +# p[3] = a0*r3 + a1*r2 + a2*r1 + a3*r0 + a4*r4*5; +# p[4] = a0*r4 + a1*r3 + a2*r2 + a3*r1 + a4*r0 ; +# +# [r^2, r^3, r^1, r^4] +# [m3, m2, m4, m1] +# +# multiply odd and even words +.macro mul_odd + vmulouw 14, 4, 26 + vmulouw 10, 5, 3 + vmulouw 11, 6, 2 + vmulouw 12, 7, 1 + vmulouw 13, 8, 0 + vmulouw 15, 4, 27 + vaddudm 14, 14, 10 + vaddudm 14, 14, 11 + vmulouw 10, 5, 26 + vmulouw 11, 6, 3 + vaddudm 14, 14, 12 + vaddudm 14, 14, 13 # x0 + vaddudm 15, 15, 10 + vaddudm 15, 15, 11 + vmulouw 12, 7, 2 + vmulouw 13, 8, 1 + vaddudm 15, 15, 12 + vaddudm 15, 15, 13 # x1 + vmulouw 16, 4, 28 + vmulouw 10, 5, 27 + vmulouw 11, 6, 26 + vaddudm 16, 16, 10 + vaddudm 16, 16, 11 + vmulouw 12, 7, 3 + vmulouw 13, 8, 2 + vaddudm 16, 16, 12 + vaddudm 16, 16, 13 # x2 + vmulouw 17, 4, 29 + vmulouw 10, 5, 28 + vmulouw 11, 6, 27 + vaddudm 17, 17, 10 + vaddudm 17, 17, 11 + vmulouw 12, 7, 26 + vmulouw 13, 8, 3 + vaddudm 17, 17, 12 + vaddudm 17, 17, 13 # x3 + vmulouw 18, 4, 30 + vmulouw 10, 5, 29 + vmulouw 11, 6, 28 + vaddudm 18, 18, 10 + vaddudm 18, 18, 11 + vmulouw 12, 7, 27 + vmulouw 13, 8, 26 + vaddudm 18, 18, 12 + vaddudm 18, 18, 13 # x4 +.endm + +.macro mul_even + vmuleuw 9, 4, 26 + vmuleuw 10, 5, 3 + vmuleuw 11, 6, 2 + vmuleuw 12, 7, 1 + vmuleuw 13, 8, 0 + vaddudm 14, 14, 9 + vaddudm 14, 14, 10 + vaddudm 14, 14, 11 + vaddudm 14, 14, 12 + vaddudm 14, 14, 13 # x0 + + vmuleuw 9, 4, 27 + vmuleuw 10, 5, 26 + vmuleuw 11, 6, 3 + vmuleuw 12, 7, 2 + vmuleuw 13, 8, 1 + vaddudm 15, 15, 9 + vaddudm 15, 15, 10 + vaddudm 15, 15, 11 + vaddudm 15, 15, 12 + vaddudm 15, 15, 13 # x1 + + vmuleuw 9, 4, 28 + vmuleuw 10, 5, 27 + vmuleuw 11, 6, 26 + vmuleuw 12, 7, 3 + vmuleuw 13, 8, 2 + vaddudm 16, 16, 9 + vaddudm 16, 16, 10 + vaddudm 16, 16, 11 + vaddudm 16, 16, 12 + vaddudm 16, 16, 13 # x2 + + vmuleuw 9, 4, 29 + vmuleuw 10, 5, 28 + vmuleuw 11, 6, 27 + vmuleuw 12, 7, 26 + vmuleuw 13, 8, 3 + vaddudm 17, 17, 9 + vaddudm 17, 17, 10 + vaddudm 17, 17, 11 + vaddudm 17, 17, 12 + vaddudm 17, 17, 13 # x3 + + vmuleuw 9, 4, 30 + vmuleuw 10, 5, 29 + vmuleuw 11, 6, 28 + vmuleuw 12, 7, 27 + vmuleuw 13, 8, 26 + vaddudm 18, 18, 9 + vaddudm 18, 18, 10 + vaddudm 18, 18, 11 + vaddudm 18, 18, 12 + vaddudm 18, 18, 13 # x4 +.endm + +# +# poly1305_setup_r +# +# setup r^4, r^3, r^2, r vectors +# [r, r^3, r^2, r^4] +# vs0 = [r0,...] +# vs1 = [r1,...] +# vs2 = [r2,...] +# vs3 = [r3,...] +# vs4 = [r4,...] +# vs5 = [r4*5,...] +# vs6 = [r3*5,...] +# vs7 = [r2*5,...] +# vs8 = [r1*5,...] +# +# r0, r4*5, r3*5, r2*5, r1*5; +# r1, r0, r4*5, r3*5, r2*5; +# r2, r1, r0, r4*5, r3*5; +# r3, r2, r1, r0, r4*5; +# r4, r3, r2, r1, r0 ; +# +.macro poly1305_setup_r + + # save r + xxlor 26, 58, 58 + xxlor 27, 59, 59 + xxlor 28, 60, 60 + xxlor 29, 61, 61 + xxlor 30, 62, 62 + + xxlxor 31, 31, 31 + +# [r, r^3, r^2, r^4] + # compute r^2 + vmr 4, 26 + vmr 5, 27 + vmr 6, 28 + vmr 7, 29 + vmr 8, 30 + bl do_mul # r^2 r^1 + xxpermdi 58, 58, 36, 0x3 # r0 + xxpermdi 59, 59, 37, 0x3 # r1 + xxpermdi 60, 60, 38, 0x3 # r2 + xxpermdi 61, 61, 39, 0x3 # r3 + xxpermdi 62, 62, 40, 0x3 # r4 + xxpermdi 36, 36, 36, 0x3 + xxpermdi 37, 37, 37, 0x3 + xxpermdi 38, 38, 38, 0x3 + xxpermdi 39, 39, 39, 0x3 + xxpermdi 40, 40, 40, 0x3 + vspltisb 13, 2 + vsld 9, 27, 13 + vsld 10, 28, 13 + vsld 11, 29, 13 + vsld 12, 30, 13 + vaddudm 0, 9, 27 + vaddudm 1, 10, 28 + vaddudm 2, 11, 29 + vaddudm 3, 12, 30 + + bl do_mul # r^4 r^3 + vmrgow 26, 26, 4 + vmrgow 27, 27, 5 + vmrgow 28, 28, 6 + vmrgow 29, 29, 7 + vmrgow 30, 30, 8 + vspltisb 13, 2 + vsld 9, 27, 13 + vsld 10, 28, 13 + vsld 11, 29, 13 + vsld 12, 30, 13 + vaddudm 0, 9, 27 + vaddudm 1, 10, 28 + vaddudm 2, 11, 29 + vaddudm 3, 12, 30 + + # r^2 r^4 + xxlor 0, 58, 58 + xxlor 1, 59, 59 + xxlor 2, 60, 60 + xxlor 3, 61, 61 + xxlor 4, 62, 62 + xxlor 5, 32, 32 + xxlor 6, 33, 33 + xxlor 7, 34, 34 + xxlor 8, 35, 35 + + vspltw 9, 26, 3 + vspltw 10, 26, 2 + vmrgow 26, 10, 9 + vspltw 9, 27, 3 + vspltw 10, 27, 2 + vmrgow 27, 10, 9 + vspltw 9, 28, 3 + vspltw 10, 28, 2 + vmrgow 28, 10, 9 + vspltw 9, 29, 3 + vspltw 10, 29, 2 + vmrgow 29, 10, 9 + vspltw 9, 30, 3 + vspltw 10, 30, 2 + vmrgow 30, 10, 9 + + vsld 9, 27, 13 + vsld 10, 28, 13 + vsld 11, 29, 13 + vsld 12, 30, 13 + vaddudm 0, 9, 27 + vaddudm 1, 10, 28 + vaddudm 2, 11, 29 + vaddudm 3, 12, 30 +.endm + +SYM_FUNC_START_LOCAL(do_mul) + mul_odd + + # do reduction ( h %= p ) + # carry reduction + vspltisb 9, 2 + vsrd 10, 14, 31 + vsrd 11, 17, 31 + vand 7, 17, 25 + vand 4, 14, 25 + vaddudm 18, 18, 11 + vsrd 12, 18, 31 + vaddudm 15, 15, 10 + + vsrd 11, 15, 31 + vand 8, 18, 25 + vand 5, 15, 25 + vaddudm 4, 4, 12 + vsld 10, 12, 9 + vaddudm 6, 16, 11 + + vsrd 13, 6, 31 + vand 6, 6, 25 + vaddudm 4, 4, 10 + vsrd 10, 4, 31 + vaddudm 7, 7, 13 + + vsrd 11, 7, 31 + vand 7, 7, 25 + vand 4, 4, 25 + vaddudm 5, 5, 10 + vaddudm 8, 8, 11 + blr +SYM_FUNC_END(do_mul) + +# +# init key +# +.macro do_poly1305_init + addis 10, 2, rmask@toc@ha + addi 10, 10, rmask@toc@l + + ld 11, 0(10) + ld 12, 8(10) + + li 14, 16 + li 15, 32 + addis 10, 2, cnum@toc@ha + addi 10, 10, cnum@toc@l + lvx 25, 0, 10 # v25 - mask + lvx 31, 14, 10 # v31 = 1a + lvx 19, 15, 10 # v19 = 1 << 24 + lxv 24, 48(10) # vs24 + lxv 25, 64(10) # vs25 + + # initialize + # load key from r3 to vectors + ld 9, 24(3) + ld 10, 32(3) + and. 9, 9, 11 + and. 10, 10, 12 + + # break 26 bits + extrdi 14, 9, 26, 38 + extrdi 15, 9, 26, 12 + extrdi 16, 9, 12, 0 + mtvsrdd 58, 0, 14 + insrdi 16, 10, 14, 38 + mtvsrdd 59, 0, 15 + extrdi 17, 10, 26, 24 + mtvsrdd 60, 0, 16 + extrdi 18, 10, 24, 0 + mtvsrdd 61, 0, 17 + mtvsrdd 62, 0, 18 + + # r1 = r1 * 5, r2 = r2 * 5, r3 = r3 * 5, r4 = r4 * 5 + li 9, 5 + mtvsrdd 36, 0, 9 + vmulouw 0, 27, 4 # v0 = rr0 + vmulouw 1, 28, 4 # v1 = rr1 + vmulouw 2, 29, 4 # v2 = rr2 + vmulouw 3, 30, 4 # v3 = rr3 +.endm + +# +# poly1305_p10le_4blocks( uint8_t *k, uint32_t mlen, uint8_t *m) +# k = 32 bytes key +# r3 = k (r, s) +# r4 = mlen +# r5 = m +# +SYM_FUNC_START(poly1305_p10le_4blocks) +.align 5 + cmpdi 5, 64 + blt Out_no_poly1305 + + SAVE_REGS + + do_poly1305_init + + li 21, 0 # counter to message + + poly1305_setup_r + + # load previous H state + # break/convert r6 to 26 bits + ld 9, 0(3) + ld 10, 8(3) + ld 19, 16(3) + sldi 19, 19, 24 + mtvsrdd 41, 0, 19 + extrdi 14, 9, 26, 38 + extrdi 15, 9, 26, 12 + extrdi 16, 9, 12, 0 + mtvsrdd 36, 0, 14 + insrdi 16, 10, 14, 38 + mtvsrdd 37, 0, 15 + extrdi 17, 10, 26, 24 + mtvsrdd 38, 0, 16 + extrdi 18, 10, 24, 0 + mtvsrdd 39, 0, 17 + mtvsrdd 40, 0, 18 + vor 8, 8, 9 + + # input m1 m2 + add 20, 4, 21 + xxlor 49, 24, 24 + xxlor 50, 25, 25 + lxvw4x 43, 0, 20 + addi 17, 20, 16 + lxvw4x 44, 0, 17 + vperm 14, 11, 12, 17 + vperm 15, 11, 12, 18 + vand 9, 14, 25 # a0 + vsrd 10, 14, 31 # >> 26 + vsrd 11, 10, 31 # 12 bits left + vand 10, 10, 25 # a1 + vspltisb 13, 12 + vand 16, 15, 25 + vsld 12, 16, 13 + vor 11, 11, 12 + vand 11, 11, 25 # a2 + vspltisb 13, 14 + vsrd 12, 15, 13 # >> 14 + vsrd 13, 12, 31 # >> 26, a4 + vand 12, 12, 25 # a3 + + vaddudm 20, 4, 9 + vaddudm 21, 5, 10 + vaddudm 22, 6, 11 + vaddudm 23, 7, 12 + vaddudm 24, 8, 13 + + # m3 m4 + addi 17, 17, 16 + lxvw4x 43, 0, 17 + addi 17, 17, 16 + lxvw4x 44, 0, 17 + vperm 14, 11, 12, 17 + vperm 15, 11, 12, 18 + vand 9, 14, 25 # a0 + vsrd 10, 14, 31 # >> 26 + vsrd 11, 10, 31 # 12 bits left + vand 10, 10, 25 # a1 + vspltisb 13, 12 + vand 16, 15, 25 + vsld 12, 16, 13 + vspltisb 13, 14 + vor 11, 11, 12 + vand 11, 11, 25 # a2 + vsrd 12, 15, 13 # >> 14 + vsrd 13, 12, 31 # >> 26, a4 + vand 12, 12, 25 # a3 + + # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1] + vmrgow 4, 9, 20 + vmrgow 5, 10, 21 + vmrgow 6, 11, 22 + vmrgow 7, 12, 23 + vmrgow 8, 13, 24 + vaddudm 8, 8, 19 + + addi 5, 5, -64 # len -= 64 + addi 21, 21, 64 # offset += 64 + + li 9, 64 + divdu 31, 5, 9 + + cmpdi 31, 0 + ble Skip_block_loop + + mtctr 31 + +# h4 = m1 * r⁴ + m2 * r³ + m3 * r² + m4 * r +# Rewrite the polynominal sum of product as follows, +# h1 = (h0 + m1) * r^2, h2 = (h0 + m2) * r^2 +# h3 = (h1 + m3) * r^2, h4 = (h2 + m4) * r^2 --> (h0 + m1) r*4 + (h3 + m3) r^2, (h0 + m2) r^4 + (h0 + m4) r^2 +# .... Repeat +# h5 = (h3 + m5) * r^2, h6 = (h4 + m6) * r^2 --> +# h7 = (h5 + m7) * r^2, h8 = (h6 + m8) * r^1 --> m5 * r^4 + m6 * r^3 + m7 * r^2 + m8 * r +# +loop_4blocks: + + # Multiply odd words and even words + mul_odd + mul_even + # carry reduction + vspltisb 9, 2 + vsrd 10, 14, 31 + vsrd 11, 17, 31 + vand 7, 17, 25 + vand 4, 14, 25 + vaddudm 18, 18, 11 + vsrd 12, 18, 31 + vaddudm 15, 15, 10 + + vsrd 11, 15, 31 + vand 8, 18, 25 + vand 5, 15, 25 + vaddudm 4, 4, 12 + vsld 10, 12, 9 + vaddudm 6, 16, 11 + + vsrd 13, 6, 31 + vand 6, 6, 25 + vaddudm 4, 4, 10 + vsrd 10, 4, 31 + vaddudm 7, 7, 13 + + vsrd 11, 7, 31 + vand 7, 7, 25 + vand 4, 4, 25 + vaddudm 5, 5, 10 + vaddudm 8, 8, 11 + + # input m1 m2 m3 m4 + add 20, 4, 21 + xxlor 49, 24, 24 + xxlor 50, 25, 25 + lxvw4x 43, 0, 20 + addi 17, 20, 16 + lxvw4x 44, 0, 17 + vperm 14, 11, 12, 17 + vperm 15, 11, 12, 18 + addi 17, 17, 16 + lxvw4x 43, 0, 17 + addi 17, 17, 16 + lxvw4x 44, 0, 17 + vperm 17, 11, 12, 17 + vperm 18, 11, 12, 18 + + vand 20, 14, 25 # a0 + vand 9, 17, 25 # a0 + vsrd 21, 14, 31 # >> 26 + vsrd 22, 21, 31 # 12 bits left + vsrd 10, 17, 31 # >> 26 + vsrd 11, 10, 31 # 12 bits left + + vand 21, 21, 25 # a1 + vand 10, 10, 25 # a1 + + vspltisb 13, 12 + vand 16, 15, 25 + vsld 23, 16, 13 + vor 22, 22, 23 + vand 22, 22, 25 # a2 + vand 16, 18, 25 + vsld 12, 16, 13 + vor 11, 11, 12 + vand 11, 11, 25 # a2 + vspltisb 13, 14 + vsrd 23, 15, 13 # >> 14 + vsrd 24, 23, 31 # >> 26, a4 + vand 23, 23, 25 # a3 + vsrd 12, 18, 13 # >> 14 + vsrd 13, 12, 31 # >> 26, a4 + vand 12, 12, 25 # a3 + + vaddudm 4, 4, 20 + vaddudm 5, 5, 21 + vaddudm 6, 6, 22 + vaddudm 7, 7, 23 + vaddudm 8, 8, 24 + + # Smash 4 message blocks into 5 vectors of [m4, m2, m3, m1] + vmrgow 4, 9, 4 + vmrgow 5, 10, 5 + vmrgow 6, 11, 6 + vmrgow 7, 12, 7 + vmrgow 8, 13, 8 + vaddudm 8, 8, 19 + + addi 5, 5, -64 # len -= 64 + addi 21, 21, 64 # offset += 64 + + bdnz loop_4blocks + +Skip_block_loop: + xxlor 58, 0, 0 + xxlor 59, 1, 1 + xxlor 60, 2, 2 + xxlor 61, 3, 3 + xxlor 62, 4, 4 + xxlor 32, 5, 5 + xxlor 33, 6, 6 + xxlor 34, 7, 7 + xxlor 35, 8, 8 + + # Multiply odd words and even words + mul_odd + mul_even + + # Sum the products. + xxpermdi 41, 31, 46, 0 + xxpermdi 42, 31, 47, 0 + vaddudm 4, 14, 9 + xxpermdi 36, 31, 36, 3 + vaddudm 5, 15, 10 + xxpermdi 37, 31, 37, 3 + xxpermdi 43, 31, 48, 0 + vaddudm 6, 16, 11 + xxpermdi 38, 31, 38, 3 + xxpermdi 44, 31, 49, 0 + vaddudm 7, 17, 12 + xxpermdi 39, 31, 39, 3 + xxpermdi 45, 31, 50, 0 + vaddudm 8, 18, 13 + xxpermdi 40, 31, 40, 3 + + # carry reduction + vspltisb 9, 2 + vsrd 10, 4, 31 + vsrd 11, 7, 31 + vand 7, 7, 25 + vand 4, 4, 25 + vaddudm 8, 8, 11 + vsrd 12, 8, 31 + vaddudm 5, 5, 10 + + vsrd 11, 5, 31 + vand 8, 8, 25 + vand 5, 5, 25 + vaddudm 4, 4, 12 + vsld 10, 12, 9 + vaddudm 6, 6, 11 + + vsrd 13, 6, 31 + vand 6, 6, 25 + vaddudm 4, 4, 10 + vsrd 10, 4, 31 + vaddudm 7, 7, 13 + + vsrd 11, 7, 31 + vand 7, 7, 25 + vand 4, 4, 25 + vaddudm 5, 5, 10 + vsrd 10, 5, 31 + vand 5, 5, 25 + vaddudm 6, 6, 10 + vaddudm 8, 8, 11 + + b do_final_update + +do_final_update: + # combine 26 bit limbs + # v4, v5, v6, v7 and v8 are 26 bit vectors + vsld 5, 5, 31 + vor 20, 4, 5 + vspltisb 11, 12 + vsrd 12, 6, 11 + vsld 6, 6, 31 + vsld 6, 6, 31 + vor 20, 20, 6 + vspltisb 11, 14 + vsld 7, 7, 11 + vor 21, 7, 12 + mfvsrld 16, 40 # save last 2 bytes + vsld 8, 8, 11 + vsld 8, 8, 31 + vor 21, 21, 8 + mfvsrld 17, 52 + mfvsrld 19, 53 + srdi 16, 16, 24 + + std 17, 0(3) + std 19, 8(3) + stw 16, 16(3) + +Out_loop: + li 3, 0 + + RESTORE_REGS + + blr + +Out_no_poly1305: + li 3, 0 + blr +SYM_FUNC_END(poly1305_p10le_4blocks) + +# +# ======================================================================= +# The following functions implement 64 x 64 bits multiplication poly1305. +# +SYM_FUNC_START_LOCAL(Poly1305_init_64) + # mask 0x0FFFFFFC0FFFFFFC + # mask 0x0FFFFFFC0FFFFFFF + addis 10, 2, rmask@toc@ha + addi 10, 10, rmask@toc@l + ld 11, 0(10) + ld 12, 8(10) + + # initialize + # load key from r3 + ld 9, 24(3) + ld 10, 32(3) + and. 9, 9, 11 # cramp mask r0 + and. 10, 10, 12 # cramp mask r1 + + srdi 21, 10, 2 + add 19, 21, 10 # s1: r19 - (r1 >> 2) *5 + + # setup r and s + li 25, 0 + mtvsrdd 32+0, 9, 19 # r0, s1 + mtvsrdd 32+1, 10, 9 # r1, r0 + mtvsrdd 32+2, 19, 25 # s1 + mtvsrdd 32+3, 9, 25 # r0 + + blr +SYM_FUNC_END(Poly1305_init_64) + +# Poly1305_mult +# v6 = (h0, h1), v8 = h2 +# v0 = (r0, s1), v1 = (r1, r0), v2 = s1, v3 = r0 +# +# Output: v7, v10, v11 +# +SYM_FUNC_START_LOCAL(Poly1305_mult) + # + # d0 = h0 * r0 + h1 * s1 + vmsumudm 7, 6, 0, 9 # h0 * r0, h1 * s1 + + # d1 = h0 * r1 + h1 * r0 + h2 * s1 + vmsumudm 11, 6, 1, 9 # h0 * r1, h1 * r0 + vmsumudm 10, 8, 2, 11 # d1 += h2 * s1 + + # d2 = r0 + vmsumudm 11, 8, 3, 9 # d2 = h2 * r0 + blr +SYM_FUNC_END(Poly1305_mult) + +# +# carry reduction +# h %=p +# +# Input: v7, v10, v11 +# Output: r27, r28, r29 +# +SYM_FUNC_START_LOCAL(Carry_reduction) + mfvsrld 27, 32+7 + mfvsrld 28, 32+10 + mfvsrld 29, 32+11 + mfvsrd 20, 32+7 # h0.h + mfvsrd 21, 32+10 # h1.h + + addc 28, 28, 20 + adde 29, 29, 21 + srdi 22, 29, 0x2 + sldi 23, 22, 0x2 + add 23, 23, 22 # (h2 & 3) * 5 + addc 27, 27, 23 # h0 + addze 28, 28 # h1 + andi. 29, 29, 0x3 # h2 + blr +SYM_FUNC_END(Carry_reduction) + +# +# poly1305 multiplication +# h *= r, h %= p +# d0 = h0 * r0 + h1 * s1 +# d1 = h0 * r1 + h1 * r0 + h2 * s1 +# d2 = h0 * r0 +# +# +# unsigned int poly1305_test_64s(unisgned char *state, const byte *src, size_t len, highbit) +# - no highbit if final leftover block (highbit = 0) +# +SYM_FUNC_START(poly1305_64s) + cmpdi 5, 0 + ble Out_no_poly1305_64 + + mflr 0 + std 0, 16(1) + stdu 1,-400(1) + + SAVE_GPR 14, 112, 1 + SAVE_GPR 15, 120, 1 + SAVE_GPR 16, 128, 1 + SAVE_GPR 17, 136, 1 + SAVE_GPR 18, 144, 1 + SAVE_GPR 19, 152, 1 + SAVE_GPR 20, 160, 1 + SAVE_GPR 21, 168, 1 + SAVE_GPR 22, 176, 1 + SAVE_GPR 23, 184, 1 + SAVE_GPR 24, 192, 1 + SAVE_GPR 25, 200, 1 + SAVE_GPR 26, 208, 1 + SAVE_GPR 27, 216, 1 + SAVE_GPR 28, 224, 1 + SAVE_GPR 29, 232, 1 + SAVE_GPR 30, 240, 1 + SAVE_GPR 31, 248, 1 + + # Init poly1305 + bl Poly1305_init_64 + + li 25, 0 # offset to inp and outp + + add 11, 25, 4 + + # load h + # h0, h1, h2? + ld 27, 0(3) + ld 28, 8(3) + lwz 29, 16(3) + + li 30, 16 + divdu 31, 5, 30 + + mtctr 31 + + mr 24, 6 # highbit + +Loop_block_64: + vxor 9, 9, 9 + + ld 20, 0(11) + ld 21, 8(11) + addi 11, 11, 16 + + addc 27, 27, 20 + adde 28, 28, 21 + adde 29, 29, 24 + + li 22, 0 + mtvsrdd 32+6, 27, 28 # h0, h1 + mtvsrdd 32+8, 29, 22 # h2 + + bl Poly1305_mult + + bl Carry_reduction + + bdnz Loop_block_64 + + std 27, 0(3) + std 28, 8(3) + stw 29, 16(3) + + li 3, 0 + + RESTORE_GPR 14, 112, 1 + RESTORE_GPR 15, 120, 1 + RESTORE_GPR 16, 128, 1 + RESTORE_GPR 17, 136, 1 + RESTORE_GPR 18, 144, 1 + RESTORE_GPR 19, 152, 1 + RESTORE_GPR 20, 160, 1 + RESTORE_GPR 21, 168, 1 + RESTORE_GPR 22, 176, 1 + RESTORE_GPR 23, 184, 1 + RESTORE_GPR 24, 192, 1 + RESTORE_GPR 25, 200, 1 + RESTORE_GPR 26, 208, 1 + RESTORE_GPR 27, 216, 1 + RESTORE_GPR 28, 224, 1 + RESTORE_GPR 29, 232, 1 + RESTORE_GPR 30, 240, 1 + RESTORE_GPR 31, 248, 1 + + addi 1, 1, 400 + ld 0, 16(1) + mtlr 0 + + blr + +Out_no_poly1305_64: + li 3, 0 + blr +SYM_FUNC_END(poly1305_64s) + +# +# Input: r3 = h, r4 = s, r5 = mac +# mac = h + s +# +SYM_FUNC_START(poly1305_emit_64) + ld 10, 0(3) + ld 11, 8(3) + ld 12, 16(3) + + # compare modulus + # h + 5 + (-p) + mr 6, 10 + mr 7, 11 + mr 8, 12 + addic. 6, 6, 5 + addze 7, 7 + addze 8, 8 + srdi 9, 8, 2 # overflow? + cmpdi 9, 0 + beq Skip_h64 + mr 10, 6 + mr 11, 7 + mr 12, 8 + +Skip_h64: + ld 6, 0(4) + ld 7, 8(4) + addc 10, 10, 6 + adde 11, 11, 7 + addze 12, 12 + + std 10, 0(5) + std 11, 8(5) + blr +SYM_FUNC_END(poly1305_emit_64) + +SYM_DATA_START_LOCAL(RMASK) +.align 5 +rmask: +.byte 0xff, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f, 0xfc, 0xff, 0xff, 0x0f +cnum: +.long 0x03ffffff, 0x00000000, 0x03ffffff, 0x00000000 +.long 0x1a, 0x00, 0x1a, 0x00 +.long 0x01000000, 0x01000000, 0x01000000, 0x01000000 +.long 0x00010203, 0x04050607, 0x10111213, 0x14151617 +.long 0x08090a0b, 0x0c0d0e0f, 0x18191a1b, 0x1c1d1e1f +SYM_DATA_END(RMASK) diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 8a6754ffdc7e..a6c7069bec5d 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -393,7 +393,6 @@ int validate_sp_size(unsigned long sp, struct task_struct *p, */ #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH static inline void prefetch(const void *x) { @@ -411,8 +410,6 @@ static inline void prefetchw(const void *x) __asm__ __volatile__ ("dcbtst 0,%0" : : "r" (x)); } -#define spin_lock_prefetch(x) prefetchw(x) - /* asm stubs */ extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val); extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val); diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h index 46c31fb8748d..30a12d208687 100644 --- a/arch/powerpc/include/asm/word-at-a-time.h +++ b/arch/powerpc/include/asm/word-at-a-time.h @@ -34,7 +34,7 @@ static inline long find_zero(unsigned long mask) return leading_zero_bits >> 3; } -static inline bool has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) +static inline unsigned long has_zero(unsigned long val, unsigned long *data, const struct word_at_a_time *c) { unsigned long rhs = val | c->low_bits; *data = rhs; diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index f132d8704263..6440b1bb332a 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -375,8 +375,7 @@ _GLOBAL(generic_secondary_smp_init) beq 20f /* start the specified thread */ - LOAD_REG_ADDR(r5, fsl_secondary_thread_init) - ld r4, 0(r5) + LOAD_REG_ADDR(r5, DOTSYM(fsl_secondary_thread_init)) bl book3e_start_thread /* stop the current thread */ diff --git a/arch/powerpc/kernel/rtas_flash.c b/arch/powerpc/kernel/rtas_flash.c index 4caf5e3079eb..359577ec1680 100644 --- a/arch/powerpc/kernel/rtas_flash.c +++ b/arch/powerpc/kernel/rtas_flash.c @@ -709,9 +709,9 @@ static int __init rtas_flash_init(void) if (!rtas_validate_flash_data.buf) return -ENOMEM; - flash_block_cache = kmem_cache_create("rtas_flash_cache", - RTAS_BLK_SIZE, RTAS_BLK_SIZE, 0, - NULL); + flash_block_cache = kmem_cache_create_usercopy("rtas_flash_cache", + RTAS_BLK_SIZE, RTAS_BLK_SIZE, + 0, 0, RTAS_BLK_SIZE, NULL); if (!flash_block_cache) { printk(KERN_ERR "%s: failed to create block cache\n", __func__); diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 8c0b08b7a80e..20e50586e8a2 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -538,3 +538,4 @@ 449 common futex_waitv sys_futex_waitv 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/arch/powerpc/kernel/trace/ftrace_mprofile.S b/arch/powerpc/kernel/trace/ftrace_mprofile.S index ffb1db386849..1f7d86de1538 100644 --- a/arch/powerpc/kernel/trace/ftrace_mprofile.S +++ b/arch/powerpc/kernel/trace/ftrace_mprofile.S @@ -33,6 +33,9 @@ * and then arrange for the ftrace function to be called. */ .macro ftrace_regs_entry allregs + /* Create a minimal stack frame for representing B */ + PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) + /* Create our stack frame + pt_regs */ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) @@ -42,7 +45,7 @@ #ifdef CONFIG_PPC64 /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE(r1) + std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 @@ -77,6 +80,8 @@ mflr r7 /* Save it as pt_regs->nip */ PPC_STL r7, _NIP(r1) + /* Also save it in B's stackframe header for proper unwind */ + PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) /* Save the read LR in pt_regs->link */ PPC_STL r0, _LINK(r1) @@ -142,7 +147,7 @@ #endif /* Pop our stack frame */ - addi r1, r1, SWITCH_FRAME_SIZE + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE #ifdef CONFIG_LIVEPATCH_64 /* Based on the cmpd above, if the NIP was altered handle livepatch */ diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index f7930360406e..e0208cb12058 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -326,8 +326,7 @@ static void __ref __vmemmap_free(unsigned long start, unsigned long end, start = ALIGN_DOWN(start, page_size); if (altmap) { alt_start = altmap->base_pfn; - alt_end = altmap->base_pfn + altmap->reserve + - altmap->free + altmap->alloc + altmap->align; + alt_end = altmap->base_pfn + altmap->reserve + altmap->free; } pr_debug("vmemmap_free %lx...%lx\n", start, end); diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index 9c43cf32f4c9..40aa58206888 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -180,7 +180,7 @@ static void wake_hw_thread(void *info) unsigned long inia; int cpu = *(const int *)info; - inia = *(unsigned long *)fsl_secondary_thread_init; + inia = ppc_function_entry(fsl_secondary_thread_init); book3e_start_thread(cpu_thread_in_core(cpu), inia); } #endif diff --git a/arch/powerpc/platforms/8xx/adder875.c b/arch/powerpc/platforms/8xx/adder875.c index 7e83eb6746f4..f6bd232f8323 100644 --- a/arch/powerpc/platforms/8xx/adder875.c +++ b/arch/powerpc/platforms/8xx/adder875.c @@ -7,7 +7,6 @@ */ #include <linux/init.h> -#include <linux/fs_enet_pd.h> #include <linux/of_platform.h> #include <asm/time.h> diff --git a/arch/powerpc/platforms/8xx/mpc885ads_setup.c b/arch/powerpc/platforms/8xx/mpc885ads_setup.c index 2fc7cacbcd96..c7c4f082b838 100644 --- a/arch/powerpc/platforms/8xx/mpc885ads_setup.c +++ b/arch/powerpc/platforms/8xx/mpc885ads_setup.c @@ -21,7 +21,6 @@ #include <linux/device.h> #include <linux/delay.h> -#include <linux/fs_enet_pd.h> #include <linux/fs_uart_pd.h> #include <linux/fsl_devices.h> #include <linux/mii.h> diff --git a/arch/powerpc/platforms/8xx/tqm8xx_setup.c b/arch/powerpc/platforms/8xx/tqm8xx_setup.c index 7d8eb50bb9cd..6e56be852b2c 100644 --- a/arch/powerpc/platforms/8xx/tqm8xx_setup.c +++ b/arch/powerpc/platforms/8xx/tqm8xx_setup.c @@ -24,7 +24,6 @@ #include <linux/device.h> #include <linux/delay.h> -#include <linux/fs_enet_pd.h> #include <linux/fs_uart_pd.h> #include <linux/fsl_devices.h> #include <linux/mii.h> diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c index ea807aa0c31a..38c5be34c895 100644 --- a/arch/powerpc/platforms/cell/spufs/inode.c +++ b/arch/powerpc/platforms/cell/spufs/inode.c @@ -86,7 +86,7 @@ spufs_new_inode(struct super_block *sb, umode_t mode) inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); out: return inode; } diff --git a/arch/powerpc/platforms/powermac/time.c b/arch/powerpc/platforms/powermac/time.c index 4c5790aff1b5..8633891b7aa5 100644 --- a/arch/powerpc/platforms/powermac/time.c +++ b/arch/powerpc/platforms/powermac/time.c @@ -26,8 +26,8 @@ #include <linux/rtc.h> #include <linux/of_address.h> +#include <asm/early_ioremap.h> #include <asm/sections.h> -#include <asm/io.h> #include <asm/machdep.h> #include <asm/time.h> #include <asm/nvram.h> @@ -182,7 +182,7 @@ static int __init via_calibrate_decr(void) return 0; } of_node_put(vias); - via = ioremap(rsrc.start, resource_size(&rsrc)); + via = early_ioremap(rsrc.start, resource_size(&rsrc)); if (via == NULL) { printk(KERN_ERR "Failed to map VIA for timer calibration !\n"); return 0; @@ -207,7 +207,7 @@ static int __init via_calibrate_decr(void) ppc_tb_freq = (dstart - dend) * 100 / 6; - iounmap(via); + early_iounmap((void *)via, resource_size(&rsrc)); return 1; } diff --git a/arch/powerpc/sysdev/fsl_soc.c b/arch/powerpc/sysdev/fsl_soc.c index 68709743450e..c11771542bec 100644 --- a/arch/powerpc/sysdev/fsl_soc.c +++ b/arch/powerpc/sysdev/fsl_soc.c @@ -23,7 +23,6 @@ #include <linux/phy.h> #include <linux/spi/spi.h> #include <linux/fsl_devices.h> -#include <linux/fs_enet_pd.h> #include <linux/fs_uart_pd.h> #include <linux/reboot.h> @@ -37,8 +36,6 @@ #include <asm/cpm2.h> #include <asm/fsl_hcalls.h> /* For the Freescale hypervisor */ -extern void init_fcc_ioports(struct fs_platform_info*); -extern void init_fec_ioports(struct fs_platform_info*); extern void init_smc_ioports(struct fs_uart_platform_info*); static phys_addr_t immrbase = -1; diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6943d34c1ec1..b08d0e1a93b9 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -570,24 +570,30 @@ config TOOLCHAIN_HAS_ZIHINTPAUSE config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI def_bool y # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc - depends on AS_IS_GNU && AS_VERSION >= 23800 - help - Newer binutils versions default to ISA spec version 20191213 which - moves some instructions from the I extension to the Zicsr and Zifencei - extensions. + # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=98416dbb0a62579d4a7a4a76bab51b5b52fec2cd + depends on AS_IS_GNU && AS_VERSION >= 23600 + help + Binutils-2.38 and GCC-12.1.0 bumped the default ISA spec to the newer + 20191213 version, which moves some instructions from the I extension to + the Zicsr and Zifencei extensions. This requires explicitly specifying + Zicsr and Zifencei when binutils >= 2.38 or GCC >= 12.1.0. Zicsr + and Zifencei are supported in binutils from version 2.36 onwards. + To make life easier, and avoid forcing toolchains that default to a + newer ISA spec to version 2.2, relax the check to binutils >= 2.36. + For clang < 17 or GCC < 11.3.0, for which this is not possible or need + special treatment, this is dealt with in TOOLCHAIN_NEEDS_OLD_ISA_SPEC. config TOOLCHAIN_NEEDS_OLD_ISA_SPEC def_bool y depends on TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI # https://github.com/llvm/llvm-project/commit/22e199e6afb1263c943c0c0d4498694e15bf8a16 - depends on CC_IS_CLANG && CLANG_VERSION < 170000 - help - Certain versions of clang do not support zicsr and zifencei via -march - but newer versions of binutils require it for the reasons noted in the - help text of CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI. This - option causes an older ISA spec compatible with these older versions - of clang to be passed to GAS, which has the same result as passing zicsr - and zifencei to -march. + # https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=d29f5d6ab513c52fd872f532c492e35ae9fd6671 + depends on (CC_IS_CLANG && CLANG_VERSION < 170000) || (CC_IS_GCC && GCC_VERSION < 110300) + help + Certain versions of clang and GCC do not support zicsr and zifencei via + -march. This option causes an older ISA spec compatible with these older + versions of clang and GCC to be passed to GAS, which has the same result + as passing zicsr and zifencei to -march. config FPU bool "FPU support" diff --git a/arch/riscv/include/asm/acpi.h b/arch/riscv/include/asm/acpi.h index f71ce21ff684..d5604d2073bc 100644 --- a/arch/riscv/include/asm/acpi.h +++ b/arch/riscv/include/asm/acpi.h @@ -19,7 +19,7 @@ typedef u64 phys_cpuid_t; #define PHYS_CPUID_INVALID INVALID_HARTID /* ACPI table mapping after acpi_permanent_mmap is set */ -void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size); #define acpi_os_ioremap acpi_os_ioremap #define acpi_strict 1 /* No out-of-spec workarounds on RISC-V */ diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 0d8c92c5dfb7..c4dca559bb97 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -36,6 +36,10 @@ static inline void flush_dcache_page(struct page *page) #define flush_icache_user_page(vma, pg, addr, len) \ flush_icache_mm(vma->vm_mm, 0) +#ifdef CONFIG_64BIT +#define flush_cache_vmap(start, end) flush_tlb_kernel_range(start, end) +#endif + #ifndef CONFIG_SMP #define flush_icache_all() local_flush_icache_all() diff --git a/arch/riscv/include/asm/efi.h b/arch/riscv/include/asm/efi.h index 29e9a0d84b16..8a6a128ec57f 100644 --- a/arch/riscv/include/asm/efi.h +++ b/arch/riscv/include/asm/efi.h @@ -21,12 +21,6 @@ extern void efi_init(void); int efi_create_mapping(struct mm_struct *mm, efi_memory_desc_t *md); int efi_set_mapping_permissions(struct mm_struct *mm, efi_memory_desc_t *md, bool); -#define arch_efi_call_virt_setup() ({ \ - sync_kernel_mappings(efi_mm.pgd); \ - efi_virtmap_load(); \ - }) -#define arch_efi_call_virt_teardown() efi_virtmap_unload() - #define ARCH_EFI_IRQ_FLAGS_MASK (SR_IE | SR_SPIE) /* Load initrd anywhere in system RAM */ @@ -46,8 +40,8 @@ static inline unsigned long efi_get_kimg_min_align(void) #define EFI_KIMG_PREFERRED_ADDRESS efi_get_kimg_min_align() -void efi_virtmap_load(void); -void efi_virtmap_unload(void); +void arch_efi_call_virt_setup(void); +void arch_efi_call_virt_teardown(void); unsigned long stext_offset(void); diff --git a/arch/riscv/include/asm/insn.h b/arch/riscv/include/asm/insn.h index 4e1505cef8aa..fce00400c9bc 100644 --- a/arch/riscv/include/asm/insn.h +++ b/arch/riscv/include/asm/insn.h @@ -110,6 +110,7 @@ #define RVC_INSN_FUNCT4_OPOFF 12 #define RVC_INSN_FUNCT3_MASK GENMASK(15, 13) #define RVC_INSN_FUNCT3_OPOFF 13 +#define RVC_INSN_J_RS1_MASK GENMASK(11, 7) #define RVC_INSN_J_RS2_MASK GENMASK(6, 2) #define RVC_INSN_OPCODE_MASK GENMASK(1, 0) #define RVC_ENCODE_FUNCT3(f_) (RVC_FUNCT3_##f_ << RVC_INSN_FUNCT3_OPOFF) @@ -245,8 +246,6 @@ __RISCV_INSN_FUNCS(c_jal, RVC_MASK_C_JAL, RVC_MATCH_C_JAL) __RISCV_INSN_FUNCS(auipc, RVG_MASK_AUIPC, RVG_MATCH_AUIPC) __RISCV_INSN_FUNCS(jalr, RVG_MASK_JALR, RVG_MATCH_JALR) __RISCV_INSN_FUNCS(jal, RVG_MASK_JAL, RVG_MATCH_JAL) -__RISCV_INSN_FUNCS(c_jr, RVC_MASK_C_JR, RVC_MATCH_C_JR) -__RISCV_INSN_FUNCS(c_jalr, RVC_MASK_C_JALR, RVC_MATCH_C_JALR) __RISCV_INSN_FUNCS(c_j, RVC_MASK_C_J, RVC_MATCH_C_J) __RISCV_INSN_FUNCS(beq, RVG_MASK_BEQ, RVG_MATCH_BEQ) __RISCV_INSN_FUNCS(bne, RVG_MASK_BNE, RVG_MATCH_BNE) @@ -273,6 +272,18 @@ static __always_inline bool riscv_insn_is_branch(u32 code) return (code & RV_INSN_OPCODE_MASK) == RVG_OPCODE_BRANCH; } +static __always_inline bool riscv_insn_is_c_jr(u32 code) +{ + return (code & RVC_MASK_C_JR) == RVC_MATCH_C_JR && + (code & RVC_INSN_J_RS1_MASK) != 0; +} + +static __always_inline bool riscv_insn_is_c_jalr(u32 code) +{ + return (code & RVC_MASK_C_JALR) == RVC_MATCH_C_JALR && + (code & RVC_INSN_J_RS1_MASK) != 0; +} + #define RV_IMM_SIGN(x) (-(((x) >> 31) & 1)) #define RVC_IMM_SIGN(x) (-(((x) >> 12) & 1)) #define RV_X(X, s, mask) (((X) >> (s)) & (mask)) diff --git a/arch/riscv/include/asm/mmio.h b/arch/riscv/include/asm/mmio.h index aff6c33ab0c0..4c58ee7f95ec 100644 --- a/arch/riscv/include/asm/mmio.h +++ b/arch/riscv/include/asm/mmio.h @@ -101,9 +101,9 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) * Relaxed I/O memory access primitives. These follow the Device memory * ordering rules but do not guarantee any ordering relative to Normal memory * accesses. These are defined to order the indicated access (either a read or - * write) with all other I/O memory accesses. Since the platform specification - * defines that all I/O regions are strongly ordered on channel 2, no explicit - * fences are required to enforce this ordering. + * write) with all other I/O memory accesses to the same peripheral. Since the + * platform specification defines that all I/O regions are strongly ordered on + * channel 0, no explicit fences are required to enforce this ordering. */ /* FIXME: These are now the same as asm-generic */ #define __io_rbr() do {} while (0) @@ -125,14 +125,14 @@ static inline u64 __raw_readq(const volatile void __iomem *addr) #endif /* - * I/O memory access primitives. Reads are ordered relative to any - * following Normal memory access. Writes are ordered relative to any prior - * Normal memory access. The memory barriers here are necessary as RISC-V + * I/O memory access primitives. Reads are ordered relative to any following + * Normal memory read and delay() loop. Writes are ordered relative to any + * prior Normal memory write. The memory barriers here are necessary as RISC-V * doesn't define any ordering between the memory space and the I/O space. */ #define __io_br() do {} while (0) -#define __io_ar(v) __asm__ __volatile__ ("fence i,r" : : : "memory") -#define __io_bw() __asm__ __volatile__ ("fence w,o" : : : "memory") +#define __io_ar(v) ({ __asm__ __volatile__ ("fence i,ir" : : : "memory"); }) +#define __io_bw() ({ __asm__ __volatile__ ("fence w,o" : : : "memory"); }) #define __io_aw() mmiowb_set_pending() #define readb(c) ({ u8 __v; __io_br(); __v = readb_cpu(c); __io_ar(__v); __v; }) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index ac42e9121e52..a6f47c092bdc 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -188,6 +188,8 @@ extern struct pt_alloc_ops pt_ops __initdata; #define PAGE_KERNEL_IO __pgprot(_PAGE_IOREMAP) extern pgd_t swapper_pg_dir[]; +extern pgd_t trampoline_pg_dir[]; +extern pgd_t early_pg_dir[]; #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline int pmd_present(pmd_t pmd) diff --git a/arch/riscv/include/asm/vector.h b/arch/riscv/include/asm/vector.h index 3d78930cab51..c5ee07b3df07 100644 --- a/arch/riscv/include/asm/vector.h +++ b/arch/riscv/include/asm/vector.h @@ -70,8 +70,9 @@ static __always_inline void __vstate_csr_save(struct __riscv_v_ext_state *dest) "csrr %1, " __stringify(CSR_VTYPE) "\n\t" "csrr %2, " __stringify(CSR_VL) "\n\t" "csrr %3, " __stringify(CSR_VCSR) "\n\t" + "csrr %4, " __stringify(CSR_VLENB) "\n\t" : "=r" (dest->vstart), "=r" (dest->vtype), "=r" (dest->vl), - "=r" (dest->vcsr) : :); + "=r" (dest->vcsr), "=r" (dest->vlenb) : :); } static __always_inline void __vstate_csr_restore(struct __riscv_v_ext_state *src) diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h index 58d3e447f191..924d01b56c9a 100644 --- a/arch/riscv/include/asm/vmalloc.h +++ b/arch/riscv/include/asm/vmalloc.h @@ -3,12 +3,14 @@ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +extern bool pgtable_l4_enabled, pgtable_l5_enabled; + #define IOREMAP_MAX_ORDER (PUD_SHIFT) #define arch_vmap_pud_supported arch_vmap_pud_supported static inline bool arch_vmap_pud_supported(pgprot_t prot) { - return true; + return pgtable_l4_enabled || pgtable_l5_enabled; } #define arch_vmap_pmd_supported arch_vmap_pmd_supported diff --git a/arch/riscv/include/uapi/asm/bitsperlong.h b/arch/riscv/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..7d0b32e3b701 --- /dev/null +++ b/arch/riscv/include/uapi/asm/bitsperlong.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */ +/* + * Copyright (C) 2012 ARM Ltd. + * Copyright (C) 2015 Regents of the University of California + */ + +#ifndef _UAPI_ASM_RISCV_BITSPERLONG_H +#define _UAPI_ASM_RISCV_BITSPERLONG_H + +#define __BITS_PER_LONG (__SIZEOF_POINTER__ * 8) + +#include <asm-generic/bitsperlong.h> + +#endif /* _UAPI_ASM_RISCV_BITSPERLONG_H */ diff --git a/arch/riscv/include/uapi/asm/ptrace.h b/arch/riscv/include/uapi/asm/ptrace.h index e17c550986a6..283800130614 100644 --- a/arch/riscv/include/uapi/asm/ptrace.h +++ b/arch/riscv/include/uapi/asm/ptrace.h @@ -97,6 +97,7 @@ struct __riscv_v_ext_state { unsigned long vl; unsigned long vtype; unsigned long vcsr; + unsigned long vlenb; void *datap; /* * In signal handler, datap will be set a correct user stack offset diff --git a/arch/riscv/kernel/acpi.c b/arch/riscv/kernel/acpi.c index 5ee03ebab80e..56cb2c986c48 100644 --- a/arch/riscv/kernel/acpi.c +++ b/arch/riscv/kernel/acpi.c @@ -215,9 +215,9 @@ void __init __acpi_unmap_table(void __iomem *map, unsigned long size) early_iounmap(map, size); } -void *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) +void __iomem *acpi_os_ioremap(acpi_physical_address phys, acpi_size size) { - return memremap(phys, size, MEMREMAP_WB); + return (void __iomem *)memremap(phys, size, MEMREMAP_WB); } #ifdef CONFIG_PCI diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 189345773e7e..b86e5e2c3aea 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -11,7 +11,13 @@ compat_vdso-syms += flush_icache COMPAT_CC := $(CC) COMPAT_LD := $(LD) -COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 +# binutils 2.35 does not support the zifencei extension, but in the ISA +# spec 20191213, G stands for IMAFD_ZICSR_ZIFENCEI. +ifdef CONFIG_TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI + COMPAT_CC_FLAGS := -march=rv32g -mabi=ilp32 +else + COMPAT_CC_FLAGS := -march=rv32imafd -mabi=ilp32 +endif COMPAT_LD_FLAGS := -melf32lriscv # Disable attributes, as they're useless and break the build. diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index a2fc952318e9..35b854cf078e 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -17,6 +17,11 @@ #include <asm/smp.h> #include <asm/pgtable.h> +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpuid_to_hartid_map(cpu); +} + /* * Returns the hart ID of the given device tree node, or -ENODEV if the node * isn't an enabled and valid RISC-V hart node. diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index b351a3c01355..55f1d7856b54 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -18,4 +18,6 @@ void arch_crash_save_vmcoreinfo(void) vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); + vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", + kernel_map.va_kernel_pa_offset); } diff --git a/arch/riscv/kernel/elf_kexec.c b/arch/riscv/kernel/elf_kexec.c index 5372b708fae2..c08bb5c3b385 100644 --- a/arch/riscv/kernel/elf_kexec.c +++ b/arch/riscv/kernel/elf_kexec.c @@ -281,7 +281,7 @@ static void *elf_kexec_load(struct kimage *image, char *kernel_buf, kbuf.buffer = initrd; kbuf.bufsz = kbuf.memsz = initrd_len; kbuf.buf_align = PAGE_SIZE; - kbuf.top_down = false; + kbuf.top_down = true; kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; ret = kexec_add_buffer(&kbuf); if (ret) @@ -425,6 +425,7 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi, * sym, instead of searching the whole relsec. */ case R_RISCV_PCREL_HI20: + case R_RISCV_CALL_PLT: case R_RISCV_CALL: *(u64 *)loc = CLEAN_IMM(UITYPE, *(u64 *)loc) | ENCODE_UJTYPE_IMM(val - addr); diff --git a/arch/riscv/kernel/irq.c b/arch/riscv/kernel/irq.c index d0577cc6a081..a8efa053c4a5 100644 --- a/arch/riscv/kernel/irq.c +++ b/arch/riscv/kernel/irq.c @@ -84,6 +84,9 @@ void do_softirq_own_stack(void) : [sp] "r" (sp) : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", +#ifndef CONFIG_FRAME_POINTER + "s0", +#endif "memory"); } else #endif diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 1d572cf3140f..487303e3ef22 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -25,9 +25,6 @@ enum riscv_regset { #ifdef CONFIG_FPU REGSET_F, #endif -#ifdef CONFIG_RISCV_ISA_V - REGSET_V, -#endif }; static int riscv_gpr_get(struct task_struct *target, @@ -84,61 +81,6 @@ static int riscv_fpr_set(struct task_struct *target, } #endif -#ifdef CONFIG_RISCV_ISA_V -static int riscv_vr_get(struct task_struct *target, - const struct user_regset *regset, - struct membuf to) -{ - struct __riscv_v_ext_state *vstate = &target->thread.vstate; - - if (!riscv_v_vstate_query(task_pt_regs(target))) - return -EINVAL; - - /* - * Ensure the vector registers have been saved to the memory before - * copying them to membuf. - */ - if (target == current) - riscv_v_vstate_save(current, task_pt_regs(current)); - - /* Copy vector header from vstate. */ - membuf_write(&to, vstate, offsetof(struct __riscv_v_ext_state, datap)); - membuf_zero(&to, sizeof(vstate->datap)); - - /* Copy all the vector registers from vstate. */ - return membuf_write(&to, vstate->datap, riscv_v_vsize); -} - -static int riscv_vr_set(struct task_struct *target, - const struct user_regset *regset, - unsigned int pos, unsigned int count, - const void *kbuf, const void __user *ubuf) -{ - int ret, size; - struct __riscv_v_ext_state *vstate = &target->thread.vstate; - - if (!riscv_v_vstate_query(task_pt_regs(target))) - return -EINVAL; - - /* Copy rest of the vstate except datap */ - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate, 0, - offsetof(struct __riscv_v_ext_state, datap)); - if (unlikely(ret)) - return ret; - - /* Skip copy datap. */ - size = sizeof(vstate->datap); - count -= size; - ubuf += size; - - /* Copy all the vector registers. */ - pos = 0; - ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, vstate->datap, - 0, riscv_v_vsize); - return ret; -} -#endif - static const struct user_regset riscv_user_regset[] = { [REGSET_X] = { .core_note_type = NT_PRSTATUS, @@ -158,17 +100,6 @@ static const struct user_regset riscv_user_regset[] = { .set = riscv_fpr_set, }, #endif -#ifdef CONFIG_RISCV_ISA_V - [REGSET_V] = { - .core_note_type = NT_RISCV_VECTOR, - .align = 16, - .n = ((32 * RISCV_MAX_VLENB) + - sizeof(struct __riscv_v_ext_state)) / sizeof(__u32), - .size = sizeof(__u32), - .regset_get = riscv_vr_get, - .set = riscv_vr_set, - }, -#endif }; static const struct user_regset_view riscv_user_native_view = { diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 85bbce0f758c..40420afbb1a0 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -61,11 +61,6 @@ int riscv_hartid_to_cpuid(unsigned long hartid) return -ENOENT; } -bool arch_match_cpu_phys_id(int cpu, u64 phys_id) -{ - return phys_id == cpuid_to_hartid_map(cpu); -} - static void ipi_stop(void) { set_cpu_online(smp_processor_id(), false); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f910dfccbf5d..f798c853bede 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -297,7 +297,7 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) { if (user_mode(regs)) { - ulong syscall = regs->a7; + long syscall = regs->a7; regs->epc += 4; regs->orig_a0 = regs->a0; @@ -306,9 +306,9 @@ asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) syscall = syscall_enter_from_user_mode(regs, syscall); - if (syscall < NR_syscalls) + if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else + else if (syscall != -1) regs->a0 = -ENOSYS; syscall_exit_to_user_mode(regs); @@ -372,6 +372,9 @@ asmlinkage void noinstr do_irq(struct pt_regs *regs) : [sp] "r" (sp), [regs] "r" (regs) : "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", "t4", "t5", "t6", +#ifndef CONFIG_FRAME_POINTER + "s0", +#endif "memory"); } else #endif diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index ec486e5369d9..09b47ebacf2e 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -17,8 +17,11 @@ ENTRY(__asm_copy_from_user) li t6, SR_SUM csrs CSR_STATUS, t6 - /* Save for return value */ - mv t5, a2 + /* + * Save the terminal address which will be used to compute the number + * of bytes copied in case of a fixup exception. + */ + add t5, a0, a2 /* * Register allocation for code below: @@ -176,7 +179,7 @@ ENTRY(__asm_copy_from_user) 10: /* Disable access to user memory */ csrc CSR_STATUS, t6 - mv a0, t5 + sub a0, t5, a0 ret ENDPROC(__asm_copy_to_user) ENDPROC(__asm_copy_from_user) @@ -228,7 +231,7 @@ ENTRY(__clear_user) 11: /* Disable access to user memory */ csrc CSR_STATUS, t6 - mv a0, a1 + sub a0, a3, a0 ret ENDPROC(__clear_user) EXPORT_SYMBOL(__clear_user) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 430a3d05a841..c07ff3e2c90a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -26,12 +26,13 @@ #include <linux/kfence.h> #include <asm/fixmap.h> -#include <asm/tlbflush.h> -#include <asm/sections.h> -#include <asm/soc.h> #include <asm/io.h> -#include <asm/ptdump.h> #include <asm/numa.h> +#include <asm/pgtable.h> +#include <asm/ptdump.h> +#include <asm/sections.h> +#include <asm/soc.h> +#include <asm/tlbflush.h> #include "../kernel/head.h" @@ -214,8 +215,13 @@ static void __init setup_bootmem(void) memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); phys_ram_end = memblock_end_of_DRAM(); + + /* + * Make sure we align the start of the memory on a PMD boundary so that + * at worst, we map the linear mapping with PMD mappings. + */ if (!IS_ENABLED(CONFIG_XIP_KERNEL)) - phys_ram_base = memblock_start_of_DRAM(); + phys_ram_base = memblock_start_of_DRAM() & PMD_MASK; /* * In 64-bit, any use of __va/__pa before this point is wrong as we diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 8fc0efcf905c..a01bc15dce24 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -22,7 +22,6 @@ * region is not and then we have to go down to the PUD level. */ -extern pgd_t early_pg_dir[PTRS_PER_PGD]; pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss; p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss; pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss; diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index 2717f5490428..d21c6c92a683 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -431,11 +431,21 @@ static inline u32 rv_mulhu(u8 rd, u8 rs1, u8 rs2) return rv_r_insn(1, rs2, rs1, 3, rd, 0x33); } +static inline u32 rv_div(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 4, rd, 0x33); +} + static inline u32 rv_divu(u8 rd, u8 rs1, u8 rs2) { return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); } +static inline u32 rv_rem(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 6, rd, 0x33); +} + static inline u32 rv_remu(u8 rd, u8 rs1, u8 rs2) { return rv_r_insn(1, rs2, rs1, 7, rd, 0x33); @@ -501,6 +511,16 @@ static inline u32 rv_ble(u8 rs1, u8 rs2, u16 imm12_1) return rv_bge(rs2, rs1, imm12_1); } +static inline u32 rv_lb(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x03); +} + +static inline u32 rv_lh(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 1, rd, 0x03); +} + static inline u32 rv_lw(u8 rd, u16 imm11_0, u8 rs1) { return rv_i_insn(imm11_0, rs1, 2, rd, 0x03); @@ -766,11 +786,21 @@ static inline u32 rv_mulw(u8 rd, u8 rs1, u8 rs2) return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); } +static inline u32 rv_divw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 4, rd, 0x3b); +} + static inline u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) { return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); } +static inline u32 rv_remw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 6, rd, 0x3b); +} + static inline u32 rv_remuw(u8 rd, u8 rs1, u8 rs2) { return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b); diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index c648864c8cd1..8423f4ddf8f5 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -13,6 +13,8 @@ #include <asm/patch.h> #include "bpf_jit.h" +#define RV_FENTRY_NINSNS 2 + #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ @@ -241,7 +243,7 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) if (!is_tail_call) emit_mv(RV_REG_A0, RV_REG_A5, ctx); emit_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA, - is_tail_call ? 20 : 0, /* skip reserved nops and TCC init */ + is_tail_call ? (RV_FENTRY_NINSNS + 1) * 4 : 0, /* skip reserved nops and TCC init */ ctx); } @@ -578,7 +580,8 @@ static int add_exception_handler(const struct bpf_insn *insn, unsigned long pc; off_t offset; - if (!ctx->insns || !ctx->prog->aux->extable || BPF_MODE(insn->code) != BPF_PROBE_MEM) + if (!ctx->insns || !ctx->prog->aux->extable || + (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX)) return 0; if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) @@ -618,32 +621,7 @@ static int add_exception_handler(const struct bpf_insn *insn, return 0; } -static int gen_call_or_nops(void *target, void *ip, u32 *insns) -{ - s64 rvoff; - int i, ret; - struct rv_jit_context ctx; - - ctx.ninsns = 0; - ctx.insns = (u16 *)insns; - - if (!target) { - for (i = 0; i < 4; i++) - emit(rv_nop(), &ctx); - return 0; - } - - rvoff = (s64)(target - (ip + 4)); - emit(rv_sd(RV_REG_SP, -8, RV_REG_RA), &ctx); - ret = emit_jump_and_link(RV_REG_RA, rvoff, false, &ctx); - if (ret) - return ret; - emit(rv_ld(RV_REG_RA, -8, RV_REG_SP), &ctx); - - return 0; -} - -static int gen_jump_or_nops(void *target, void *ip, u32 *insns) +static int gen_jump_or_nops(void *target, void *ip, u32 *insns, bool is_call) { s64 rvoff; struct rv_jit_context ctx; @@ -658,38 +636,35 @@ static int gen_jump_or_nops(void *target, void *ip, u32 *insns) } rvoff = (s64)(target - ip); - return emit_jump_and_link(RV_REG_ZERO, rvoff, false, &ctx); + return emit_jump_and_link(is_call ? RV_REG_T0 : RV_REG_ZERO, rvoff, false, &ctx); } int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, void *old_addr, void *new_addr) { - u32 old_insns[4], new_insns[4]; + u32 old_insns[RV_FENTRY_NINSNS], new_insns[RV_FENTRY_NINSNS]; bool is_call = poke_type == BPF_MOD_CALL; - int (*gen_insns)(void *target, void *ip, u32 *insns); - int ninsns = is_call ? 4 : 2; int ret; - if (!is_bpf_text_address((unsigned long)ip)) + if (!is_kernel_text((unsigned long)ip) && + !is_bpf_text_address((unsigned long)ip)) return -ENOTSUPP; - gen_insns = is_call ? gen_call_or_nops : gen_jump_or_nops; - - ret = gen_insns(old_addr, ip, old_insns); + ret = gen_jump_or_nops(old_addr, ip, old_insns, is_call); if (ret) return ret; - if (memcmp(ip, old_insns, ninsns * 4)) + if (memcmp(ip, old_insns, RV_FENTRY_NINSNS * 4)) return -EFAULT; - ret = gen_insns(new_addr, ip, new_insns); + ret = gen_jump_or_nops(new_addr, ip, new_insns, is_call); if (ret) return ret; cpus_read_lock(); mutex_lock(&text_mutex); - if (memcmp(ip, new_insns, ninsns * 4)) - ret = patch_text(ip, new_insns, ninsns); + if (memcmp(ip, new_insns, RV_FENTRY_NINSNS * 4)) + ret = patch_text(ip, new_insns, RV_FENTRY_NINSNS); mutex_unlock(&text_mutex); cpus_read_unlock(); @@ -787,8 +762,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, int i, ret, offset; int *branches_off = NULL; int stack_size = 0, nregs = m->nr_args; - int retaddr_off, fp_off, retval_off, args_off; - int nregs_off, ip_off, run_ctx_off, sreg_off; + int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; @@ -796,13 +770,27 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, bool save_ret; u32 insn; - /* Generated trampoline stack layout: + /* Two types of generated trampoline stack layout: * - * FP - 8 [ RA of parent func ] return address of parent + * 1. trampoline called from function entry + * -------------------------------------- + * FP + 8 [ RA to parent func ] return address to parent * function - * FP - retaddr_off [ RA of traced func ] return address of traced + * FP + 0 [ FP of parent func ] frame pointer of parent * function - * FP - fp_off [ FP of parent func ] + * FP - 8 [ T0 to traced func ] return address of traced + * function + * FP - 16 [ FP of traced func ] frame pointer of traced + * function + * -------------------------------------- + * + * 2. trampoline called directly + * -------------------------------------- + * FP - 8 [ RA to caller func ] return address to caller + * function + * FP - 16 [ FP of caller func ] frame pointer of caller + * function + * -------------------------------------- * * FP - retval_off [ return value ] BPF_TRAMP_F_CALL_ORIG or * BPF_TRAMP_F_RET_FENTRY_RET @@ -833,14 +821,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, if (nregs > 8) return -ENOTSUPP; - /* room for parent function return address */ - stack_size += 8; - - stack_size += 8; - retaddr_off = stack_size; - - stack_size += 8; - fp_off = stack_size; + /* room of trampoline frame to store return address and frame pointer */ + stack_size += 16; save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); if (save_ret) { @@ -867,12 +849,29 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, stack_size = round_up(stack_size, 16); - emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx); - - emit_sd(RV_REG_SP, stack_size - retaddr_off, RV_REG_RA, ctx); - emit_sd(RV_REG_SP, stack_size - fp_off, RV_REG_FP, ctx); - - emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); + if (func_addr) { + /* For the trampoline called from function entry, + * the frame of traced function and the frame of + * trampoline need to be considered. + */ + emit_addi(RV_REG_SP, RV_REG_SP, -16, ctx); + emit_sd(RV_REG_SP, 8, RV_REG_RA, ctx); + emit_sd(RV_REG_SP, 0, RV_REG_FP, ctx); + emit_addi(RV_REG_FP, RV_REG_SP, 16, ctx); + + emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx); + emit_sd(RV_REG_SP, stack_size - 8, RV_REG_T0, ctx); + emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx); + emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); + } else { + /* For the trampoline called directly, just handle + * the frame of trampoline. + */ + emit_addi(RV_REG_SP, RV_REG_SP, -stack_size, ctx); + emit_sd(RV_REG_SP, stack_size - 8, RV_REG_RA, ctx); + emit_sd(RV_REG_SP, stack_size - 16, RV_REG_FP, ctx); + emit_addi(RV_REG_FP, RV_REG_SP, stack_size, ctx); + } /* callee saved register S1 to pass start time */ emit_sd(RV_REG_FP, -sreg_off, RV_REG_S1, ctx); @@ -890,7 +889,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, /* skip to actual body of traced function */ if (flags & BPF_TRAMP_F_SKIP_FRAME) - orig_call += 16; + orig_call += RV_FENTRY_NINSNS * 4; if (flags & BPF_TRAMP_F_CALL_ORIG) { emit_imm(RV_REG_A0, (const s64)im, ctx); @@ -967,17 +966,30 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, emit_ld(RV_REG_S1, -sreg_off, RV_REG_FP, ctx); - if (flags & BPF_TRAMP_F_SKIP_FRAME) - /* return address of parent function */ - emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx); - else - /* return address of traced function */ - emit_ld(RV_REG_RA, stack_size - retaddr_off, RV_REG_SP, ctx); + if (func_addr) { + /* trampoline called from function entry */ + emit_ld(RV_REG_T0, stack_size - 8, RV_REG_SP, ctx); + emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx); + emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx); - emit_ld(RV_REG_FP, stack_size - fp_off, RV_REG_SP, ctx); - emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx); + emit_ld(RV_REG_RA, 8, RV_REG_SP, ctx); + emit_ld(RV_REG_FP, 0, RV_REG_SP, ctx); + emit_addi(RV_REG_SP, RV_REG_SP, 16, ctx); - emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx); + if (flags & BPF_TRAMP_F_SKIP_FRAME) + /* return to parent function */ + emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx); + else + /* return to traced function */ + emit_jalr(RV_REG_ZERO, RV_REG_T0, 0, ctx); + } else { + /* trampoline called directly */ + emit_ld(RV_REG_RA, stack_size - 8, RV_REG_SP, ctx); + emit_ld(RV_REG_FP, stack_size - 16, RV_REG_SP, ctx); + emit_addi(RV_REG_SP, RV_REG_SP, stack_size, ctx); + + emit_jalr(RV_REG_ZERO, RV_REG_RA, 0, ctx); + } ret = ctx->ninsns; out: @@ -1035,7 +1047,19 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_zext_32(rd, ctx); break; } - emit_mv(rd, rs, ctx); + switch (insn->off) { + case 0: + emit_mv(rd, rs, ctx); + break; + case 8: + case 16: + emit_slli(RV_REG_T1, rs, 64 - insn->off, ctx); + emit_srai(rd, RV_REG_T1, 64 - insn->off, ctx); + break; + case 32: + emit_addiw(rd, rs, 0, ctx); + break; + } if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -1083,13 +1107,19 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, break; case BPF_ALU | BPF_DIV | BPF_X: case BPF_ALU64 | BPF_DIV | BPF_X: - emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); + if (off) + emit(is64 ? rv_div(rd, rd, rs) : rv_divw(rd, rd, rs), ctx); + else + emit(is64 ? rv_divu(rd, rd, rs) : rv_divuw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU64 | BPF_MOD | BPF_X: - emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); + if (off) + emit(is64 ? rv_rem(rd, rd, rs) : rv_remw(rd, rd, rs), ctx); + else + emit(is64 ? rv_remu(rd, rd, rs) : rv_remuw(rd, rd, rs), ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -1138,6 +1168,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, break; case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: emit_li(RV_REG_T2, 0, ctx); emit_andi(RV_REG_T1, rd, 0xff, ctx); @@ -1260,16 +1291,24 @@ out_be: case BPF_ALU | BPF_DIV | BPF_K: case BPF_ALU64 | BPF_DIV | BPF_K: emit_imm(RV_REG_T1, imm, ctx); - emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : - rv_divuw(rd, rd, RV_REG_T1), ctx); + if (off) + emit(is64 ? rv_div(rd, rd, RV_REG_T1) : + rv_divw(rd, rd, RV_REG_T1), ctx); + else + emit(is64 ? rv_divu(rd, rd, RV_REG_T1) : + rv_divuw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; case BPF_ALU | BPF_MOD | BPF_K: case BPF_ALU64 | BPF_MOD | BPF_K: emit_imm(RV_REG_T1, imm, ctx); - emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : - rv_remuw(rd, rd, RV_REG_T1), ctx); + if (off) + emit(is64 ? rv_rem(rd, rd, RV_REG_T1) : + rv_remw(rd, rd, RV_REG_T1), ctx); + else + emit(is64 ? rv_remu(rd, rd, RV_REG_T1) : + rv_remuw(rd, rd, RV_REG_T1), ctx); if (!is64 && !aux->verifier_zext) emit_zext_32(rd, ctx); break; @@ -1303,7 +1342,11 @@ out_be: /* JUMP off */ case BPF_JMP | BPF_JA: - rvoff = rv_offset(i, off, ctx); + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(code) == BPF_JMP) + rvoff = rv_offset(i, off, ctx); + else + rvoff = rv_offset(i, imm, ctx); ret = emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); if (ret) return ret; @@ -1475,7 +1518,7 @@ out_be: return 1; } - /* LDX: dst = *(size *)(src + off) */ + /* LDX: dst = *(unsigned size *)(src + off) */ case BPF_LDX | BPF_MEM | BPF_B: case BPF_LDX | BPF_MEM | BPF_H: case BPF_LDX | BPF_MEM | BPF_W: @@ -1484,14 +1527,28 @@ out_be: case BPF_LDX | BPF_PROBE_MEM | BPF_H: case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* LDSX: dst = *(signed size *)(src + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: { int insn_len, insns_start; + bool sign_ext; + + sign_ext = BPF_MODE(insn->code) == BPF_MEMSX || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX; switch (BPF_SIZE(code)) { case BPF_B: if (is_12b_int(off)) { insns_start = ctx->ninsns; - emit(rv_lbu(rd, off, rs), ctx); + if (sign_ext) + emit(rv_lb(rd, off, rs), ctx); + else + emit(rv_lbu(rd, off, rs), ctx); insn_len = ctx->ninsns - insns_start; break; } @@ -1499,15 +1556,19 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); insns_start = ctx->ninsns; - emit(rv_lbu(rd, 0, RV_REG_T1), ctx); + if (sign_ext) + emit(rv_lb(rd, 0, RV_REG_T1), ctx); + else + emit(rv_lbu(rd, 0, RV_REG_T1), ctx); insn_len = ctx->ninsns - insns_start; - if (insn_is_zext(&insn[1])) - return 1; break; case BPF_H: if (is_12b_int(off)) { insns_start = ctx->ninsns; - emit(rv_lhu(rd, off, rs), ctx); + if (sign_ext) + emit(rv_lh(rd, off, rs), ctx); + else + emit(rv_lhu(rd, off, rs), ctx); insn_len = ctx->ninsns - insns_start; break; } @@ -1515,15 +1576,19 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); insns_start = ctx->ninsns; - emit(rv_lhu(rd, 0, RV_REG_T1), ctx); + if (sign_ext) + emit(rv_lh(rd, 0, RV_REG_T1), ctx); + else + emit(rv_lhu(rd, 0, RV_REG_T1), ctx); insn_len = ctx->ninsns - insns_start; - if (insn_is_zext(&insn[1])) - return 1; break; case BPF_W: if (is_12b_int(off)) { insns_start = ctx->ninsns; - emit(rv_lwu(rd, off, rs), ctx); + if (sign_ext) + emit(rv_lw(rd, off, rs), ctx); + else + emit(rv_lwu(rd, off, rs), ctx); insn_len = ctx->ninsns - insns_start; break; } @@ -1531,10 +1596,11 @@ out_be: emit_imm(RV_REG_T1, off, ctx); emit_add(RV_REG_T1, RV_REG_T1, rs, ctx); insns_start = ctx->ninsns; - emit(rv_lwu(rd, 0, RV_REG_T1), ctx); + if (sign_ext) + emit(rv_lw(rd, 0, RV_REG_T1), ctx); + else + emit(rv_lwu(rd, 0, RV_REG_T1), ctx); insn_len = ctx->ninsns - insns_start; - if (insn_is_zext(&insn[1])) - return 1; break; case BPF_DW: if (is_12b_int(off)) { @@ -1555,6 +1621,9 @@ out_be: ret = add_exception_handler(insn, ctx, rd, insn_len); if (ret) return ret; + + if (BPF_SIZE(code) != BPF_DW && insn_is_zext(&insn[1])) + return 1; break; } /* speculation barrier */ @@ -1691,8 +1760,8 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx) store_offset = stack_adjust - 8; - /* reserve 4 nop insns */ - for (i = 0; i < 4; i++) + /* nops reserved for auipc+jalr pair */ + for (i = 0; i < RV_FENTRY_NINSNS; i++) emit(rv_nop(), ctx); /* First instruction is always setting the tail-call-counter diff --git a/arch/s390/Kbuild b/arch/s390/Kbuild index 76e362277179..8e4d74f51115 100644 --- a/arch/s390/Kbuild +++ b/arch/s390/Kbuild @@ -3,7 +3,7 @@ obj-y += kernel/ obj-y += mm/ obj-$(CONFIG_KVM) += kvm/ obj-y += crypto/ -obj-$(CONFIG_S390_HYPFS_FS) += hypfs/ +obj-$(CONFIG_S390_HYPFS) += hypfs/ obj-$(CONFIG_APPLDATA_BASE) += appldata/ obj-y += net/ obj-$(CONFIG_PCI) += pci/ diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 8ff6d1c21e38..4f364cfe9dd0 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -175,6 +175,7 @@ config S390 select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_ARG_ACCESS_API select HAVE_FUNCTION_ERROR_INJECTION + select HAVE_FUNCTION_GRAPH_RETVAL select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER select HAVE_GCC_PLUGINS @@ -513,6 +514,17 @@ config KEXEC_SIG verification for the corresponding kernel image type being loaded in order for this to work. +config CERT_STORE + bool "Get user certificates via DIAG320" + depends on KEYS + select CRYPTO_LIB_SHA256 + help + Enable this option if you want to access user-provided secure boot + certificates via DIAG 0x320. + + These certificates will be made available via the keyring named + 'cert_store'. + config KERNEL_NOBP def_bool n prompt "Enable modified branch prediction for the kernel by default" @@ -744,9 +756,9 @@ config CRASH_DUMP Crash dump kernels are loaded in the main kernel with kexec-tools into a specially reserved region and then later executed after a crash by kdump/kexec. - Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this. + Refer to <file:Documentation/arch/s390/zfcpdump.rst> for more details on this. This option also enables s390 zfcpdump. - See also <file:Documentation/s390/zfcpdump.rst> + See also <file:Documentation/arch/s390/zfcpdump.rst> endmenu @@ -868,13 +880,24 @@ config APPLDATA_NET_SUM This can also be compiled as a module, which will be called appldata_net_sum.o. -config S390_HYPFS_FS +config S390_HYPFS def_bool y + prompt "s390 hypervisor information" + help + This provides several binary files at (debugfs)/s390_hypfs/ to + provide accounting information in an s390 hypervisor environment. + +config S390_HYPFS_FS + def_bool n prompt "s390 hypervisor file system support" select SYS_HYPERVISOR + depends on S390_HYPFS help This is a virtual file system intended to provide accounting - information in an s390 hypervisor environment. + information in an s390 hypervisor environment. This file system + is deprecated and should not be used. + + Say N if you are unsure. source "arch/s390/kvm/Kconfig" diff --git a/arch/s390/Makefile b/arch/s390/Makefile index 5ed242897b0d..a53a36ee0731 100644 --- a/arch/s390/Makefile +++ b/arch/s390/Makefile @@ -119,7 +119,6 @@ export KBUILD_CFLAGS_DECOMPRESSOR OBJCOPYFLAGS := -O binary libs-y += arch/s390/lib/ -drivers-y += drivers/s390/ boot := arch/s390/boot syscalls := arch/s390/kernel/syscalls diff --git a/arch/s390/boot/startup.c b/arch/s390/boot/startup.c index 64bd7ac3e35d..b9681cb22753 100644 --- a/arch/s390/boot/startup.c +++ b/arch/s390/boot/startup.c @@ -27,6 +27,7 @@ struct page *__bootdata_preserved(vmemmap); unsigned long __bootdata_preserved(vmemmap_size); unsigned long __bootdata_preserved(MODULES_VADDR); unsigned long __bootdata_preserved(MODULES_END); +unsigned long __bootdata_preserved(max_mappable); unsigned long __bootdata(ident_map_size); u64 __bootdata_preserved(stfle_fac_list[16]); @@ -176,6 +177,7 @@ static unsigned long setup_kernel_memory_layout(void) unsigned long asce_limit; unsigned long rte_size; unsigned long pages; + unsigned long vsize; unsigned long vmax; pages = ident_map_size / PAGE_SIZE; @@ -183,19 +185,19 @@ static unsigned long setup_kernel_memory_layout(void) vmemmap_size = SECTION_ALIGN_UP(pages) * sizeof(struct page); /* choose kernel address space layout: 4 or 3 levels. */ - vmemmap_start = round_up(ident_map_size, _REGION3_SIZE); - if (IS_ENABLED(CONFIG_KASAN) || - vmalloc_size > _REGION2_SIZE || - vmemmap_start + vmemmap_size + vmalloc_size + MODULES_LEN > - _REGION2_SIZE) { + vsize = round_up(ident_map_size, _REGION3_SIZE) + vmemmap_size + + MODULES_LEN + MEMCPY_REAL_SIZE + ABS_LOWCORE_MAP_SIZE; + vsize = size_add(vsize, vmalloc_size); + if (IS_ENABLED(CONFIG_KASAN) || (vsize > _REGION2_SIZE)) { asce_limit = _REGION1_SIZE; rte_size = _REGION2_SIZE; } else { asce_limit = _REGION2_SIZE; rte_size = _REGION3_SIZE; } + /* - * forcing modules and vmalloc area under the ultravisor + * Forcing modules and vmalloc area under the ultravisor * secure storage limit, so that any vmalloc allocation * we do could be used to back secure guest storage. */ @@ -204,7 +206,7 @@ static unsigned long setup_kernel_memory_layout(void) /* force vmalloc and modules below kasan shadow */ vmax = min(vmax, KASAN_SHADOW_START); #endif - __memcpy_real_area = round_down(vmax - PAGE_SIZE, PAGE_SIZE); + __memcpy_real_area = round_down(vmax - MEMCPY_REAL_SIZE, PAGE_SIZE); __abs_lowcore = round_down(__memcpy_real_area - ABS_LOWCORE_MAP_SIZE, sizeof(struct lowcore)); MODULES_END = round_down(__abs_lowcore, _SEGMENT_SIZE); @@ -220,8 +222,9 @@ static unsigned long setup_kernel_memory_layout(void) pages = SECTION_ALIGN_UP(pages); /* keep vmemmap_start aligned to a top level region table entry */ vmemmap_start = round_down(VMALLOC_START - pages * sizeof(struct page), rte_size); - /* vmemmap_start is the future VMEM_MAX_PHYS, make sure it is within MAX_PHYSMEM */ vmemmap_start = min(vmemmap_start, 1UL << MAX_PHYSMEM_BITS); + /* maximum mappable address as seen by arch_get_mappable_range() */ + max_mappable = vmemmap_start; /* make sure identity map doesn't overlay with vmemmap */ ident_map_size = min(ident_map_size, vmemmap_start); vmemmap_size = SECTION_ALIGN_UP(ident_map_size / PAGE_SIZE) * sizeof(struct page); @@ -286,8 +289,9 @@ void startup_kernel(void) setup_lpp(); safe_addr = mem_safe_offset(); + /* - * reserve decompressor memory together with decompression heap, buffer and + * Reserve decompressor memory together with decompression heap, buffer and * memory which might be occupied by uncompressed kernel at default 1Mb * position (if KASLR is off or failed). */ diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index d03d4cb9332c..af2fbe48e16c 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -116,7 +116,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_TC_SKB_EXT=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -193,6 +192,7 @@ CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m +CONFIG_NETFILTER_XTABLES_COMPAT=y CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -379,6 +379,7 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_GATE=m +CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m @@ -395,6 +396,7 @@ CONFIG_HOTPLUG_PCI=y CONFIG_HOTPLUG_PCI_S390=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_SAFE=y +# CONFIG_FW_LOADER is not set CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -502,7 +504,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m @@ -542,6 +543,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m @@ -646,7 +648,6 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_TMPFS_INODE64=y CONFIG_HUGETLBFS=y -CONFIG_CONFIGFS_FS=m CONFIG_ECRYPT_FS=m CONFIG_CRAMFS=m CONFIG_SQUASHFS=m @@ -690,7 +691,6 @@ CONFIG_HARDENED_USERCOPY=y CONFIG_FORTIFY_SOURCE=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_LOCKDOWN_LSM=y CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_SECURITY_LANDLOCK=y @@ -744,7 +744,6 @@ CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -836,6 +835,7 @@ CONFIG_RCU_CPU_STALL_TIMEOUT=300 # CONFIG_RCU_TRACE is not set CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y +CONFIG_FUNCTION_GRAPH_RETVAL=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y @@ -844,6 +844,7 @@ CONFIG_PREEMPT_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_USER_EVENTS=y CONFIG_HIST_TRIGGERS=y CONFIG_FTRACE_STARTUP_TEST=y # CONFIG_EVENT_TRACE_STARTUP_TEST is not set @@ -866,6 +867,7 @@ CONFIG_FAIL_MAKE_REQUEST=y CONFIG_FAIL_IO_TIMEOUT=y CONFIG_FAIL_FUTEX=y CONFIG_FAULT_INJECTION_DEBUG_FS=y +CONFIG_FAULT_INJECTION_CONFIGFS=y CONFIG_FAULT_INJECTION_STACKTRACE_FILTER=y CONFIG_LKDTM=m CONFIG_TEST_MIN_HEAP=y diff --git a/arch/s390/configs/defconfig b/arch/s390/configs/defconfig index 1855759cdc6a..3f263b767a4c 100644 --- a/arch/s390/configs/defconfig +++ b/arch/s390/configs/defconfig @@ -107,7 +107,6 @@ CONFIG_UNIX=y CONFIG_UNIX_DIAG=m CONFIG_XFRM_USER=m CONFIG_NET_KEY=m -CONFIG_NET_TC_SKB_EXT=y CONFIG_SMC=m CONFIG_SMC_DIAG=m CONFIG_INET=y @@ -184,6 +183,7 @@ CONFIG_NFT_REJECT=m CONFIG_NFT_COMPAT=m CONFIG_NFT_HASH=m CONFIG_NFT_FIB_INET=m +CONFIG_NETFILTER_XTABLES_COMPAT=y CONFIG_NETFILTER_XT_SET=m CONFIG_NETFILTER_XT_TARGET_AUDIT=m CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m @@ -369,6 +369,7 @@ CONFIG_NET_ACT_SIMP=m CONFIG_NET_ACT_SKBEDIT=m CONFIG_NET_ACT_CSUM=m CONFIG_NET_ACT_GATE=m +CONFIG_NET_TC_SKB_EXT=y CONFIG_DNS_RESOLVER=y CONFIG_OPENVSWITCH=m CONFIG_VSOCKETS=m @@ -385,6 +386,7 @@ CONFIG_HOTPLUG_PCI_S390=y CONFIG_UEVENT_HELPER=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_SAFE=y +# CONFIG_FW_LOADER is not set CONFIG_CONNECTOR=y CONFIG_ZRAM=y CONFIG_BLK_DEV_LOOP=m @@ -492,7 +494,6 @@ CONFIG_NLMON=m # CONFIG_NET_VENDOR_GOOGLE is not set # CONFIG_NET_VENDOR_HUAWEI is not set # CONFIG_NET_VENDOR_INTEL is not set -# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_LITEX is not set # CONFIG_NET_VENDOR_MARVELL is not set CONFIG_MLX4_EN=m @@ -532,6 +533,7 @@ CONFIG_MLX5_CORE_EN=y # CONFIG_NET_VENDOR_TI is not set # CONFIG_NET_VENDOR_VERTEXCOM is not set # CONFIG_NET_VENDOR_VIA is not set +# CONFIG_NET_VENDOR_WANGXUN is not set # CONFIG_NET_VENDOR_WIZNET is not set # CONFIG_NET_VENDOR_XILINX is not set CONFIG_PPP=m @@ -673,7 +675,6 @@ CONFIG_SECURITY=y CONFIG_SECURITY_NETWORK=y CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y -CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_SECURITY_LOCKDOWN_LSM=y CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=y CONFIG_SECURITY_LANDLOCK=y @@ -729,7 +730,6 @@ CONFIG_CRYPTO_MD4=m CONFIG_CRYPTO_MD5=y CONFIG_CRYPTO_MICHAEL_MIC=m CONFIG_CRYPTO_RMD160=m -CONFIG_CRYPTO_SHA3=m CONFIG_CRYPTO_SM3_GENERIC=m CONFIG_CRYPTO_VMAC=m CONFIG_CRYPTO_WP512=m @@ -787,12 +787,14 @@ CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=60 CONFIG_LATENCYTOP=y CONFIG_BOOTTIME_TRACING=y +CONFIG_FUNCTION_GRAPH_RETVAL=y CONFIG_FPROBE=y CONFIG_FUNCTION_PROFILER=y CONFIG_STACK_TRACER=y CONFIG_SCHED_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_BLK_DEV_IO_TRACE=y +CONFIG_USER_EVENTS=y CONFIG_HIST_TRIGGERS=y CONFIG_SAMPLES=y CONFIG_SAMPLE_TRACE_PRINTK=m diff --git a/arch/s390/configs/zfcpdump_defconfig b/arch/s390/configs/zfcpdump_defconfig index 6f68b39817ef..e62fb2015102 100644 --- a/arch/s390/configs/zfcpdump_defconfig +++ b/arch/s390/configs/zfcpdump_defconfig @@ -53,7 +53,6 @@ CONFIG_ZFCP=y # CONFIG_HVC_IUCV is not set # CONFIG_HW_RANDOM_S390 is not set # CONFIG_HMC_DRV is not set -# CONFIG_S390_UV_UAPI is not set # CONFIG_S390_TAPE is not set # CONFIG_VMCP is not set # CONFIG_MONWRITER is not set diff --git a/arch/s390/crypto/paes_s390.c b/arch/s390/crypto/paes_s390.c index 38349150c96e..8b541e44151d 100644 --- a/arch/s390/crypto/paes_s390.c +++ b/arch/s390/crypto/paes_s390.c @@ -35,7 +35,7 @@ * and padding is also possible, the limits need to be generous. */ #define PAES_MIN_KEYSIZE 16 -#define PAES_MAX_KEYSIZE 320 +#define PAES_MAX_KEYSIZE MAXEP11AESKEYBLOBSIZE static u8 *ctrblk; static DEFINE_MUTEX(ctrblk_lock); diff --git a/arch/s390/hypfs/Makefile b/arch/s390/hypfs/Makefile index 06f601509ce9..c34854d298f8 100644 --- a/arch/s390/hypfs/Makefile +++ b/arch/s390/hypfs/Makefile @@ -3,7 +3,12 @@ # Makefile for the linux hypfs filesystem routines. # -obj-$(CONFIG_S390_HYPFS_FS) += s390_hypfs.o +obj-$(CONFIG_S390_HYPFS) += hypfs_dbfs.o +obj-$(CONFIG_S390_HYPFS) += hypfs_diag.o +obj-$(CONFIG_S390_HYPFS) += hypfs_diag0c.o +obj-$(CONFIG_S390_HYPFS) += hypfs_sprp.o +obj-$(CONFIG_S390_HYPFS) += hypfs_vm.o -s390_hypfs-objs := inode.o hypfs_diag.o hypfs_vm.o hypfs_dbfs.o hypfs_sprp.o -s390_hypfs-objs += hypfs_diag0c.o +obj-$(CONFIG_S390_HYPFS_FS) += hypfs_diag_fs.o +obj-$(CONFIG_S390_HYPFS_FS) += hypfs_vm_fs.o +obj-$(CONFIG_S390_HYPFS_FS) += inode.o diff --git a/arch/s390/hypfs/hypfs.h b/arch/s390/hypfs/hypfs.h index 05f3f9aee5fc..65f4036fd541 100644 --- a/arch/s390/hypfs/hypfs.h +++ b/arch/s390/hypfs/hypfs.h @@ -46,6 +46,15 @@ void hypfs_diag0c_exit(void); void hypfs_sprp_init(void); void hypfs_sprp_exit(void); +int __hypfs_fs_init(void); + +static inline int hypfs_fs_init(void) +{ + if (IS_ENABLED(CONFIG_S390_HYPFS_FS)) + return __hypfs_fs_init(); + return 0; +} + /* debugfs interface */ struct hypfs_dbfs_file; @@ -69,7 +78,6 @@ struct hypfs_dbfs_file { struct dentry *dentry; }; -extern void hypfs_dbfs_init(void); extern void hypfs_dbfs_exit(void); extern void hypfs_dbfs_create_file(struct hypfs_dbfs_file *df); extern void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df); diff --git a/arch/s390/hypfs/hypfs_dbfs.c b/arch/s390/hypfs/hypfs_dbfs.c index f4c7dbfaf8ee..4024599eb448 100644 --- a/arch/s390/hypfs/hypfs_dbfs.c +++ b/arch/s390/hypfs/hypfs_dbfs.c @@ -90,12 +90,33 @@ void hypfs_dbfs_remove_file(struct hypfs_dbfs_file *df) debugfs_remove(df->dentry); } -void hypfs_dbfs_init(void) +static int __init hypfs_dbfs_init(void) { - dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); -} + int rc = -ENODATA; -void hypfs_dbfs_exit(void) -{ + dbfs_dir = debugfs_create_dir("s390_hypfs", NULL); + if (hypfs_diag_init()) + goto fail_dbfs_exit; + if (hypfs_vm_init()) + goto fail_hypfs_diag_exit; + hypfs_sprp_init(); + if (hypfs_diag0c_init()) + goto fail_hypfs_sprp_exit; + rc = hypfs_fs_init(); + if (rc) + goto fail_hypfs_diag0c_exit; + return 0; + +fail_hypfs_diag0c_exit: + hypfs_diag0c_exit(); +fail_hypfs_sprp_exit: + hypfs_sprp_exit(); + hypfs_vm_exit(); +fail_hypfs_diag_exit: + hypfs_diag_exit(); + pr_err("Initialization of hypfs failed with rc=%i\n", rc); +fail_dbfs_exit: debugfs_remove(dbfs_dir); + return rc; } +device_initcall(hypfs_dbfs_init) diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index c3be533c4cd3..279b7bba4d43 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -18,188 +18,27 @@ #include <linux/mm.h> #include <asm/diag.h> #include <asm/ebcdic.h> +#include "hypfs_diag.h" #include "hypfs.h" -#define TMP_SIZE 64 /* size of temporary buffers */ - #define DBFS_D204_HDR_VERSION 0 -static char *diag224_cpu_names; /* diag 224 name table */ static enum diag204_sc diag204_store_sc; /* used subcode for store */ static enum diag204_format diag204_info_type; /* used diag 204 data format */ static void *diag204_buf; /* 4K aligned buffer for diag204 data */ -static void *diag204_buf_vmalloc; /* vmalloc pointer for diag204 data */ static int diag204_buf_pages; /* number of pages for diag204 data */ static struct dentry *dbfs_d204_file; -/* - * DIAG 204 member access functions. - * - * Since we have two different diag 204 data formats for old and new s390 - * machines, we do not access the structs directly, but use getter functions for - * each struct member instead. This should make the code more readable. - */ - -/* Time information block */ - -static inline int info_blk_hdr__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_info_blk_hdr); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_info_blk_hdr); -} - -static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_info_blk_hdr *)hdr)->npar; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_info_blk_hdr *)hdr)->npar; -} - -static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_info_blk_hdr *)hdr)->flags; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_info_blk_hdr *)hdr)->flags; -} - -/* Partition header */ - -static inline int part_hdr__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_part_hdr); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_part_hdr); -} - -static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_part_hdr *)hdr)->cpus; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_part_hdr *)hdr)->rcpus; -} - -static inline void part_hdr__part_name(enum diag204_format type, void *hdr, - char *name) -{ - if (type == DIAG204_INFO_SIMPLE) - memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name, - DIAG204_LPAR_NAME_LEN); - else /* DIAG204_INFO_EXT */ - memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name, - DIAG204_LPAR_NAME_LEN); - EBCASC(name, DIAG204_LPAR_NAME_LEN); - name[DIAG204_LPAR_NAME_LEN] = 0; - strim(name); -} - -/* CPU info block */ - -static inline int cpu_info__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_cpu_info); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_cpu_info); -} - -static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_cpu_info *)hdr)->ctidx; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->ctidx; -} - -static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) +enum diag204_format diag204_get_info_type(void) { - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_cpu_info *)hdr)->cpu_addr; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->cpu_addr; + return diag204_info_type; } -static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) +static void diag204_set_info_type(enum diag204_format type) { - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_cpu_info *)hdr)->acc_time; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->acc_time; -} - -static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_cpu_info *)hdr)->lp_time; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->lp_time; -} - -static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return 0; /* online_time not available in simple info */ - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_cpu_info *)hdr)->online_time; -} - -/* Physical header */ - -static inline int phys_hdr__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_phys_hdr); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_phys_hdr); -} - -static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_phys_hdr *)hdr)->cpus; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_phys_hdr *)hdr)->cpus; -} - -/* Physical CPU info block */ - -static inline int phys_cpu__size(enum diag204_format type) -{ - if (type == DIAG204_INFO_SIMPLE) - return sizeof(struct diag204_phys_cpu); - else /* DIAG204_INFO_EXT */ - return sizeof(struct diag204_x_phys_cpu); -} - -static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_phys_cpu *)hdr)->cpu_addr; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr; -} - -static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_phys_cpu *)hdr)->mgm_time; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_phys_cpu *)hdr)->mgm_time; -} - -static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) -{ - if (type == DIAG204_INFO_SIMPLE) - return ((struct diag204_phys_cpu *)hdr)->ctidx; - else /* DIAG204_INFO_EXT */ - return ((struct diag204_x_phys_cpu *)hdr)->ctidx; + diag204_info_type = type; } /* Diagnose 204 functions */ @@ -212,43 +51,11 @@ static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) static void diag204_free_buffer(void) { - if (!diag204_buf) - return; - if (diag204_buf_vmalloc) { - vfree(diag204_buf_vmalloc); - diag204_buf_vmalloc = NULL; - } else { - free_pages((unsigned long) diag204_buf, 0); - } + vfree(diag204_buf); diag204_buf = NULL; } -static void *page_align_ptr(void *ptr) -{ - return (void *) PAGE_ALIGN((unsigned long) ptr); -} - -static void *diag204_alloc_vbuf(int pages) -{ - /* The buffer has to be page aligned! */ - diag204_buf_vmalloc = vmalloc(array_size(PAGE_SIZE, (pages + 1))); - if (!diag204_buf_vmalloc) - return ERR_PTR(-ENOMEM); - diag204_buf = page_align_ptr(diag204_buf_vmalloc); - diag204_buf_pages = pages; - return diag204_buf; -} - -static void *diag204_alloc_rbuf(void) -{ - diag204_buf = (void*)__get_free_pages(GFP_KERNEL,0); - if (!diag204_buf) - return ERR_PTR(-ENOMEM); - diag204_buf_pages = 1; - return diag204_buf; -} - -static void *diag204_get_buffer(enum diag204_format fmt, int *pages) +void *diag204_get_buffer(enum diag204_format fmt, int *pages) { if (diag204_buf) { *pages = diag204_buf_pages; @@ -256,15 +63,19 @@ static void *diag204_get_buffer(enum diag204_format fmt, int *pages) } if (fmt == DIAG204_INFO_SIMPLE) { *pages = 1; - return diag204_alloc_rbuf(); } else {/* DIAG204_INFO_EXT */ *pages = diag204((unsigned long)DIAG204_SUBC_RSI | (unsigned long)DIAG204_INFO_EXT, 0, NULL); if (*pages <= 0) - return ERR_PTR(-ENOSYS); - else - return diag204_alloc_vbuf(*pages); + return ERR_PTR(-EOPNOTSUPP); } + diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE), + PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); + if (!diag204_buf) + return ERR_PTR(-ENOMEM); + diag204_buf_pages = *pages; + return diag204_buf; } /* @@ -291,13 +102,13 @@ static int diag204_probe(void) if (diag204((unsigned long)DIAG204_SUBC_STIB7 | (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { diag204_store_sc = DIAG204_SUBC_STIB7; - diag204_info_type = DIAG204_INFO_EXT; + diag204_set_info_type(DIAG204_INFO_EXT); goto out; } if (diag204((unsigned long)DIAG204_SUBC_STIB6 | (unsigned long)DIAG204_INFO_EXT, pages, buf) >= 0) { diag204_store_sc = DIAG204_SUBC_STIB6; - diag204_info_type = DIAG204_INFO_EXT; + diag204_set_info_type(DIAG204_INFO_EXT); goto out; } diag204_free_buffer(); @@ -313,10 +124,10 @@ static int diag204_probe(void) if (diag204((unsigned long)DIAG204_SUBC_STIB4 | (unsigned long)DIAG204_INFO_SIMPLE, pages, buf) >= 0) { diag204_store_sc = DIAG204_SUBC_STIB4; - diag204_info_type = DIAG204_INFO_SIMPLE; + diag204_set_info_type(DIAG204_INFO_SIMPLE); goto out; } else { - rc = -ENOSYS; + rc = -EOPNOTSUPP; goto fail_store; } out: @@ -327,58 +138,13 @@ fail_alloc: return rc; } -static int diag204_do_store(void *buf, int pages) +int diag204_store(void *buf, int pages) { int rc; - rc = diag204((unsigned long) diag204_store_sc | - (unsigned long) diag204_info_type, pages, buf); - return rc < 0 ? -ENOSYS : 0; -} - -static void *diag204_store(void) -{ - void *buf; - int pages, rc; - - buf = diag204_get_buffer(diag204_info_type, &pages); - if (IS_ERR(buf)) - goto out; - rc = diag204_do_store(buf, pages); - if (rc) - return ERR_PTR(rc); -out: - return buf; -} - -/* Diagnose 224 functions */ - -static int diag224_get_name_table(void) -{ - /* memory must be below 2GB */ - diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA); - if (!diag224_cpu_names) - return -ENOMEM; - if (diag224(diag224_cpu_names)) { - free_page((unsigned long) diag224_cpu_names); - return -EOPNOTSUPP; - } - EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16); - return 0; -} - -static void diag224_delete_name_table(void) -{ - free_page((unsigned long) diag224_cpu_names); -} - -static int diag224_idx2name(int index, char *name) -{ - memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN), - DIAG204_CPU_NAME_LEN); - name[DIAG204_CPU_NAME_LEN] = 0; - strim(name); - return 0; + rc = diag204((unsigned long)diag204_store_sc | + (unsigned long)diag204_get_info_type(), pages, buf); + return rc < 0 ? -EOPNOTSUPP : 0; } struct dbfs_d204_hdr { @@ -403,8 +169,8 @@ static int dbfs_d204_create(void **data, void **data_free_ptr, size_t *size) base = vzalloc(buf_size); if (!base) return -ENOMEM; - d204 = page_align_ptr(base + sizeof(d204->hdr)) - sizeof(d204->hdr); - rc = diag204_do_store(d204->buf, diag204_buf_pages); + d204 = PTR_ALIGN(base + sizeof(d204->hdr), PAGE_SIZE) - sizeof(d204->hdr); + rc = diag204_store(d204->buf, diag204_buf_pages); if (rc) { vfree(base); return rc; @@ -433,176 +199,21 @@ __init int hypfs_diag_init(void) return -ENODATA; } - if (diag204_info_type == DIAG204_INFO_EXT) + if (diag204_get_info_type() == DIAG204_INFO_EXT) hypfs_dbfs_create_file(&dbfs_file_d204); - if (MACHINE_IS_LPAR) { - rc = diag224_get_name_table(); - if (rc) { - pr_err("The hardware system does not provide all " - "functions required by hypfs\n"); - debugfs_remove(dbfs_d204_file); - return rc; - } + rc = hypfs_diag_fs_init(); + if (rc) { + pr_err("The hardware system does not provide all functions required by hypfs\n"); + debugfs_remove(dbfs_d204_file); } - return 0; + return rc; } void hypfs_diag_exit(void) { debugfs_remove(dbfs_d204_file); - diag224_delete_name_table(); + hypfs_diag_fs_exit(); diag204_free_buffer(); hypfs_dbfs_remove_file(&dbfs_file_d204); } - -/* - * Functions to create the directory structure - * ******************************************* - */ - -static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) -{ - struct dentry *cpu_dir; - char buffer[TMP_SIZE]; - void *rc; - - snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_info_type, - cpu_info)); - cpu_dir = hypfs_mkdir(cpus_dir, buffer); - rc = hypfs_create_u64(cpu_dir, "mgmtime", - cpu_info__acc_time(diag204_info_type, cpu_info) - - cpu_info__lp_time(diag204_info_type, cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); - rc = hypfs_create_u64(cpu_dir, "cputime", - cpu_info__lp_time(diag204_info_type, cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); - if (diag204_info_type == DIAG204_INFO_EXT) { - rc = hypfs_create_u64(cpu_dir, "onlinetime", - cpu_info__online_time(diag204_info_type, - cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); - } - diag224_idx2name(cpu_info__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_ERR_OR_ZERO(rc); -} - -static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) -{ - struct dentry *cpus_dir; - struct dentry *lpar_dir; - char lpar_name[DIAG204_LPAR_NAME_LEN + 1]; - void *cpu_info; - int i; - - part_hdr__part_name(diag204_info_type, part_hdr, lpar_name); - lpar_name[DIAG204_LPAR_NAME_LEN] = 0; - lpar_dir = hypfs_mkdir(systems_dir, lpar_name); - if (IS_ERR(lpar_dir)) - return lpar_dir; - cpus_dir = hypfs_mkdir(lpar_dir, "cpus"); - if (IS_ERR(cpus_dir)) - return cpus_dir; - cpu_info = part_hdr + part_hdr__size(diag204_info_type); - for (i = 0; i < part_hdr__rcpus(diag204_info_type, part_hdr); i++) { - int rc; - rc = hypfs_create_cpu_files(cpus_dir, cpu_info); - if (rc) - return ERR_PTR(rc); - cpu_info += cpu_info__size(diag204_info_type); - } - return cpu_info; -} - -static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) -{ - struct dentry *cpu_dir; - char buffer[TMP_SIZE]; - void *rc; - - snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_info_type, - cpu_info)); - cpu_dir = hypfs_mkdir(cpus_dir, buffer); - if (IS_ERR(cpu_dir)) - return PTR_ERR(cpu_dir); - rc = hypfs_create_u64(cpu_dir, "mgmtime", - phys_cpu__mgm_time(diag204_info_type, cpu_info)); - if (IS_ERR(rc)) - return PTR_ERR(rc); - diag224_idx2name(phys_cpu__ctidx(diag204_info_type, cpu_info), buffer); - rc = hypfs_create_str(cpu_dir, "type", buffer); - return PTR_ERR_OR_ZERO(rc); -} - -static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) -{ - int i; - void *cpu_info; - struct dentry *cpus_dir; - - cpus_dir = hypfs_mkdir(parent_dir, "cpus"); - if (IS_ERR(cpus_dir)) - return cpus_dir; - cpu_info = phys_hdr + phys_hdr__size(diag204_info_type); - for (i = 0; i < phys_hdr__cpus(diag204_info_type, phys_hdr); i++) { - int rc; - rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info); - if (rc) - return ERR_PTR(rc); - cpu_info += phys_cpu__size(diag204_info_type); - } - return cpu_info; -} - -int hypfs_diag_create_files(struct dentry *root) -{ - struct dentry *systems_dir, *hyp_dir; - void *time_hdr, *part_hdr; - int i, rc; - void *buffer, *ptr; - - buffer = diag204_store(); - if (IS_ERR(buffer)) - return PTR_ERR(buffer); - - systems_dir = hypfs_mkdir(root, "systems"); - if (IS_ERR(systems_dir)) { - rc = PTR_ERR(systems_dir); - goto err_out; - } - time_hdr = (struct x_info_blk_hdr *)buffer; - part_hdr = time_hdr + info_blk_hdr__size(diag204_info_type); - for (i = 0; i < info_blk_hdr__npar(diag204_info_type, time_hdr); i++) { - part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr); - if (IS_ERR(part_hdr)) { - rc = PTR_ERR(part_hdr); - goto err_out; - } - } - if (info_blk_hdr__flags(diag204_info_type, time_hdr) & - DIAG204_LPAR_PHYS_FLG) { - ptr = hypfs_create_phys_files(root, part_hdr); - if (IS_ERR(ptr)) { - rc = PTR_ERR(ptr); - goto err_out; - } - } - hyp_dir = hypfs_mkdir(root, "hyp"); - if (IS_ERR(hyp_dir)) { - rc = PTR_ERR(hyp_dir); - goto err_out; - } - ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); - if (IS_ERR(ptr)) { - rc = PTR_ERR(ptr); - goto err_out; - } - rc = 0; - -err_out: - return rc; -} diff --git a/arch/s390/hypfs/hypfs_diag.h b/arch/s390/hypfs/hypfs_diag.h new file mode 100644 index 000000000000..7090eff27fef --- /dev/null +++ b/arch/s390/hypfs/hypfs_diag.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hypervisor filesystem for Linux on s390. Diag 204 and 224 + * implementation. + * + * Copyright IBM Corp. 2006, 2008 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#ifndef _S390_HYPFS_DIAG_H_ +#define _S390_HYPFS_DIAG_H_ + +#include <asm/diag.h> + +enum diag204_format diag204_get_info_type(void); +void *diag204_get_buffer(enum diag204_format fmt, int *pages); +int diag204_store(void *buf, int pages); + +int __hypfs_diag_fs_init(void); +void __hypfs_diag_fs_exit(void); + +static inline int hypfs_diag_fs_init(void) +{ + if (IS_ENABLED(CONFIG_S390_HYPFS_FS)) + return __hypfs_diag_fs_init(); + return 0; +} + +static inline void hypfs_diag_fs_exit(void) +{ + if (IS_ENABLED(CONFIG_S390_HYPFS_FS)) + __hypfs_diag_fs_exit(); +} + +#endif /* _S390_HYPFS_DIAG_H_ */ diff --git a/arch/s390/hypfs/hypfs_diag_fs.c b/arch/s390/hypfs/hypfs_diag_fs.c new file mode 100644 index 000000000000..00a6d370a280 --- /dev/null +++ b/arch/s390/hypfs/hypfs_diag_fs.c @@ -0,0 +1,393 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hypervisor filesystem for Linux on s390. Diag 204 and 224 + * implementation. + * + * Copyright IBM Corp. 2006, 2008 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#define KMSG_COMPONENT "hypfs" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <asm/diag.h> +#include <asm/ebcdic.h> +#include "hypfs_diag.h" +#include "hypfs.h" + +#define TMP_SIZE 64 /* size of temporary buffers */ + +static char *diag224_cpu_names; /* diag 224 name table */ +static int diag224_idx2name(int index, char *name); + +/* + * DIAG 204 member access functions. + * + * Since we have two different diag 204 data formats for old and new s390 + * machines, we do not access the structs directly, but use getter functions for + * each struct member instead. This should make the code more readable. + */ + +/* Time information block */ + +static inline int info_blk_hdr__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_info_blk_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_info_blk_hdr); +} + +static inline __u8 info_blk_hdr__npar(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->npar; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->npar; +} + +static inline __u8 info_blk_hdr__flags(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_info_blk_hdr *)hdr)->flags; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_info_blk_hdr *)hdr)->flags; +} + +/* Partition header */ + +static inline int part_hdr__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_part_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_part_hdr); +} + +static inline __u8 part_hdr__rcpus(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_part_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_part_hdr *)hdr)->rcpus; +} + +static inline void part_hdr__part_name(enum diag204_format type, void *hdr, + char *name) +{ + if (type == DIAG204_INFO_SIMPLE) + memcpy(name, ((struct diag204_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + else /* DIAG204_INFO_EXT */ + memcpy(name, ((struct diag204_x_part_hdr *)hdr)->part_name, + DIAG204_LPAR_NAME_LEN); + EBCASC(name, DIAG204_LPAR_NAME_LEN); + name[DIAG204_LPAR_NAME_LEN] = 0; + strim(name); +} + +/* CPU info block */ + +static inline int cpu_info__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_cpu_info); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_cpu_info); +} + +static inline __u8 cpu_info__ctidx(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->ctidx; +} + +static inline __u16 cpu_info__cpu_addr(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->cpu_addr; +} + +static inline __u64 cpu_info__acc_time(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->acc_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->acc_time; +} + +static inline __u64 cpu_info__lp_time(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_cpu_info *)hdr)->lp_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->lp_time; +} + +static inline __u64 cpu_info__online_time(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return 0; /* online_time not available in simple info */ + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_cpu_info *)hdr)->online_time; +} + +/* Physical header */ + +static inline int phys_hdr__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_hdr); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_hdr); +} + +static inline __u8 phys_hdr__cpus(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_hdr *)hdr)->cpus; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_hdr *)hdr)->cpus; +} + +/* Physical CPU info block */ + +static inline int phys_cpu__size(enum diag204_format type) +{ + if (type == DIAG204_INFO_SIMPLE) + return sizeof(struct diag204_phys_cpu); + else /* DIAG204_INFO_EXT */ + return sizeof(struct diag204_x_phys_cpu); +} + +static inline __u16 phys_cpu__cpu_addr(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->cpu_addr; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->cpu_addr; +} + +static inline __u64 phys_cpu__mgm_time(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->mgm_time; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->mgm_time; +} + +static inline __u64 phys_cpu__ctidx(enum diag204_format type, void *hdr) +{ + if (type == DIAG204_INFO_SIMPLE) + return ((struct diag204_phys_cpu *)hdr)->ctidx; + else /* DIAG204_INFO_EXT */ + return ((struct diag204_x_phys_cpu *)hdr)->ctidx; +} + +/* + * Functions to create the directory structure + * ******************************************* + */ + +static int hypfs_create_cpu_files(struct dentry *cpus_dir, void *cpu_info) +{ + struct dentry *cpu_dir; + char buffer[TMP_SIZE]; + void *rc; + + snprintf(buffer, TMP_SIZE, "%d", cpu_info__cpu_addr(diag204_get_info_type(), + cpu_info)); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + rc = hypfs_create_u64(cpu_dir, "mgmtime", + cpu_info__acc_time(diag204_get_info_type(), cpu_info) - + cpu_info__lp_time(diag204_get_info_type(), cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + rc = hypfs_create_u64(cpu_dir, "cputime", + cpu_info__lp_time(diag204_get_info_type(), cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + if (diag204_get_info_type() == DIAG204_INFO_EXT) { + rc = hypfs_create_u64(cpu_dir, "onlinetime", + cpu_info__online_time(diag204_get_info_type(), + cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + } + diag224_idx2name(cpu_info__ctidx(diag204_get_info_type(), cpu_info), buffer); + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_ERR_OR_ZERO(rc); +} + +static void *hypfs_create_lpar_files(struct dentry *systems_dir, void *part_hdr) +{ + struct dentry *cpus_dir; + struct dentry *lpar_dir; + char lpar_name[DIAG204_LPAR_NAME_LEN + 1]; + void *cpu_info; + int i; + + part_hdr__part_name(diag204_get_info_type(), part_hdr, lpar_name); + lpar_name[DIAG204_LPAR_NAME_LEN] = 0; + lpar_dir = hypfs_mkdir(systems_dir, lpar_name); + if (IS_ERR(lpar_dir)) + return lpar_dir; + cpus_dir = hypfs_mkdir(lpar_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return cpus_dir; + cpu_info = part_hdr + part_hdr__size(diag204_get_info_type()); + for (i = 0; i < part_hdr__rcpus(diag204_get_info_type(), part_hdr); i++) { + int rc; + + rc = hypfs_create_cpu_files(cpus_dir, cpu_info); + if (rc) + return ERR_PTR(rc); + cpu_info += cpu_info__size(diag204_get_info_type()); + } + return cpu_info; +} + +static int hypfs_create_phys_cpu_files(struct dentry *cpus_dir, void *cpu_info) +{ + struct dentry *cpu_dir; + char buffer[TMP_SIZE]; + void *rc; + + snprintf(buffer, TMP_SIZE, "%i", phys_cpu__cpu_addr(diag204_get_info_type(), + cpu_info)); + cpu_dir = hypfs_mkdir(cpus_dir, buffer); + if (IS_ERR(cpu_dir)) + return PTR_ERR(cpu_dir); + rc = hypfs_create_u64(cpu_dir, "mgmtime", + phys_cpu__mgm_time(diag204_get_info_type(), cpu_info)); + if (IS_ERR(rc)) + return PTR_ERR(rc); + diag224_idx2name(phys_cpu__ctidx(diag204_get_info_type(), cpu_info), buffer); + rc = hypfs_create_str(cpu_dir, "type", buffer); + return PTR_ERR_OR_ZERO(rc); +} + +static void *hypfs_create_phys_files(struct dentry *parent_dir, void *phys_hdr) +{ + int i; + void *cpu_info; + struct dentry *cpus_dir; + + cpus_dir = hypfs_mkdir(parent_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return cpus_dir; + cpu_info = phys_hdr + phys_hdr__size(diag204_get_info_type()); + for (i = 0; i < phys_hdr__cpus(diag204_get_info_type(), phys_hdr); i++) { + int rc; + + rc = hypfs_create_phys_cpu_files(cpus_dir, cpu_info); + if (rc) + return ERR_PTR(rc); + cpu_info += phys_cpu__size(diag204_get_info_type()); + } + return cpu_info; +} + +int hypfs_diag_create_files(struct dentry *root) +{ + struct dentry *systems_dir, *hyp_dir; + void *time_hdr, *part_hdr; + void *buffer, *ptr; + int i, rc, pages; + + buffer = diag204_get_buffer(diag204_get_info_type(), &pages); + if (IS_ERR(buffer)) + return PTR_ERR(buffer); + rc = diag204_store(buffer, pages); + if (rc) + return rc; + + systems_dir = hypfs_mkdir(root, "systems"); + if (IS_ERR(systems_dir)) { + rc = PTR_ERR(systems_dir); + goto err_out; + } + time_hdr = (struct x_info_blk_hdr *)buffer; + part_hdr = time_hdr + info_blk_hdr__size(diag204_get_info_type()); + for (i = 0; i < info_blk_hdr__npar(diag204_get_info_type(), time_hdr); i++) { + part_hdr = hypfs_create_lpar_files(systems_dir, part_hdr); + if (IS_ERR(part_hdr)) { + rc = PTR_ERR(part_hdr); + goto err_out; + } + } + if (info_blk_hdr__flags(diag204_get_info_type(), time_hdr) & + DIAG204_LPAR_PHYS_FLG) { + ptr = hypfs_create_phys_files(root, part_hdr); + if (IS_ERR(ptr)) { + rc = PTR_ERR(ptr); + goto err_out; + } + } + hyp_dir = hypfs_mkdir(root, "hyp"); + if (IS_ERR(hyp_dir)) { + rc = PTR_ERR(hyp_dir); + goto err_out; + } + ptr = hypfs_create_str(hyp_dir, "type", "LPAR Hypervisor"); + if (IS_ERR(ptr)) { + rc = PTR_ERR(ptr); + goto err_out; + } + rc = 0; + +err_out: + return rc; +} + +/* Diagnose 224 functions */ + +static int diag224_idx2name(int index, char *name) +{ + memcpy(name, diag224_cpu_names + ((index + 1) * DIAG204_CPU_NAME_LEN), + DIAG204_CPU_NAME_LEN); + name[DIAG204_CPU_NAME_LEN] = 0; + strim(name); + return 0; +} + +static int diag224_get_name_table(void) +{ + /* memory must be below 2GB */ + diag224_cpu_names = (char *)__get_free_page(GFP_KERNEL | GFP_DMA); + if (!diag224_cpu_names) + return -ENOMEM; + if (diag224(diag224_cpu_names)) { + free_page((unsigned long)diag224_cpu_names); + return -EOPNOTSUPP; + } + EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16); + return 0; +} + +static void diag224_delete_name_table(void) +{ + free_page((unsigned long)diag224_cpu_names); +} + +int __init __hypfs_diag_fs_init(void) +{ + if (MACHINE_IS_LPAR) + return diag224_get_name_table(); + return 0; +} + +void __hypfs_diag_fs_exit(void) +{ + diag224_delete_name_table(); +} diff --git a/arch/s390/hypfs/hypfs_vm.c b/arch/s390/hypfs/hypfs_vm.c index a3d881ca0a98..3db40ad853e0 100644 --- a/arch/s390/hypfs/hypfs_vm.c +++ b/arch/s390/hypfs/hypfs_vm.c @@ -14,47 +14,15 @@ #include <asm/diag.h> #include <asm/ebcdic.h> #include <asm/timex.h> +#include "hypfs_vm.h" #include "hypfs.h" -#define NAME_LEN 8 #define DBFS_D2FC_HDR_VERSION 0 static char local_guest[] = " "; static char all_guests[] = "* "; static char *all_groups = all_guests; -static char *guest_query; - -struct diag2fc_data { - __u32 version; - __u32 flags; - __u64 used_cpu; - __u64 el_time; - __u64 mem_min_kb; - __u64 mem_max_kb; - __u64 mem_share_kb; - __u64 mem_used_kb; - __u32 pcpus; - __u32 lcpus; - __u32 vcpus; - __u32 ocpus; - __u32 cpu_max; - __u32 cpu_shares; - __u32 cpu_use_samp; - __u32 cpu_delay_samp; - __u32 page_wait_samp; - __u32 idle_samp; - __u32 other_samp; - __u32 total_samp; - char guest_name[NAME_LEN]; -}; - -struct diag2fc_parm_list { - char userid[NAME_LEN]; - char aci_grp[NAME_LEN]; - __u64 addr; - __u32 size; - __u32 fmt; -}; +char *diag2fc_guest_query; static int diag2fc(int size, char* query, void *addr) { @@ -62,10 +30,10 @@ static int diag2fc(int size, char* query, void *addr) unsigned long rc; struct diag2fc_parm_list parm_list; - memcpy(parm_list.userid, query, NAME_LEN); - ASCEBC(parm_list.userid, NAME_LEN); - memcpy(parm_list.aci_grp, all_groups, NAME_LEN); - ASCEBC(parm_list.aci_grp, NAME_LEN); + memcpy(parm_list.userid, query, DIAG2FC_NAME_LEN); + ASCEBC(parm_list.userid, DIAG2FC_NAME_LEN); + memcpy(parm_list.aci_grp, all_groups, DIAG2FC_NAME_LEN); + ASCEBC(parm_list.aci_grp, DIAG2FC_NAME_LEN); parm_list.addr = (unsigned long)addr; parm_list.size = size; parm_list.fmt = 0x02; @@ -87,7 +55,7 @@ static int diag2fc(int size, char* query, void *addr) /* * Allocate buffer for "query" and store diag 2fc at "offset" */ -static void *diag2fc_store(char *query, unsigned int *count, int offset) +void *diag2fc_store(char *query, unsigned int *count, int offset) { void *data; int size; @@ -108,132 +76,11 @@ static void *diag2fc_store(char *query, unsigned int *count, int offset) return data; } -static void diag2fc_free(const void *data) +void diag2fc_free(const void *data) { vfree(data); } -#define ATTRIBUTE(dir, name, member) \ -do { \ - void *rc; \ - rc = hypfs_create_u64(dir, name, member); \ - if (IS_ERR(rc)) \ - return PTR_ERR(rc); \ -} while(0) - -static int hypfs_vm_create_guest(struct dentry *systems_dir, - struct diag2fc_data *data) -{ - char guest_name[NAME_LEN + 1] = {}; - struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir; - int dedicated_flag, capped_value; - - capped_value = (data->flags & 0x00000006) >> 1; - dedicated_flag = (data->flags & 0x00000008) >> 3; - - /* guest dir */ - memcpy(guest_name, data->guest_name, NAME_LEN); - EBCASC(guest_name, NAME_LEN); - strim(guest_name); - guest_dir = hypfs_mkdir(systems_dir, guest_name); - if (IS_ERR(guest_dir)) - return PTR_ERR(guest_dir); - ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); - - /* logical cpu information */ - cpus_dir = hypfs_mkdir(guest_dir, "cpus"); - if (IS_ERR(cpus_dir)) - return PTR_ERR(cpus_dir); - ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); - ATTRIBUTE(cpus_dir, "capped", capped_value); - ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); - ATTRIBUTE(cpus_dir, "count", data->vcpus); - /* - * Note: The "weight_min" attribute got the wrong name. - * The value represents the number of non-stopped (operating) - * CPUS. - */ - ATTRIBUTE(cpus_dir, "weight_min", data->ocpus); - ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); - ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); - - /* memory information */ - mem_dir = hypfs_mkdir(guest_dir, "mem"); - if (IS_ERR(mem_dir)) - return PTR_ERR(mem_dir); - ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); - ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); - ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); - ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); - - /* samples */ - samples_dir = hypfs_mkdir(guest_dir, "samples"); - if (IS_ERR(samples_dir)) - return PTR_ERR(samples_dir); - ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); - ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); - ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); - ATTRIBUTE(samples_dir, "idle", data->idle_samp); - ATTRIBUTE(samples_dir, "other", data->other_samp); - ATTRIBUTE(samples_dir, "total", data->total_samp); - return 0; -} - -int hypfs_vm_create_files(struct dentry *root) -{ - struct dentry *dir, *file; - struct diag2fc_data *data; - unsigned int count = 0; - int rc, i; - - data = diag2fc_store(guest_query, &count, 0); - if (IS_ERR(data)) - return PTR_ERR(data); - - /* Hypervisor Info */ - dir = hypfs_mkdir(root, "hyp"); - if (IS_ERR(dir)) { - rc = PTR_ERR(dir); - goto failed; - } - file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); - if (IS_ERR(file)) { - rc = PTR_ERR(file); - goto failed; - } - - /* physical cpus */ - dir = hypfs_mkdir(root, "cpus"); - if (IS_ERR(dir)) { - rc = PTR_ERR(dir); - goto failed; - } - file = hypfs_create_u64(dir, "count", data->lcpus); - if (IS_ERR(file)) { - rc = PTR_ERR(file); - goto failed; - } - - /* guests */ - dir = hypfs_mkdir(root, "systems"); - if (IS_ERR(dir)) { - rc = PTR_ERR(dir); - goto failed; - } - - for (i = 0; i < count; i++) { - rc = hypfs_vm_create_guest(dir, &(data[i])); - if (rc) - goto failed; - } - diag2fc_free(data); - return 0; - -failed: - diag2fc_free(data); - return rc; -} - struct dbfs_d2fc_hdr { u64 len; /* Length of d2fc buffer without header */ u16 version; /* Version of header */ @@ -252,7 +99,7 @@ static int dbfs_diag2fc_create(void **data, void **data_free_ptr, size_t *size) struct dbfs_d2fc *d2fc; unsigned int count; - d2fc = diag2fc_store(guest_query, &count, sizeof(d2fc->hdr)); + d2fc = diag2fc_store(diag2fc_guest_query, &count, sizeof(d2fc->hdr)); if (IS_ERR(d2fc)) return PTR_ERR(d2fc); store_tod_clock_ext(&d2fc->hdr.tod_ext); @@ -277,9 +124,9 @@ int hypfs_vm_init(void) if (!MACHINE_IS_VM) return 0; if (diag2fc(0, all_guests, NULL) > 0) - guest_query = all_guests; + diag2fc_guest_query = all_guests; else if (diag2fc(0, local_guest, NULL) > 0) - guest_query = local_guest; + diag2fc_guest_query = local_guest; else return -EACCES; hypfs_dbfs_create_file(&dbfs_file_2fc); diff --git a/arch/s390/hypfs/hypfs_vm.h b/arch/s390/hypfs/hypfs_vm.h new file mode 100644 index 000000000000..fe2e5851addd --- /dev/null +++ b/arch/s390/hypfs/hypfs_vm.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Hypervisor filesystem for Linux on s390. z/VM implementation. + * + * Copyright IBM Corp. 2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#ifndef _S390_HYPFS_VM_H_ +#define _S390_HYPFS_VM_H_ + +#define DIAG2FC_NAME_LEN 8 + +struct diag2fc_data { + __u32 version; + __u32 flags; + __u64 used_cpu; + __u64 el_time; + __u64 mem_min_kb; + __u64 mem_max_kb; + __u64 mem_share_kb; + __u64 mem_used_kb; + __u32 pcpus; + __u32 lcpus; + __u32 vcpus; + __u32 ocpus; + __u32 cpu_max; + __u32 cpu_shares; + __u32 cpu_use_samp; + __u32 cpu_delay_samp; + __u32 page_wait_samp; + __u32 idle_samp; + __u32 other_samp; + __u32 total_samp; + char guest_name[DIAG2FC_NAME_LEN]; +}; + +struct diag2fc_parm_list { + char userid[DIAG2FC_NAME_LEN]; + char aci_grp[DIAG2FC_NAME_LEN]; + __u64 addr; + __u32 size; + __u32 fmt; +}; + +void *diag2fc_store(char *query, unsigned int *count, int offset); +void diag2fc_free(const void *data); +extern char *diag2fc_guest_query; + +#endif /* _S390_HYPFS_VM_H_ */ diff --git a/arch/s390/hypfs/hypfs_vm_fs.c b/arch/s390/hypfs/hypfs_vm_fs.c new file mode 100644 index 000000000000..6011289afa8c --- /dev/null +++ b/arch/s390/hypfs/hypfs_vm_fs.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Hypervisor filesystem for Linux on s390. z/VM implementation. + * + * Copyright IBM Corp. 2006 + * Author(s): Michael Holzheu <holzheu@de.ibm.com> + */ + +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/vmalloc.h> +#include <asm/extable.h> +#include <asm/diag.h> +#include <asm/ebcdic.h> +#include <asm/timex.h> +#include "hypfs_vm.h" +#include "hypfs.h" + +#define ATTRIBUTE(dir, name, member) \ +do { \ + void *rc; \ + rc = hypfs_create_u64(dir, name, member); \ + if (IS_ERR(rc)) \ + return PTR_ERR(rc); \ +} while (0) + +static int hypfs_vm_create_guest(struct dentry *systems_dir, + struct diag2fc_data *data) +{ + char guest_name[DIAG2FC_NAME_LEN + 1] = {}; + struct dentry *guest_dir, *cpus_dir, *samples_dir, *mem_dir; + int dedicated_flag, capped_value; + + capped_value = (data->flags & 0x00000006) >> 1; + dedicated_flag = (data->flags & 0x00000008) >> 3; + + /* guest dir */ + memcpy(guest_name, data->guest_name, DIAG2FC_NAME_LEN); + EBCASC(guest_name, DIAG2FC_NAME_LEN); + strim(guest_name); + guest_dir = hypfs_mkdir(systems_dir, guest_name); + if (IS_ERR(guest_dir)) + return PTR_ERR(guest_dir); + ATTRIBUTE(guest_dir, "onlinetime_us", data->el_time); + + /* logical cpu information */ + cpus_dir = hypfs_mkdir(guest_dir, "cpus"); + if (IS_ERR(cpus_dir)) + return PTR_ERR(cpus_dir); + ATTRIBUTE(cpus_dir, "cputime_us", data->used_cpu); + ATTRIBUTE(cpus_dir, "capped", capped_value); + ATTRIBUTE(cpus_dir, "dedicated", dedicated_flag); + ATTRIBUTE(cpus_dir, "count", data->vcpus); + /* + * Note: The "weight_min" attribute got the wrong name. + * The value represents the number of non-stopped (operating) + * CPUS. + */ + ATTRIBUTE(cpus_dir, "weight_min", data->ocpus); + ATTRIBUTE(cpus_dir, "weight_max", data->cpu_max); + ATTRIBUTE(cpus_dir, "weight_cur", data->cpu_shares); + + /* memory information */ + mem_dir = hypfs_mkdir(guest_dir, "mem"); + if (IS_ERR(mem_dir)) + return PTR_ERR(mem_dir); + ATTRIBUTE(mem_dir, "min_KiB", data->mem_min_kb); + ATTRIBUTE(mem_dir, "max_KiB", data->mem_max_kb); + ATTRIBUTE(mem_dir, "used_KiB", data->mem_used_kb); + ATTRIBUTE(mem_dir, "share_KiB", data->mem_share_kb); + + /* samples */ + samples_dir = hypfs_mkdir(guest_dir, "samples"); + if (IS_ERR(samples_dir)) + return PTR_ERR(samples_dir); + ATTRIBUTE(samples_dir, "cpu_using", data->cpu_use_samp); + ATTRIBUTE(samples_dir, "cpu_delay", data->cpu_delay_samp); + ATTRIBUTE(samples_dir, "mem_delay", data->page_wait_samp); + ATTRIBUTE(samples_dir, "idle", data->idle_samp); + ATTRIBUTE(samples_dir, "other", data->other_samp); + ATTRIBUTE(samples_dir, "total", data->total_samp); + return 0; +} + +int hypfs_vm_create_files(struct dentry *root) +{ + struct dentry *dir, *file; + struct diag2fc_data *data; + unsigned int count = 0; + int rc, i; + + data = diag2fc_store(diag2fc_guest_query, &count, 0); + if (IS_ERR(data)) + return PTR_ERR(data); + + /* Hypervisor Info */ + dir = hypfs_mkdir(root, "hyp"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_str(dir, "type", "z/VM Hypervisor"); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* physical cpus */ + dir = hypfs_mkdir(root, "cpus"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + file = hypfs_create_u64(dir, "count", data->lcpus); + if (IS_ERR(file)) { + rc = PTR_ERR(file); + goto failed; + } + + /* guests */ + dir = hypfs_mkdir(root, "systems"); + if (IS_ERR(dir)) { + rc = PTR_ERR(dir); + goto failed; + } + + for (i = 0; i < count; i++) { + rc = hypfs_vm_create_guest(dir, &data[i]); + if (rc) + goto failed; + } + diag2fc_free(data); + return 0; + +failed: + diag2fc_free(data); + return rc; +} diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index ee919bfc8186..ada83149932f 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -53,7 +53,7 @@ static void hypfs_update_update(struct super_block *sb) struct inode *inode = d_inode(sb_info->update_file); sb_info->last_update = ktime_get_seconds(); - inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode); + inode->i_atime = inode->i_mtime = inode_set_ctime_current(inode); } /* directory tree removal functions */ @@ -101,7 +101,7 @@ static struct inode *hypfs_make_inode(struct super_block *sb, umode_t mode) ret->i_mode = mode; ret->i_uid = hypfs_info->uid; ret->i_gid = hypfs_info->gid; - ret->i_atime = ret->i_mtime = ret->i_ctime = current_time(ret); + ret->i_atime = ret->i_mtime = inode_set_ctime_current(ret); if (S_ISDIR(mode)) set_nlink(ret, 2); } @@ -460,45 +460,18 @@ static const struct super_operations hypfs_s_ops = { .show_options = hypfs_show_options, }; -static int __init hypfs_init(void) +int __init __hypfs_fs_init(void) { int rc; - hypfs_dbfs_init(); - - if (hypfs_diag_init()) { - rc = -ENODATA; - goto fail_dbfs_exit; - } - if (hypfs_vm_init()) { - rc = -ENODATA; - goto fail_hypfs_diag_exit; - } - hypfs_sprp_init(); - if (hypfs_diag0c_init()) { - rc = -ENODATA; - goto fail_hypfs_sprp_exit; - } rc = sysfs_create_mount_point(hypervisor_kobj, "s390"); if (rc) - goto fail_hypfs_diag0c_exit; + return rc; rc = register_filesystem(&hypfs_type); if (rc) - goto fail_filesystem; + goto fail; return 0; - -fail_filesystem: +fail: sysfs_remove_mount_point(hypervisor_kobj, "s390"); -fail_hypfs_diag0c_exit: - hypfs_diag0c_exit(); -fail_hypfs_sprp_exit: - hypfs_sprp_exit(); - hypfs_vm_exit(); -fail_hypfs_diag_exit: - hypfs_diag_exit(); - pr_err("Initialization of hypfs failed with rc=%i\n", rc); -fail_dbfs_exit: - hypfs_dbfs_exit(); return rc; } -device_initcall(hypfs_init) diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index 1a18d7b82f86..4b904110d27c 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -5,6 +5,5 @@ generated-y += syscall_table.h generated-y += unistd_nr.h generic-y += asm-offsets.h -generic-y += export.h generic-y += kvm_types.h generic-y += mcs_spinlock.h diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index ac665b9670c5..ccd4e148b5ed 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -222,7 +222,7 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level, /* * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are - * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details! + * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details! */ extern debug_entry_t * __debug_sprintf_event(debug_info_t *id, int level, char *string, ...) @@ -350,7 +350,7 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level, /* * IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are - * stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details! + * stored in the s390dbf. See Documentation/arch/s390/s390dbf.rst for more details! */ extern debug_entry_t * __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) diff --git a/arch/s390/include/asm/diag.h b/arch/s390/include/asm/diag.h index 902e0330dd91..bed804137537 100644 --- a/arch/s390/include/asm/diag.h +++ b/arch/s390/include/asm/diag.h @@ -36,6 +36,7 @@ enum diag_stat_enum { DIAG_STAT_X304, DIAG_STAT_X308, DIAG_STAT_X318, + DIAG_STAT_X320, DIAG_STAT_X500, NR_DIAG_STAT }; @@ -108,6 +109,8 @@ enum diag204_sc { DIAG204_SUBC_STIB7 = 7 }; +#define DIAG204_SUBCODE_MASK 0xffff + /* The two available diag 204 data formats */ enum diag204_format { DIAG204_INFO_SIMPLE = 0, diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index e5c5cb1207e2..5a82b08f03cd 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -54,6 +54,23 @@ static __always_inline struct pt_regs *arch_ftrace_get_regs(struct ftrace_regs * return NULL; } +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +struct fgraph_ret_regs { + unsigned long gpr2; + unsigned long fp; +}; + +static __always_inline unsigned long fgraph_ret_regs_return_value(struct fgraph_ret_regs *ret_regs) +{ + return ret_regs->gpr2; +} + +static __always_inline unsigned long fgraph_ret_regs_frame_pointer(struct fgraph_ret_regs *ret_regs) +{ + return ret_regs->fp; +} +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ + static __always_inline unsigned long ftrace_regs_get_instruction_pointer(const struct ftrace_regs *fregs) { diff --git a/arch/s390/include/asm/kfence.h b/arch/s390/include/asm/kfence.h index d55ba878378b..e47fd8cbe701 100644 --- a/arch/s390/include/asm/kfence.h +++ b/arch/s390/include/asm/kfence.h @@ -35,7 +35,7 @@ static __always_inline void kfence_split_mapping(void) static inline bool kfence_protect_page(unsigned long addr, bool protect) { - __kernel_map_pages(virt_to_page(addr), 1, !protect); + __kernel_map_pages(virt_to_page((void *)addr), 1, !protect); return true; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 2bbc3d54959d..91bfecb91321 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -1028,6 +1028,9 @@ static inline int sie64a(struct kvm_s390_sie_block *sie_block, u64 *rsa) extern char sie_exit; +bool kvm_s390_pv_is_protected(struct kvm *kvm); +bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu); + extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc); extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc); diff --git a/arch/s390/include/asm/maccess.h b/arch/s390/include/asm/maccess.h index cfec3141fdba..50225940d971 100644 --- a/arch/s390/include/asm/maccess.h +++ b/arch/s390/include/asm/maccess.h @@ -4,6 +4,9 @@ #include <linux/types.h> +#define MEMCPY_REAL_SIZE PAGE_SIZE +#define MEMCPY_REAL_MASK PAGE_MASK + struct iov_iter; extern unsigned long __memcpy_real_area; diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index a9c138fcd2ad..cfec0743314e 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -191,8 +191,16 @@ int arch_make_page_accessible(struct page *page); #define phys_to_page(phys) pfn_to_page(phys_to_pfn(phys)) #define page_to_phys(page) pfn_to_phys(page_to_pfn(page)) -#define pfn_to_virt(pfn) __va(pfn_to_phys(pfn)) -#define virt_to_pfn(kaddr) (phys_to_pfn(__pa(kaddr))) +static inline void *pfn_to_virt(unsigned long pfn) +{ + return __va(pfn_to_phys(pfn)); +} + +static inline unsigned long virt_to_pfn(const void *kaddr) +{ + return phys_to_pfn(__pa(kaddr)); +} + #define pfn_to_kaddr(pfn) pfn_to_virt(pfn) #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr)) diff --git a/arch/s390/include/asm/pfault.h b/arch/s390/include/asm/pfault.h new file mode 100644 index 000000000000..a1bee4a1e470 --- /dev/null +++ b/arch/s390/include/asm/pfault.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright IBM Corp. 1999, 2023 + */ +#ifndef _ASM_S390_PFAULT_H +#define _ASM_S390_PFAULT_H + +#include <linux/errno.h> + +int __pfault_init(void); +void __pfault_fini(void); + +static inline int pfault_init(void) +{ + if (IS_ENABLED(CONFIG_PFAULT)) + return __pfault_init(); + return -EOPNOTSUPP; +} + +static inline void pfault_fini(void) +{ + if (IS_ENABLED(CONFIG_PFAULT)) + __pfault_fini(); +} + +#endif /* _ASM_S390_PFAULT_H */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 02973c740a5b..d28d2e5e68ee 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -90,8 +90,6 @@ extern unsigned long __bootdata_preserved(VMALLOC_END); extern struct page *__bootdata_preserved(vmemmap); extern unsigned long __bootdata_preserved(vmemmap_size); -#define VMEM_MAX_PHYS ((unsigned long) vmemmap) - extern unsigned long __bootdata_preserved(MODULES_VADDR); extern unsigned long __bootdata_preserved(MODULES_END); #define MODULES_VADDR MODULES_VADDR diff --git a/arch/s390/include/asm/sclp.h b/arch/s390/include/asm/sclp.h index dac7da88f61f..5742d23bba13 100644 --- a/arch/s390/include/asm/sclp.h +++ b/arch/s390/include/asm/sclp.h @@ -86,6 +86,7 @@ struct sclp_info { unsigned char has_kss : 1; unsigned char has_gisaf : 1; unsigned char has_diag318 : 1; + unsigned char has_diag320 : 1; unsigned char has_sipl : 1; unsigned char has_sipl_eckd : 1; unsigned char has_dirq : 1; diff --git a/arch/s390/include/asm/setup.h b/arch/s390/include/asm/setup.h index f191255c60db..b30fe91166e3 100644 --- a/arch/s390/include/asm/setup.h +++ b/arch/s390/include/asm/setup.h @@ -74,6 +74,7 @@ extern unsigned int zlib_dfltcc_support; extern int noexec_disabled; extern unsigned long ident_map_size; +extern unsigned long max_mappable; /* The Write Back bit position in the physaddr is given by the SLPC PCI */ extern unsigned long mio_wb_bit_mask; @@ -117,14 +118,6 @@ extern unsigned int console_irq; #define SET_CONSOLE_VT220 do { console_mode = 4; } while (0) #define SET_CONSOLE_HVC do { console_mode = 5; } while (0) -#ifdef CONFIG_PFAULT -extern int pfault_init(void); -extern void pfault_fini(void); -#else /* CONFIG_PFAULT */ -#define pfault_init() ({-1;}) -#define pfault_fini() do { } while (0) -#endif /* CONFIG_PFAULT */ - #ifdef CONFIG_VMCP void vmcp_cma_reserve(void); #else diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index d6bb2f4f78d1..d2cd42bb2c26 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -463,6 +463,7 @@ static inline int is_prot_virt_host(void) return prot_virt_host; } +int uv_pin_shared(unsigned long paddr); int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb); int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr); int uv_destroy_owned_page(unsigned long paddr); @@ -475,6 +476,11 @@ void setup_uv(void); #define is_prot_virt_host() 0 static inline void setup_uv(void) {} +static inline int uv_pin_shared(unsigned long paddr) +{ + return 0; +} + static inline int uv_destroy_owned_page(unsigned long paddr) { return 0; diff --git a/arch/s390/include/uapi/asm/pkey.h b/arch/s390/include/uapi/asm/pkey.h index 5faf0a1d2c16..5ad76471e73f 100644 --- a/arch/s390/include/uapi/asm/pkey.h +++ b/arch/s390/include/uapi/asm/pkey.h @@ -26,7 +26,7 @@ #define MAXCLRKEYSIZE 32 /* a clear key value may be up to 32 bytes */ #define MAXAESCIPHERKEYSIZE 136 /* our aes cipher keys have always 136 bytes */ #define MINEP11AESKEYBLOBSIZE 256 /* min EP11 AES key blob size */ -#define MAXEP11AESKEYBLOBSIZE 320 /* max EP11 AES key blob size */ +#define MAXEP11AESKEYBLOBSIZE 336 /* max EP11 AES key blob size */ /* Minimum size of a key blob */ #define MINKEYBLOBSIZE SECKEYBLOBSIZE diff --git a/arch/s390/include/uapi/asm/ptrace.h b/arch/s390/include/uapi/asm/ptrace.h index f0fe3bcc78a8..bb0826024bb9 100644 --- a/arch/s390/include/uapi/asm/ptrace.h +++ b/arch/s390/include/uapi/asm/ptrace.h @@ -8,6 +8,8 @@ #ifndef _UAPI_S390_PTRACE_H #define _UAPI_S390_PTRACE_H +#include <linux/const.h> + /* * Offsets in the user_regs_struct. They are used for the ptrace * system call and in entry.S diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 6b2a051e1f8a..0df2b88cc0da 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -37,9 +37,9 @@ CFLAGS_unwind_bc.o += -fno-optimize-sibling-calls obj-y := head64.o traps.o time.o process.o earlypgm.o early.o setup.o idle.o vtime.o obj-y += processor.o syscall.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o cpufeature.o -obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o +obj-y += sysinfo.o lgr.o os_info.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o -obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o +obj-y += entry.o reipl.o kdebugfs.o alternative.o obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o obj-y += smp.o text_amode31.o stacktrace.o abs_lowcore.o @@ -63,12 +63,13 @@ obj-$(CONFIG_RETHOOK) += rethook.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_UPROBES) += uprobes.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o obj-$(CONFIG_KEXEC_FILE) += kexec_elf.o - +obj-$(CONFIG_CERT_STORE) += cert_store.o obj-$(CONFIG_IMA_SECURE_AND_OR_TRUSTED_BOOT) += ima_arch.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 81cf72088041..fa5f6885c74a 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -12,6 +12,7 @@ #include <linux/sched.h> #include <linux/purgatory.h> #include <linux/pgtable.h> +#include <linux/ftrace.h> #include <asm/idle.h> #include <asm/gmap.h> #include <asm/stacktrace.h> @@ -177,5 +178,13 @@ int main(void) DEFINE(OLDMEM_SIZE, PARMAREA + offsetof(struct parmarea, oldmem_size)); DEFINE(COMMAND_LINE, PARMAREA + offsetof(struct parmarea, command_line)); DEFINE(MAX_COMMAND_LINE_SIZE, PARMAREA + offsetof(struct parmarea, max_command_line_size)); +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* function graph return value tracing */ + OFFSET(__FGRAPH_RET_GPR2, fgraph_ret_regs, gpr2); + OFFSET(__FGRAPH_RET_FP, fgraph_ret_regs, fp); + DEFINE(__FGRAPH_RET_SIZE, sizeof(struct fgraph_ret_regs)); +#endif + OFFSET(__FTRACE_REGS_PT_REGS, ftrace_regs, regs); + DEFINE(__FTRACE_REGS_SIZE, sizeof(struct ftrace_regs)); return 0; } diff --git a/arch/s390/kernel/cert_store.c b/arch/s390/kernel/cert_store.c new file mode 100644 index 000000000000..3986a044eb36 --- /dev/null +++ b/arch/s390/kernel/cert_store.c @@ -0,0 +1,811 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * DIAG 0x320 support and certificate store handling + * + * Copyright IBM Corp. 2023 + * Author(s): Anastasia Eskova <anastasia.eskova@ibm.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/delay.h> +#include <linux/device.h> +#include <linux/fs.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/key-type.h> +#include <linux/key.h> +#include <linux/keyctl.h> +#include <linux/kobject.h> +#include <linux/module.h> +#include <linux/seq_file.h> +#include <linux/slab.h> +#include <linux/sysfs.h> +#include <crypto/sha2.h> +#include <keys/user-type.h> +#include <asm/debug.h> +#include <asm/diag.h> +#include <asm/ebcdic.h> +#include <asm/sclp.h> + +#define DIAG_MAX_RETRIES 10 + +#define VCE_FLAGS_VALID_MASK 0x80 + +#define ISM_LEN_DWORDS 4 +#define VCSSB_LEN_BYTES 128 +#define VCSSB_LEN_NO_CERTS 4 +#define VCB_LEN_NO_CERTS 64 +#define VC_NAME_LEN_BYTES 64 + +#define CERT_STORE_KEY_TYPE_NAME "cert_store_key" +#define CERT_STORE_KEYRING_NAME "cert_store" + +static debug_info_t *cert_store_dbf; +static debug_info_t *cert_store_hexdump; + +#define pr_dbf_msg(fmt, ...) \ + debug_sprintf_event(cert_store_dbf, 3, fmt "\n", ## __VA_ARGS__) + +enum diag320_subcode { + DIAG320_SUBCODES = 0, + DIAG320_STORAGE = 1, + DIAG320_CERT_BLOCK = 2, +}; + +enum diag320_rc { + DIAG320_RC_OK = 0x0001, + DIAG320_RC_CS_NOMATCH = 0x0306, +}; + +/* Verification Certificates Store Support Block (VCSSB). */ +struct vcssb { + u32 vcssb_length; + u8 pad_0x04[3]; + u8 version; + u8 pad_0x08[8]; + u32 cs_token; + u8 pad_0x14[12]; + u16 total_vc_index_count; + u16 max_vc_index_count; + u8 pad_0x24[28]; + u32 max_vce_length; + u32 max_vcxe_length; + u8 pad_0x48[8]; + u32 max_single_vcb_length; + u32 total_vcb_length; + u32 max_single_vcxb_length; + u32 total_vcxb_length; + u8 pad_0x60[32]; +} __packed __aligned(8); + +/* Verification Certificate Entry (VCE) Header. */ +struct vce_header { + u32 vce_length; + u8 flags; + u8 key_type; + u16 vc_index; + u8 vc_name[VC_NAME_LEN_BYTES]; /* EBCDIC */ + u8 vc_format; + u8 pad_0x49; + u16 key_id_length; + u8 pad_0x4c; + u8 vc_hash_type; + u16 vc_hash_length; + u8 pad_0x50[4]; + u32 vc_length; + u8 pad_0x58[8]; + u16 vc_hash_offset; + u16 vc_offset; + u8 pad_0x64[28]; +} __packed __aligned(4); + +/* Verification Certificate Block (VCB) Header. */ +struct vcb_header { + u32 vcb_input_length; + u8 pad_0x04[4]; + u16 first_vc_index; + u16 last_vc_index; + u32 pad_0x0c; + u32 cs_token; + u8 pad_0x14[12]; + u32 vcb_output_length; + u8 pad_0x24[3]; + u8 version; + u16 stored_vc_count; + u16 remaining_vc_count; + u8 pad_0x2c[20]; +} __packed __aligned(4); + +/* Verification Certificate Block (VCB). */ +struct vcb { + struct vcb_header vcb_hdr; + u8 vcb_buf[]; +} __packed __aligned(4); + +/* Verification Certificate Entry (VCE). */ +struct vce { + struct vce_header vce_hdr; + u8 cert_data_buf[]; +} __packed __aligned(4); + +static void cert_store_key_describe(const struct key *key, struct seq_file *m) +{ + char ascii[VC_NAME_LEN_BYTES + 1]; + + /* + * First 64 bytes of the key description is key name in EBCDIC CP 500. + * Convert it to ASCII for displaying in /proc/keys. + */ + strscpy(ascii, key->description, sizeof(ascii)); + EBCASC_500(ascii, VC_NAME_LEN_BYTES); + seq_puts(m, ascii); + + seq_puts(m, &key->description[VC_NAME_LEN_BYTES]); + if (key_is_positive(key)) + seq_printf(m, ": %u", key->datalen); +} + +/* + * Certificate store key type takes over properties of + * user key but cannot be updated. + */ +static struct key_type key_type_cert_store_key = { + .name = CERT_STORE_KEY_TYPE_NAME, + .preparse = user_preparse, + .free_preparse = user_free_preparse, + .instantiate = generic_key_instantiate, + .revoke = user_revoke, + .destroy = user_destroy, + .describe = cert_store_key_describe, + .read = user_read, +}; + +/* Logging functions. */ +static void pr_dbf_vcb(const struct vcb *b) +{ + pr_dbf_msg("VCB Header:"); + pr_dbf_msg("vcb_input_length: %d", b->vcb_hdr.vcb_input_length); + pr_dbf_msg("first_vc_index: %d", b->vcb_hdr.first_vc_index); + pr_dbf_msg("last_vc_index: %d", b->vcb_hdr.last_vc_index); + pr_dbf_msg("cs_token: %d", b->vcb_hdr.cs_token); + pr_dbf_msg("vcb_output_length: %d", b->vcb_hdr.vcb_output_length); + pr_dbf_msg("version: %d", b->vcb_hdr.version); + pr_dbf_msg("stored_vc_count: %d", b->vcb_hdr.stored_vc_count); + pr_dbf_msg("remaining_vc_count: %d", b->vcb_hdr.remaining_vc_count); +} + +static void pr_dbf_vce(const struct vce *e) +{ + unsigned char vc_name[VC_NAME_LEN_BYTES + 1]; + char log_string[VC_NAME_LEN_BYTES + 40]; + + pr_dbf_msg("VCE Header:"); + pr_dbf_msg("vce_hdr.vce_length: %d", e->vce_hdr.vce_length); + pr_dbf_msg("vce_hdr.flags: %d", e->vce_hdr.flags); + pr_dbf_msg("vce_hdr.key_type: %d", e->vce_hdr.key_type); + pr_dbf_msg("vce_hdr.vc_index: %d", e->vce_hdr.vc_index); + pr_dbf_msg("vce_hdr.vc_format: %d", e->vce_hdr.vc_format); + pr_dbf_msg("vce_hdr.key_id_length: %d", e->vce_hdr.key_id_length); + pr_dbf_msg("vce_hdr.vc_hash_type: %d", e->vce_hdr.vc_hash_type); + pr_dbf_msg("vce_hdr.vc_hash_length: %d", e->vce_hdr.vc_hash_length); + pr_dbf_msg("vce_hdr.vc_hash_offset: %d", e->vce_hdr.vc_hash_offset); + pr_dbf_msg("vce_hdr.vc_length: %d", e->vce_hdr.vc_length); + pr_dbf_msg("vce_hdr.vc_offset: %d", e->vce_hdr.vc_offset); + + /* Certificate name in ASCII. */ + memcpy(vc_name, e->vce_hdr.vc_name, VC_NAME_LEN_BYTES); + EBCASC_500(vc_name, VC_NAME_LEN_BYTES); + vc_name[VC_NAME_LEN_BYTES] = '\0'; + + snprintf(log_string, sizeof(log_string), + "index: %d vce_hdr.vc_name (ASCII): %s", + e->vce_hdr.vc_index, vc_name); + debug_text_event(cert_store_hexdump, 3, log_string); + + /* Certificate data. */ + debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data start"); + debug_event(cert_store_hexdump, 3, (u8 *)e->cert_data_buf, 128); + debug_text_event(cert_store_hexdump, 3, "VCE: Certificate data end"); + debug_event(cert_store_hexdump, 3, + (u8 *)e->cert_data_buf + e->vce_hdr.vce_length - 128, 128); +} + +static void pr_dbf_vcssb(const struct vcssb *s) +{ + debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode1"); + debug_event(cert_store_hexdump, 3, (u8 *)s, VCSSB_LEN_BYTES); + + pr_dbf_msg("VCSSB:"); + pr_dbf_msg("vcssb_length: %u", s->vcssb_length); + pr_dbf_msg("version: %u", s->version); + pr_dbf_msg("cs_token: %u", s->cs_token); + pr_dbf_msg("total_vc_index_count: %u", s->total_vc_index_count); + pr_dbf_msg("max_vc_index_count: %u", s->max_vc_index_count); + pr_dbf_msg("max_vce_length: %u", s->max_vce_length); + pr_dbf_msg("max_vcxe_length: %u", s->max_vce_length); + pr_dbf_msg("max_single_vcb_length: %u", s->max_single_vcb_length); + pr_dbf_msg("total_vcb_length: %u", s->total_vcb_length); + pr_dbf_msg("max_single_vcxb_length: %u", s->max_single_vcxb_length); + pr_dbf_msg("total_vcxb_length: %u", s->total_vcxb_length); +} + +static int __diag320(unsigned long subcode, void *addr) +{ + union register_pair rp = { .even = (unsigned long)addr, }; + + asm volatile( + " diag %[rp],%[subcode],0x320\n" + "0: nopr %%r7\n" + EX_TABLE(0b, 0b) + : [rp] "+d" (rp.pair) + : [subcode] "d" (subcode) + : "cc", "memory"); + + return rp.odd; +} + +static int diag320(unsigned long subcode, void *addr) +{ + diag_stat_inc(DIAG_STAT_X320); + + return __diag320(subcode, addr); +} + +/* + * Calculate SHA256 hash of the VCE certificate and compare it to hash stored in + * VCE. Return -EINVAL if hashes don't match. + */ +static int check_certificate_hash(const struct vce *vce) +{ + u8 hash[SHA256_DIGEST_SIZE]; + u16 vc_hash_length; + u8 *vce_hash; + + vce_hash = (u8 *)vce + vce->vce_hdr.vc_hash_offset; + vc_hash_length = vce->vce_hdr.vc_hash_length; + sha256((u8 *)vce + vce->vce_hdr.vc_offset, vce->vce_hdr.vc_length, hash); + if (memcmp(vce_hash, hash, vc_hash_length) == 0) + return 0; + + pr_dbf_msg("SHA256 hash of received certificate does not match"); + debug_text_event(cert_store_hexdump, 3, "VCE hash:"); + debug_event(cert_store_hexdump, 3, vce_hash, SHA256_DIGEST_SIZE); + debug_text_event(cert_store_hexdump, 3, "Calculated hash:"); + debug_event(cert_store_hexdump, 3, hash, SHA256_DIGEST_SIZE); + + return -EINVAL; +} + +static int check_certificate_valid(const struct vce *vce) +{ + if (!(vce->vce_hdr.flags & VCE_FLAGS_VALID_MASK)) { + pr_dbf_msg("Certificate entry is invalid"); + return -EINVAL; + } + if (vce->vce_hdr.vc_format != 1) { + pr_dbf_msg("Certificate format is not supported"); + return -EINVAL; + } + if (vce->vce_hdr.vc_hash_type != 1) { + pr_dbf_msg("Hash type is not supported"); + return -EINVAL; + } + + return check_certificate_hash(vce); +} + +static struct key *get_user_session_keyring(void) +{ + key_ref_t us_keyring_ref; + + us_keyring_ref = lookup_user_key(KEY_SPEC_USER_SESSION_KEYRING, + KEY_LOOKUP_CREATE, KEY_NEED_LINK); + if (IS_ERR(us_keyring_ref)) { + pr_dbf_msg("Couldn't get user session keyring: %ld", + PTR_ERR(us_keyring_ref)); + return ERR_PTR(-ENOKEY); + } + key_ref_put(us_keyring_ref); + return key_ref_to_ptr(us_keyring_ref); +} + +/* Invalidate all keys from cert_store keyring. */ +static int invalidate_keyring_keys(struct key *keyring) +{ + unsigned long num_keys, key_index; + size_t keyring_payload_len; + key_serial_t *key_array; + struct key *current_key; + int rc; + + keyring_payload_len = key_type_keyring.read(keyring, NULL, 0); + num_keys = keyring_payload_len / sizeof(key_serial_t); + key_array = kcalloc(num_keys, sizeof(key_serial_t), GFP_KERNEL); + if (!key_array) + return -ENOMEM; + + rc = key_type_keyring.read(keyring, (char *)key_array, keyring_payload_len); + if (rc != keyring_payload_len) { + pr_dbf_msg("Couldn't read keyring payload"); + goto out; + } + + for (key_index = 0; key_index < num_keys; key_index++) { + current_key = key_lookup(key_array[key_index]); + pr_dbf_msg("Invalidating key %08x", current_key->serial); + + key_invalidate(current_key); + key_put(current_key); + rc = key_unlink(keyring, current_key); + if (rc) { + pr_dbf_msg("Couldn't unlink key %08x: %d", current_key->serial, rc); + break; + } + } +out: + kfree(key_array); + return rc; +} + +static struct key *find_cs_keyring(void) +{ + key_ref_t cs_keyring_ref; + struct key *cs_keyring; + + cs_keyring_ref = keyring_search(make_key_ref(get_user_session_keyring(), true), + &key_type_keyring, CERT_STORE_KEYRING_NAME, + false); + if (!IS_ERR(cs_keyring_ref)) { + cs_keyring = key_ref_to_ptr(cs_keyring_ref); + key_ref_put(cs_keyring_ref); + goto found; + } + /* Search default locations: thread, process, session keyrings */ + cs_keyring = request_key(&key_type_keyring, CERT_STORE_KEYRING_NAME, NULL); + if (IS_ERR(cs_keyring)) + return NULL; + key_put(cs_keyring); +found: + return cs_keyring; +} + +static void cleanup_cs_keys(void) +{ + struct key *cs_keyring; + + cs_keyring = find_cs_keyring(); + if (!cs_keyring) + return; + + pr_dbf_msg("Found cert_store keyring. Purging..."); + /* + * Remove cert_store_key_type in case invalidation + * of old cert_store keys failed (= severe error). + */ + if (invalidate_keyring_keys(cs_keyring)) + unregister_key_type(&key_type_cert_store_key); + + keyring_clear(cs_keyring); + key_invalidate(cs_keyring); + key_put(cs_keyring); + key_unlink(get_user_session_keyring(), cs_keyring); +} + +static struct key *create_cs_keyring(void) +{ + static struct key *cs_keyring; + + /* Cleanup previous cs_keyring and all associated keys if any. */ + cleanup_cs_keys(); + cs_keyring = keyring_alloc(CERT_STORE_KEYRING_NAME, GLOBAL_ROOT_UID, + GLOBAL_ROOT_GID, current_cred(), + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA | KEY_ALLOC_SET_KEEP, + NULL, get_user_session_keyring()); + if (IS_ERR(cs_keyring)) { + pr_dbf_msg("Can't allocate cert_store keyring"); + return NULL; + } + + pr_dbf_msg("Successfully allocated cert_store keyring: %08x", cs_keyring->serial); + + /* + * In case a previous clean-up ran into an + * error and unregistered key type. + */ + register_key_type(&key_type_cert_store_key); + + return cs_keyring; +} + +/* + * Allocate memory and create key description in format + * [key name in EBCDIC]:[VCE index]:[CS token]. + * Return a pointer to key description or NULL if memory + * allocation failed. Memory should be freed by caller. + */ +static char *get_key_description(struct vcssb *vcssb, const struct vce *vce) +{ + size_t len, name_len; + u32 cs_token; + char *desc; + + cs_token = vcssb->cs_token; + /* Description string contains "%64s:%04u:%08u\0". */ + name_len = sizeof(vce->vce_hdr.vc_name); + len = name_len + 1 + 4 + 1 + 8 + 1; + desc = kmalloc(len, GFP_KERNEL); + if (!desc) + return NULL; + + memcpy(desc, vce->vce_hdr.vc_name, name_len); + sprintf(desc + name_len, ":%04u:%08u", vce->vce_hdr.vc_index, cs_token); + + return desc; +} + +/* + * Create a key of type "cert_store_key" using the data from VCE for key + * payload and key description. Link the key to "cert_store" keyring. + */ +static int create_key_from_vce(struct vcssb *vcssb, struct vce *vce, + struct key *keyring) +{ + key_ref_t newkey; + char *desc; + int rc; + + desc = get_key_description(vcssb, vce); + if (!desc) + return -ENOMEM; + + newkey = key_create_or_update( + make_key_ref(keyring, true), CERT_STORE_KEY_TYPE_NAME, + desc, (u8 *)vce + vce->vce_hdr.vc_offset, + vce->vce_hdr.vc_length, + (KEY_POS_ALL & ~KEY_POS_SETATTR) | KEY_USR_VIEW | KEY_USR_READ, + KEY_ALLOC_NOT_IN_QUOTA); + + rc = PTR_ERR_OR_ZERO(newkey); + if (rc) { + pr_dbf_msg("Couldn't create a key from Certificate Entry (%d)", rc); + rc = -ENOKEY; + goto out; + } + + key_ref_put(newkey); +out: + kfree(desc); + return rc; +} + +/* Get Verification Certificate Storage Size block with DIAG320 subcode2. */ +static int get_vcssb(struct vcssb *vcssb) +{ + int diag320_rc; + + memset(vcssb, 0, sizeof(*vcssb)); + vcssb->vcssb_length = VCSSB_LEN_BYTES; + diag320_rc = diag320(DIAG320_STORAGE, vcssb); + pr_dbf_vcssb(vcssb); + + if (diag320_rc != DIAG320_RC_OK) { + pr_dbf_msg("Diag 320 Subcode 1 returned bad RC: %04x", diag320_rc); + return -EIO; + } + if (vcssb->vcssb_length == VCSSB_LEN_NO_CERTS) { + pr_dbf_msg("No certificates available for current configuration"); + return -ENOKEY; + } + + return 0; +} + +static u32 get_4k_mult_vcb_size(struct vcssb *vcssb) +{ + return round_up(vcssb->max_single_vcb_length, PAGE_SIZE); +} + +/* Fill input fields of single-entry VCB that will be read by LPAR. */ +static void fill_vcb_input(struct vcssb *vcssb, struct vcb *vcb, u16 index) +{ + memset(vcb, 0, sizeof(*vcb)); + vcb->vcb_hdr.vcb_input_length = get_4k_mult_vcb_size(vcssb); + vcb->vcb_hdr.cs_token = vcssb->cs_token; + + /* Request single entry. */ + vcb->vcb_hdr.first_vc_index = index; + vcb->vcb_hdr.last_vc_index = index; +} + +static void extract_vce_from_sevcb(struct vcb *vcb, struct vce *vce) +{ + struct vce *extracted_vce; + + extracted_vce = (struct vce *)vcb->vcb_buf; + memcpy(vce, vcb->vcb_buf, extracted_vce->vce_hdr.vce_length); + pr_dbf_vce(vce); +} + +static int get_sevcb(struct vcssb *vcssb, u16 index, struct vcb *vcb) +{ + int rc, diag320_rc; + + fill_vcb_input(vcssb, vcb, index); + + diag320_rc = diag320(DIAG320_CERT_BLOCK, vcb); + pr_dbf_msg("Diag 320 Subcode2 RC %2x", diag320_rc); + pr_dbf_vcb(vcb); + + switch (diag320_rc) { + case DIAG320_RC_OK: + rc = 0; + if (vcb->vcb_hdr.vcb_output_length == VCB_LEN_NO_CERTS) { + pr_dbf_msg("No certificate entry for index %u", index); + rc = -ENOKEY; + } else if (vcb->vcb_hdr.remaining_vc_count != 0) { + /* Retry on insufficient space. */ + pr_dbf_msg("Couldn't get all requested certificates"); + rc = -EAGAIN; + } + break; + case DIAG320_RC_CS_NOMATCH: + pr_dbf_msg("Certificate Store token mismatch"); + rc = -EAGAIN; + break; + default: + pr_dbf_msg("Diag 320 Subcode2 returned bad rc (0x%4x)", diag320_rc); + rc = -EINVAL; + break; + } + + return rc; +} + +/* + * Allocate memory for single-entry VCB, get VCB via DIAG320 subcode 2 call, + * extract VCE and create a key from its' certificate. + */ +static int create_key_from_sevcb(struct vcssb *vcssb, u16 index, + struct key *keyring) +{ + struct vcb *vcb; + struct vce *vce; + int rc; + + rc = -ENOMEM; + vcb = vmalloc(get_4k_mult_vcb_size(vcssb)); + vce = vmalloc(vcssb->max_single_vcb_length - sizeof(vcb->vcb_hdr)); + if (!vcb || !vce) + goto out; + + rc = get_sevcb(vcssb, index, vcb); + if (rc) + goto out; + + extract_vce_from_sevcb(vcb, vce); + rc = check_certificate_valid(vce); + if (rc) + goto out; + + rc = create_key_from_vce(vcssb, vce, keyring); + if (rc) + goto out; + + pr_dbf_msg("Successfully created key from Certificate Entry %d", index); +out: + vfree(vce); + vfree(vcb); + return rc; +} + +/* + * Request a single-entry VCB for each VCE available for the partition. + * Create a key from it and link it to cert_store keyring. If no keys + * could be created (i.e. VCEs were invalid) return -ENOKEY. + */ +static int add_certificates_to_keyring(struct vcssb *vcssb, struct key *keyring) +{ + int rc, index, count, added; + + count = 0; + added = 0; + /* Certificate Store entries indices start with 1 and have no gaps. */ + for (index = 1; index < vcssb->total_vc_index_count + 1; index++) { + pr_dbf_msg("Creating key from VCE %u", index); + rc = create_key_from_sevcb(vcssb, index, keyring); + count++; + + if (rc == -EAGAIN) + return rc; + + if (rc) + pr_dbf_msg("Creating key from VCE %u failed (%d)", index, rc); + else + added++; + } + + if (added == 0) { + pr_dbf_msg("Processed %d entries. No keys created", count); + return -ENOKEY; + } + + pr_info("Added %d of %d keys to cert_store keyring", added, count); + + /* + * Do not allow to link more keys to certificate store keyring after all + * the VCEs were processed. + */ + rc = keyring_restrict(make_key_ref(keyring, true), NULL, NULL); + if (rc) + pr_dbf_msg("Failed to set restriction to cert_store keyring (%d)", rc); + + return 0; +} + +/* + * Check which DIAG320 subcodes are installed. + * Return -ENOENT if subcodes 1 or 2 are not available. + */ +static int query_diag320_subcodes(void) +{ + unsigned long ism[ISM_LEN_DWORDS]; + int rc; + + rc = diag320(0, ism); + if (rc != DIAG320_RC_OK) { + pr_dbf_msg("DIAG320 subcode query returned %04x", rc); + return -ENOENT; + } + + debug_text_event(cert_store_hexdump, 3, "DIAG320 Subcode 0"); + debug_event(cert_store_hexdump, 3, ism, sizeof(ism)); + + if (!test_bit_inv(1, ism) || !test_bit_inv(2, ism)) { + pr_dbf_msg("Not all required DIAG320 subcodes are installed"); + return -ENOENT; + } + + return 0; +} + +/* + * Check if Certificate Store is supported by the firmware and DIAG320 subcodes + * 1 and 2 are installed. Create cert_store keyring and link all certificates + * available for the current partition to it as "cert_store_key" type + * keys. On refresh or error invalidate cert_store keyring and destroy + * all keys of "cert_store_key" type. + */ +static int fill_cs_keyring(void) +{ + struct key *cs_keyring; + struct vcssb *vcssb; + int rc; + + rc = -ENOMEM; + vcssb = kmalloc(VCSSB_LEN_BYTES, GFP_KERNEL); + if (!vcssb) + goto cleanup_keys; + + rc = -ENOENT; + if (!sclp.has_diag320) { + pr_dbf_msg("Certificate Store is not supported"); + goto cleanup_keys; + } + + rc = query_diag320_subcodes(); + if (rc) + goto cleanup_keys; + + rc = get_vcssb(vcssb); + if (rc) + goto cleanup_keys; + + rc = -ENOMEM; + cs_keyring = create_cs_keyring(); + if (!cs_keyring) + goto cleanup_keys; + + rc = add_certificates_to_keyring(vcssb, cs_keyring); + if (rc) + goto cleanup_cs_keyring; + + goto out; + +cleanup_cs_keyring: + key_put(cs_keyring); +cleanup_keys: + cleanup_cs_keys(); +out: + kfree(vcssb); + return rc; +} + +static DEFINE_MUTEX(cs_refresh_lock); +static int cs_status_val = -1; + +static ssize_t cs_status_show(struct kobject *kobj, + struct kobj_attribute *attr, char *buf) +{ + if (cs_status_val == -1) + return sysfs_emit(buf, "uninitialized\n"); + else if (cs_status_val == 0) + return sysfs_emit(buf, "ok\n"); + + return sysfs_emit(buf, "failed (%d)\n", cs_status_val); +} + +static struct kobj_attribute cs_status_attr = __ATTR_RO(cs_status); + +static ssize_t refresh_store(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count) +{ + int rc, retries; + + pr_dbf_msg("Refresh certificate store information requested"); + rc = mutex_lock_interruptible(&cs_refresh_lock); + if (rc) + return rc; + + for (retries = 0; retries < DIAG_MAX_RETRIES; retries++) { + /* Request certificates from certificate store. */ + rc = fill_cs_keyring(); + if (rc) + pr_dbf_msg("Failed to refresh certificate store information (%d)", rc); + if (rc != -EAGAIN) + break; + } + cs_status_val = rc; + mutex_unlock(&cs_refresh_lock); + + return rc ?: count; +} + +static struct kobj_attribute refresh_attr = __ATTR_WO(refresh); + +static const struct attribute *cert_store_attrs[] __initconst = { + &cs_status_attr.attr, + &refresh_attr.attr, + NULL, +}; + +static struct kobject *cert_store_kobj; + +static int __init cert_store_init(void) +{ + int rc = -ENOMEM; + + cert_store_dbf = debug_register("cert_store_msg", 10, 1, 64); + if (!cert_store_dbf) + goto cleanup_dbf; + + cert_store_hexdump = debug_register("cert_store_hexdump", 3, 1, 128); + if (!cert_store_hexdump) + goto cleanup_dbf; + + debug_register_view(cert_store_hexdump, &debug_hex_ascii_view); + debug_register_view(cert_store_dbf, &debug_sprintf_view); + + /* Create directory /sys/firmware/cert_store. */ + cert_store_kobj = kobject_create_and_add("cert_store", firmware_kobj); + if (!cert_store_kobj) + goto cleanup_dbf; + + rc = sysfs_create_files(cert_store_kobj, cert_store_attrs); + if (rc) + goto cleanup_kobj; + + register_key_type(&key_type_cert_store_key); + + return rc; + +cleanup_kobj: + kobject_put(cert_store_kobj); +cleanup_dbf: + debug_unregister(cert_store_dbf); + debug_unregister(cert_store_hexdump); + + return rc; +} +device_initcall(cert_store_init); diff --git a/arch/s390/kernel/diag.c b/arch/s390/kernel/diag.c index 82079f2d8583..f9f06cd8fcee 100644 --- a/arch/s390/kernel/diag.c +++ b/arch/s390/kernel/diag.c @@ -11,6 +11,7 @@ #include <linux/cpu.h> #include <linux/seq_file.h> #include <linux/debugfs.h> +#include <linux/vmalloc.h> #include <asm/asm-extable.h> #include <asm/diag.h> #include <asm/trace/diag.h> @@ -50,6 +51,7 @@ static const struct diag_desc diag_map[NR_DIAG_STAT] = { [DIAG_STAT_X304] = { .code = 0x304, .name = "Partition-Resource Service" }, [DIAG_STAT_X308] = { .code = 0x308, .name = "List-Directed IPL" }, [DIAG_STAT_X318] = { .code = 0x318, .name = "CP Name and Version Codes" }, + [DIAG_STAT_X320] = { .code = 0x320, .name = "Certificate Store" }, [DIAG_STAT_X500] = { .code = 0x500, .name = "Virtio Service" }, }; @@ -167,8 +169,29 @@ static inline int __diag204(unsigned long *subcode, unsigned long size, void *ad return rp.odd; } +/** + * diag204() - Issue diagnose 204 call. + * @subcode: Subcode of diagnose 204 to be executed. + * @size: Size of area in pages which @area points to, if given. + * @addr: Vmalloc'ed memory area where the result is written to. + * + * Execute diagnose 204 with the given subcode and write the result to the + * memory area specified with @addr. For subcodes which do not write a + * result to memory both @size and @addr must be zero. If @addr is + * specified it must be page aligned and must have been allocated with + * vmalloc(). Conversion to real / physical addresses will be handled by + * this function if required. + */ int diag204(unsigned long subcode, unsigned long size, void *addr) { + if (addr) { + if (WARN_ON_ONCE(!is_vmalloc_addr(addr))) + return -1; + if (WARN_ON_ONCE(!IS_ALIGNED((unsigned long)addr, PAGE_SIZE))) + return -1; + } + if ((subcode & DIAG204_SUBCODE_MASK) == DIAG204_SUBC_STIB4) + addr = (void *)pfn_to_phys(vmalloc_to_pfn(addr)); diag_stat_inc(DIAG_STAT_X204); size = __diag204(&subcode, size, addr); if (subcode) @@ -200,7 +223,7 @@ int diag210(struct diag210 *addr) EXPORT_SYMBOL(diag210); /* - * Diagnose 210: Get information about a virtual device + * Diagnose 8C: Access 3270 Display Device Information */ int diag8c(struct diag8c *addr, struct ccw_dev_id *devno) { diff --git a/arch/s390/kernel/ebcdic.c b/arch/s390/kernel/ebcdic.c index 7f8246c9be08..0e51fa537262 100644 --- a/arch/s390/kernel/ebcdic.c +++ b/arch/s390/kernel/ebcdic.c @@ -1,6 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* - * ECBDIC -> ASCII, ASCII -> ECBDIC, + * EBCDIC -> ASCII, ASCII -> EBCDIC, * upper to lower case (EBCDIC) conversion tables. * * S390 version diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index a660f4b6d654..49a11f6dd7ae 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -8,6 +8,7 @@ * Denis Joseph Barrow (djbarrow@de.ibm.com,barrow_dj@yahoo.com), */ +#include <linux/export.h> #include <linux/init.h> #include <linux/linkage.h> #include <asm/asm-extable.h> @@ -26,7 +27,6 @@ #include <asm/vx-insn.h> #include <asm/setup.h> #include <asm/nmi.h> -#include <asm/export.h> #include <asm/nospec-insn.h> _LPP_OFFSET = __LC_LPP diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index 85a00d97a314..05e51666db03 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -266,7 +266,7 @@ static ssize_t sys_##_prefix##_##_name##_store(struct kobject *kobj, \ struct kobj_attribute *attr, \ const char *buf, size_t len) \ { \ - strncpy(_value, buf, sizeof(_value) - 1); \ + strscpy(_value, buf, sizeof(_value)); \ strim(_value); \ return len; \ } \ @@ -557,15 +557,12 @@ static struct kobj_attribute sys_ipl_ccw_loadparm_attr = __ATTR(loadparm, 0444, ipl_ccw_loadparm_show, NULL); static struct attribute *ipl_fcp_attrs[] = { - &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_fcp_wwpn_attr.attr, &sys_ipl_fcp_lun_attr.attr, &sys_ipl_fcp_bootprog_attr.attr, &sys_ipl_fcp_br_lba_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; @@ -575,14 +572,11 @@ static struct attribute_group ipl_fcp_attr_group = { }; static struct attribute *ipl_nvme_attrs[] = { - &sys_ipl_type_attr.attr, &sys_ipl_nvme_fid_attr.attr, &sys_ipl_nvme_nsid_attr.attr, &sys_ipl_nvme_bootprog_attr.attr, &sys_ipl_nvme_br_lba_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; @@ -592,13 +586,10 @@ static struct attribute_group ipl_nvme_attr_group = { }; static struct attribute *ipl_eckd_attrs[] = { - &sys_ipl_type_attr.attr, &sys_ipl_eckd_bootprog_attr.attr, &sys_ipl_eckd_br_chr_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_device_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; @@ -610,21 +601,15 @@ static struct attribute_group ipl_eckd_attr_group = { /* CCW ipl device attributes */ static struct attribute *ipl_ccw_attrs_vm[] = { - &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, &sys_ipl_vm_parm_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; static struct attribute *ipl_ccw_attrs_lpar[] = { - &sys_ipl_type_attr.attr, &sys_ipl_device_attr.attr, &sys_ipl_ccw_loadparm_attr.attr, - &sys_ipl_secure_attr.attr, - &sys_ipl_has_secure_attr.attr, NULL, }; @@ -636,15 +621,15 @@ static struct attribute_group ipl_ccw_attr_group_lpar = { .attrs = ipl_ccw_attrs_lpar }; -/* UNKNOWN ipl device attributes */ - -static struct attribute *ipl_unknown_attrs[] = { +static struct attribute *ipl_common_attrs[] = { &sys_ipl_type_attr.attr, + &sys_ipl_secure_attr.attr, + &sys_ipl_has_secure_attr.attr, NULL, }; -static struct attribute_group ipl_unknown_attr_group = { - .attrs = ipl_unknown_attrs, +static struct attribute_group ipl_common_attr_group = { + .attrs = ipl_common_attrs, }; static struct kset *ipl_kset; @@ -668,6 +653,9 @@ static int __init ipl_init(void) rc = -ENOMEM; goto out; } + rc = sysfs_create_group(&ipl_kset->kobj, &ipl_common_attr_group); + if (rc) + goto out; switch (ipl_info.type) { case IPL_TYPE_CCW: if (MACHINE_IS_VM) @@ -689,8 +677,6 @@ static int __init ipl_init(void) rc = sysfs_create_group(&ipl_kset->kobj, &ipl_nvme_attr_group); break; default: - rc = sysfs_create_group(&ipl_kset->kobj, - &ipl_unknown_attr_group); break; } out: diff --git a/arch/s390/kernel/machine_kexec.c b/arch/s390/kernel/machine_kexec.c index 6d9276c096a6..12a2bd4fc88c 100644 --- a/arch/s390/kernel/machine_kexec.c +++ b/arch/s390/kernel/machine_kexec.c @@ -13,6 +13,7 @@ #include <linux/reboot.h> #include <linux/ftrace.h> #include <linux/debug_locks.h> +#include <asm/pfault.h> #include <asm/cio.h> #include <asm/setup.h> #include <asm/smp.h> diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 2df94d32140c..8d207b82d9fe 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -188,7 +188,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, data->memsz = ALIGN(data->memsz, PAGE_SIZE); buf.mem = data->memsz; - ptr = (void *)ipl_cert_list_addr; + ptr = __va(ipl_cert_list_addr); end = ptr + ipl_cert_list_size; ncerts = 0; while (ptr < end) { @@ -200,7 +200,7 @@ static int kexec_file_add_ipl_report(struct kimage *image, addr = data->memsz + data->report->size; addr += ncerts * sizeof(struct ipl_rb_certificate_entry); - ptr = (void *)ipl_cert_list_addr; + ptr = __va(ipl_cert_list_addr); while (ptr < end) { len = *(unsigned int *)ptr; ptr += sizeof(len); diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index dbece2803c50..ae4d4fd9afcd 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -9,15 +9,20 @@ #include <asm/ftrace.h> #include <asm/nospec-insn.h> #include <asm/ptrace.h> -#include <asm/export.h> +#define STACK_FRAME_SIZE_PTREGS (STACK_FRAME_OVERHEAD + __PT_SIZE) +#define STACK_PTREGS (STACK_FRAME_OVERHEAD) +#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) +#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) + +#define STACK_FRAME_SIZE_FREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_SIZE) +#define STACK_FREGS (STACK_FRAME_OVERHEAD) +#define STACK_FREGS_PTREGS (STACK_FRAME_OVERHEAD + __FTRACE_REGS_PT_REGS) +#define STACK_FREGS_PTREGS_GPRS (STACK_FREGS_PTREGS + __PT_GPRS) +#define STACK_FREGS_PTREGS_PSW (STACK_FREGS_PTREGS + __PT_PSW) +#define STACK_FREGS_PTREGS_ORIG_GPR2 (STACK_FREGS_PTREGS + __PT_ORIG_GPR2) +#define STACK_FREGS_PTREGS_FLAGS (STACK_FREGS_PTREGS + __PT_FLAGS) -#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) -#define STACK_PTREGS (STACK_FRAME_OVERHEAD) -#define STACK_PTREGS_GPRS (STACK_PTREGS + __PT_GPRS) -#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) -#define STACK_PTREGS_ORIG_GPR2 (STACK_PTREGS + __PT_ORIG_GPR2) -#define STACK_PTREGS_FLAGS (STACK_PTREGS + __PT_FLAGS) /* packed stack: allocate just enough for r14, r15 and backchain */ #define TRACED_FUNC_FRAME_SIZE 24 @@ -53,23 +58,23 @@ SYM_CODE_END(ftrace_stub_direct_tramp) stg %r1,__SF_BACKCHAIN(%r15) stg %r0,(__SF_GPRS+8*8)(%r15) stg %r15,(__SF_GPRS+9*8)(%r15) - # allocate pt_regs and stack frame for ftrace_trace_function - aghi %r15,-STACK_FRAME_SIZE - stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) - xc STACK_PTREGS_ORIG_GPR2(8,%r15),STACK_PTREGS_ORIG_GPR2(%r15) + # allocate ftrace_regs and stack frame for ftrace_trace_function + aghi %r15,-STACK_FRAME_SIZE_FREGS + stg %r1,(STACK_FREGS_PTREGS_GPRS+15*8)(%r15) + xc STACK_FREGS_PTREGS_ORIG_GPR2(8,%r15),STACK_FREGS_PTREGS_ORIG_GPR2(%r15) .if \allregs == 1 - stg %r14,(STACK_PTREGS_PSW)(%r15) - mvghi STACK_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS + stg %r14,(STACK_FREGS_PTREGS_PSW)(%r15) + mvghi STACK_FREGS_PTREGS_FLAGS(%r15),_PIF_FTRACE_FULL_REGS .else - xc STACK_PTREGS_FLAGS(8,%r15),STACK_PTREGS_FLAGS(%r15) + xc STACK_FREGS_PTREGS_FLAGS(8,%r15),STACK_FREGS_PTREGS_FLAGS(%r15) .endif lg %r14,(__SF_GPRS+8*8)(%r1) # restore original return address aghi %r1,-TRACED_FUNC_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) - stg %r0,(STACK_PTREGS_PSW+8)(%r15) - stmg %r2,%r14,(STACK_PTREGS_GPRS+2*8)(%r15) + stg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) + stmg %r2,%r14,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) .endm SYM_CODE_START(ftrace_regs_caller) @@ -96,30 +101,30 @@ SYM_CODE_START(ftrace_common) lg %r1,0(%r1) #endif lgr %r3,%r14 - la %r5,STACK_PTREGS(%r15) + la %r5,STACK_FREGS(%r15) BASR_EX %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. SYM_INNER_LABEL(ftrace_graph_caller, SYM_L_GLOBAL) j .Lftrace_graph_caller_end - lmg %r2,%r3,(STACK_PTREGS_GPRS+14*8)(%r15) - lg %r4,(STACK_PTREGS_PSW+8)(%r15) + lmg %r2,%r3,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) + lg %r4,(STACK_FREGS_PTREGS_PSW+8)(%r15) brasl %r14,prepare_ftrace_return - stg %r2,(STACK_PTREGS_GPRS+14*8)(%r15) + stg %r2,(STACK_FREGS_PTREGS_GPRS+14*8)(%r15) .Lftrace_graph_caller_end: #endif - lg %r0,(STACK_PTREGS_PSW+8)(%r15) + lg %r0,(STACK_FREGS_PTREGS_PSW+8)(%r15) #ifdef CONFIG_HAVE_MARCH_Z196_FEATURES - ltg %r1,STACK_PTREGS_ORIG_GPR2(%r15) + ltg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) locgrz %r1,%r0 #else - lg %r1,STACK_PTREGS_ORIG_GPR2(%r15) + lg %r1,STACK_FREGS_PTREGS_ORIG_GPR2(%r15) ltgr %r1,%r1 jnz 0f lgr %r1,%r0 #endif -0: lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) +0: lmg %r2,%r15,(STACK_FREGS_PTREGS_GPRS+2*8)(%r15) BR_EX %r1 SYM_CODE_END(ftrace_common) @@ -128,10 +133,14 @@ SYM_CODE_END(ftrace_common) SYM_FUNC_START(return_to_handler) stmg %r2,%r5,32(%r15) lgr %r1,%r15 - aghi %r15,-STACK_FRAME_OVERHEAD + aghi %r15,-(STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE) stg %r1,__SF_BACKCHAIN(%r15) + la %r3,STACK_FRAME_OVERHEAD(%r15) + stg %r1,__FGRAPH_RET_FP(%r3) + stg %r2,__FGRAPH_RET_GPR2(%r3) + lgr %r2,%r3 brasl %r14,ftrace_return_to_handler - aghi %r15,STACK_FRAME_OVERHEAD + aghi %r15,STACK_FRAME_OVERHEAD+__FGRAPH_RET_SIZE lgr %r14,%r2 lmg %r2,%r5,32(%r15) BR_EX %r14 @@ -160,11 +169,11 @@ SYM_CODE_END(ftrace_shared_hotpatch_trampoline_exrl) SYM_CODE_START(arch_rethook_trampoline) stg %r14,(__SF_GPRS+8*8)(%r15) - lay %r15,-STACK_FRAME_SIZE(%r15) + lay %r15,-STACK_FRAME_SIZE_PTREGS(%r15) stmg %r0,%r14,STACK_PTREGS_GPRS(%r15) # store original stack pointer in backchain and pt_regs - lay %r7,STACK_FRAME_SIZE(%r15) + lay %r7,STACK_FRAME_SIZE_PTREGS(%r15) stg %r7,__SF_BACKCHAIN(%r15) stg %r7,STACK_PTREGS_GPRS+(15*8)(%r15) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 00d76448319d..c744104e4a9c 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -146,6 +146,7 @@ static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31; static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31; int __bootdata(noexec_disabled); +unsigned long __bootdata_preserved(max_mappable); unsigned long __bootdata(ident_map_size); struct physmem_info __bootdata(physmem_info); @@ -874,7 +875,7 @@ static void __init log_component_list(void) pr_info("Linux is running with Secure-IPL enabled\n"); else pr_info("Linux is running with Secure-IPL disabled\n"); - ptr = (void *) early_ipl_comp_list_addr; + ptr = __va(early_ipl_comp_list_addr); end = (void *) ptr + early_ipl_comp_list_size; pr_info("The IPL report contains the following components:\n"); while (ptr < end) { diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index f9a2b755f510..a4edb7ea66ea 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -37,6 +37,7 @@ #include <linux/crash_dump.h> #include <linux/kprobes.h> #include <asm/asm-offsets.h> +#include <asm/pfault.h> #include <asm/diag.h> #include <asm/switch_to.h> #include <asm/facility.h> @@ -252,8 +253,9 @@ static void pcpu_free_lowcore(struct pcpu *pcpu) static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) { - struct lowcore *lc = lowcore_ptr[cpu]; + struct lowcore *lc, *abs_lc; + lc = lowcore_ptr[cpu]; cpumask_set_cpu(cpu, &init_mm.context.cpu_attach_mask); cpumask_set_cpu(cpu, mm_cpumask(&init_mm)); lc->cpu_nr = cpu; @@ -266,7 +268,9 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->machine_flags = S390_lowcore.machine_flags; lc->user_timer = lc->system_timer = lc->steal_timer = lc->avg_steal_timer = 0; - __ctl_store(lc->cregs_save_area, 0, 15); + abs_lc = get_abs_lowcore(); + memcpy(lc->cregs_save_area, abs_lc->cregs_save_area, sizeof(lc->cregs_save_area)); + put_abs_lowcore(abs_lc); lc->cregs_save_area[1] = lc->kernel_asce; lc->cregs_save_area[7] = lc->user_asce; save_access_regs((unsigned int *) lc->access_regs_save_area); @@ -606,8 +610,8 @@ void smp_ctl_set_clear_bit(int cr, int bit, bool set) ctlreg = (ctlreg & parms.andval) | parms.orval; abs_lc->cregs_save_area[cr] = ctlreg; put_abs_lowcore(abs_lc); - spin_unlock(&ctl_lock); on_each_cpu(smp_ctl_bit_callback, &parms, 1); + spin_unlock(&ctl_lock); } EXPORT_SYMBOL(smp_ctl_set_clear_bit); @@ -927,12 +931,18 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) rc = pcpu_alloc_lowcore(pcpu, cpu); if (rc) return rc; + /* + * Make sure global control register contents do not change + * until new CPU has initialized control registers. + */ + spin_lock(&ctl_lock); pcpu_prepare_secondary(pcpu, cpu); pcpu_attach_task(pcpu, tidle); pcpu_start_fn(pcpu, smp_start_secondary, NULL); /* Wait until cpu puts itself in the online & active maps */ while (!cpu_online(cpu)) cpu_relax(); + spin_unlock(&ctl_lock); return 0; } diff --git a/arch/s390/kernel/sthyi.c b/arch/s390/kernel/sthyi.c index 4d141e2c132e..30bb20461db4 100644 --- a/arch/s390/kernel/sthyi.c +++ b/arch/s390/kernel/sthyi.c @@ -317,7 +317,9 @@ static void fill_diag(struct sthyi_sctns *sctns) if (pages <= 0) return; - diag204_buf = vmalloc(array_size(pages, PAGE_SIZE)); + diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE), + PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE, + __builtin_return_address(0)); if (!diag204_buf) return; @@ -459,9 +461,9 @@ static int sthyi_update_cache(u64 *rc) * * Fills the destination with system information returned by the STHYI * instruction. The data is generated by emulation or execution of STHYI, - * if available. The return value is the condition code that would be - * returned, the rc parameter is the return code which is passed in - * register R2 + 1. + * if available. The return value is either a negative error value or + * the condition code that would be returned, the rc parameter is the + * return code which is passed in register R2 + 1. */ int sthyi_fill(void *dst, u64 *rc) { diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index a6935af2235c..0122cc156952 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 449 common futex_waitv sys_futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c index 66f0eb1c872b..b771f1b4cdd1 100644 --- a/arch/s390/kernel/uv.c +++ b/arch/s390/kernel/uv.c @@ -88,7 +88,7 @@ fail: * Requests the Ultravisor to pin the page in the shared state. This will * cause an intercept when the guest attempts to unshare the pinned page. */ -static int uv_pin_shared(unsigned long paddr) +int uv_pin_shared(unsigned long paddr) { struct uv_cb_cfs uvcb = { .header.cmd = UVC_CMD_PIN_PAGE_SHARED, @@ -100,6 +100,7 @@ static int uv_pin_shared(unsigned long paddr) return -EINVAL; return 0; } +EXPORT_SYMBOL_GPL(uv_pin_shared); /* * Requests the Ultravisor to destroy a guest page and make it diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 954d39adf85c..341abafb96e4 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -389,8 +389,8 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) */ int handle_sthyi(struct kvm_vcpu *vcpu) { - int reg1, reg2, r = 0; - u64 code, addr, cc = 0, rc = 0; + int reg1, reg2, cc = 0, r = 0; + u64 code, addr, rc = 0; struct sthyi_sctns *sctns = NULL; if (!test_kvm_facility(vcpu->kvm, 74)) @@ -421,7 +421,10 @@ int handle_sthyi(struct kvm_vcpu *vcpu) return -ENOMEM; cc = sthyi_fill(sctns, &rc); - + if (cc < 0) { + free_page((unsigned long)sctns); + return cc; + } out: if (!cc) { if (kvm_s390_pv_cpu_is_protected(vcpu)) { diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 0261d42c7d01..a7ea80cfa445 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -270,18 +270,6 @@ static inline u64 kvm_s390_pv_cpu_get_handle(struct kvm_vcpu *vcpu) return vcpu->arch.pv.handle; } -static inline bool kvm_s390_pv_is_protected(struct kvm *kvm) -{ - lockdep_assert_held(&kvm->lock); - return !!kvm_s390_pv_get_handle(kvm); -} - -static inline bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) -{ - lockdep_assert_held(&vcpu->mutex); - return !!kvm_s390_pv_cpu_get_handle(vcpu); -} - /* implemented in interrupt.c */ int kvm_s390_handle_wait(struct kvm_vcpu *vcpu); void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index bf1fdc7bf89e..8d3f39a8a11e 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -18,6 +18,20 @@ #include <linux/mmu_notifier.h> #include "kvm-s390.h" +bool kvm_s390_pv_is_protected(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->lock); + return !!kvm_s390_pv_get_handle(kvm); +} +EXPORT_SYMBOL_GPL(kvm_s390_pv_is_protected); + +bool kvm_s390_pv_cpu_is_protected(struct kvm_vcpu *vcpu) +{ + lockdep_assert_held(&vcpu->mutex); + return !!kvm_s390_pv_cpu_get_handle(vcpu); +} +EXPORT_SYMBOL_GPL(kvm_s390_pv_cpu_is_protected); + /** * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to * be destroyed diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 5a9a55de2e10..08f60a42b9a6 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -5,8 +5,8 @@ * Copyright IBM Corp. 2012 */ +#include <linux/export.h> #include <linux/linkage.h> -#include <asm/export.h> #include <asm/nospec-insn.h> GEN_BR_THUNK %r14 diff --git a/arch/s390/lib/tishift.S b/arch/s390/lib/tishift.S index de33cf02cfd2..96214f51f49b 100644 --- a/arch/s390/lib/tishift.S +++ b/arch/s390/lib/tishift.S @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/export.h> #include <linux/linkage.h> #include <asm/nospec-insn.h> -#include <asm/export.h> .section .noinstr.text, "ax" diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile index d90db06a8af5..352ff520fd94 100644 --- a/arch/s390/mm/Makefile +++ b/arch/s390/mm/Makefile @@ -10,3 +10,4 @@ obj-$(CONFIG_CMM) += cmm.o obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o obj-$(CONFIG_PTDUMP_CORE) += dump_pagetables.o obj-$(CONFIG_PGSTE) += gmap.o +obj-$(CONFIG_PFAULT) += pfault.o diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index 5300c6867d5e..f47515313226 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -90,7 +90,7 @@ static long cmm_alloc_pages(long nr, long *counter, } else free_page((unsigned long) npa); } - diag10_range(virt_to_pfn(addr), 1); + diag10_range(virt_to_pfn((void *)addr), 1); pa->pages[pa->index++] = addr; (*counter)++; spin_unlock(&cmm_lock); diff --git a/arch/s390/mm/dump_pagetables.c b/arch/s390/mm/dump_pagetables.c index ba5f80268878..afa5db750d92 100644 --- a/arch/s390/mm/dump_pagetables.c +++ b/arch/s390/mm/dump_pagetables.c @@ -297,7 +297,7 @@ static int pt_dump_init(void) address_markers[ABS_LOWCORE_NR].start_address = __abs_lowcore; address_markers[ABS_LOWCORE_END_NR].start_address = __abs_lowcore + ABS_LOWCORE_MAP_SIZE; address_markers[MEMCPY_REAL_NR].start_address = __memcpy_real_area; - address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + PAGE_SIZE; + address_markers[MEMCPY_REAL_END_NR].start_address = __memcpy_real_area + MEMCPY_REAL_SIZE; address_markers[VMEMMAP_NR].start_address = (unsigned long) vmemmap; address_markers[VMEMMAP_END_NR].start_address = (unsigned long)vmemmap + vmemmap_size; address_markers[VMALLOC_NR].start_address = VMALLOC_START; diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c index 1bc42ce26599..e41869f5cc95 100644 --- a/arch/s390/mm/extmem.c +++ b/arch/s390/mm/extmem.c @@ -640,10 +640,13 @@ void segment_warning(int rc, char *seg_name) pr_err("There is not enough memory to load or query " "DCSS %s\n", seg_name); break; - case -ERANGE: - pr_err("DCSS %s exceeds the kernel mapping range (%lu) " - "and cannot be loaded\n", seg_name, VMEM_MAX_PHYS); + case -ERANGE: { + struct range mhp_range = arch_get_mappable_range(); + + pr_err("DCSS %s exceeds the kernel mapping range (%llu) " + "and cannot be loaded\n", seg_name, mhp_range.end + 1); break; + } default: break; } diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index a063774ba584..099c4824dd8a 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -43,8 +43,6 @@ #include "../kernel/entry.h" #define __FAIL_ADDR_MASK -4096L -#define __SUBCODE_MASK 0x0600 -#define __PF_RES_FIELD 0x8000000000000000ULL /* * Allocate private vm_fault_reason from top. Please make sure it won't @@ -582,232 +580,6 @@ void do_dat_exception(struct pt_regs *regs) } NOKPROBE_SYMBOL(do_dat_exception); -#ifdef CONFIG_PFAULT -/* - * 'pfault' pseudo page faults routines. - */ -static int pfault_disable; - -static int __init nopfault(char *str) -{ - pfault_disable = 1; - return 1; -} - -__setup("nopfault", nopfault); - -struct pfault_refbk { - u16 refdiagc; - u16 reffcode; - u16 refdwlen; - u16 refversn; - u64 refgaddr; - u64 refselmk; - u64 refcmpmk; - u64 reserved; -} __attribute__ ((packed, aligned(8))); - -static struct pfault_refbk pfault_init_refbk = { - .refdiagc = 0x258, - .reffcode = 0, - .refdwlen = 5, - .refversn = 2, - .refgaddr = __LC_LPP, - .refselmk = 1ULL << 48, - .refcmpmk = 1ULL << 48, - .reserved = __PF_RES_FIELD -}; - -int pfault_init(void) -{ - int rc; - - if (pfault_disable) - return -1; - diag_stat_inc(DIAG_STAT_X258); - asm volatile( - " diag %1,%0,0x258\n" - "0: j 2f\n" - "1: la %0,8\n" - "2:\n" - EX_TABLE(0b,1b) - : "=d" (rc) - : "a" (&pfault_init_refbk), "m" (pfault_init_refbk) : "cc"); - return rc; -} - -static struct pfault_refbk pfault_fini_refbk = { - .refdiagc = 0x258, - .reffcode = 1, - .refdwlen = 5, - .refversn = 2, -}; - -void pfault_fini(void) -{ - - if (pfault_disable) - return; - diag_stat_inc(DIAG_STAT_X258); - asm volatile( - " diag %0,0,0x258\n" - "0: nopr %%r7\n" - EX_TABLE(0b,0b) - : : "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) : "cc"); -} - -static DEFINE_SPINLOCK(pfault_lock); -static LIST_HEAD(pfault_list); - -#define PF_COMPLETE 0x0080 - -/* - * The mechanism of our pfault code: if Linux is running as guest, runs a user - * space process and the user space process accesses a page that the host has - * paged out we get a pfault interrupt. - * - * This allows us, within the guest, to schedule a different process. Without - * this mechanism the host would have to suspend the whole virtual cpu until - * the page has been paged in. - * - * So when we get such an interrupt then we set the state of the current task - * to uninterruptible and also set the need_resched flag. Both happens within - * interrupt context(!). If we later on want to return to user space we - * recognize the need_resched flag and then call schedule(). It's not very - * obvious how this works... - * - * Of course we have a lot of additional fun with the completion interrupt (-> - * host signals that a page of a process has been paged in and the process can - * continue to run). This interrupt can arrive on any cpu and, since we have - * virtual cpus, actually appear before the interrupt that signals that a page - * is missing. - */ -static void pfault_interrupt(struct ext_code ext_code, - unsigned int param32, unsigned long param64) -{ - struct task_struct *tsk; - __u16 subcode; - pid_t pid; - - /* - * Get the external interruption subcode & pfault initial/completion - * signal bit. VM stores this in the 'cpu address' field associated - * with the external interrupt. - */ - subcode = ext_code.subcode; - if ((subcode & 0xff00) != __SUBCODE_MASK) - return; - inc_irq_stat(IRQEXT_PFL); - /* Get the token (= pid of the affected task). */ - pid = param64 & LPP_PID_MASK; - rcu_read_lock(); - tsk = find_task_by_pid_ns(pid, &init_pid_ns); - if (tsk) - get_task_struct(tsk); - rcu_read_unlock(); - if (!tsk) - return; - spin_lock(&pfault_lock); - if (subcode & PF_COMPLETE) { - /* signal bit is set -> a page has been swapped in by VM */ - if (tsk->thread.pfault_wait == 1) { - /* Initial interrupt was faster than the completion - * interrupt. pfault_wait is valid. Set pfault_wait - * back to zero and wake up the process. This can - * safely be done because the task is still sleeping - * and can't produce new pfaults. */ - tsk->thread.pfault_wait = 0; - list_del(&tsk->thread.list); - wake_up_process(tsk); - put_task_struct(tsk); - } else { - /* Completion interrupt was faster than initial - * interrupt. Set pfault_wait to -1 so the initial - * interrupt doesn't put the task to sleep. - * If the task is not running, ignore the completion - * interrupt since it must be a leftover of a PFAULT - * CANCEL operation which didn't remove all pending - * completion interrupts. */ - if (task_is_running(tsk)) - tsk->thread.pfault_wait = -1; - } - } else { - /* signal bit not set -> a real page is missing. */ - if (WARN_ON_ONCE(tsk != current)) - goto out; - if (tsk->thread.pfault_wait == 1) { - /* Already on the list with a reference: put to sleep */ - goto block; - } else if (tsk->thread.pfault_wait == -1) { - /* Completion interrupt was faster than the initial - * interrupt (pfault_wait == -1). Set pfault_wait - * back to zero and exit. */ - tsk->thread.pfault_wait = 0; - } else { - /* Initial interrupt arrived before completion - * interrupt. Let the task sleep. - * An extra task reference is needed since a different - * cpu may set the task state to TASK_RUNNING again - * before the scheduler is reached. */ - get_task_struct(tsk); - tsk->thread.pfault_wait = 1; - list_add(&tsk->thread.list, &pfault_list); -block: - /* Since this must be a userspace fault, there - * is no kernel task state to trample. Rely on the - * return to userspace schedule() to block. */ - __set_current_state(TASK_UNINTERRUPTIBLE); - set_tsk_need_resched(tsk); - set_preempt_need_resched(); - } - } -out: - spin_unlock(&pfault_lock); - put_task_struct(tsk); -} - -static int pfault_cpu_dead(unsigned int cpu) -{ - struct thread_struct *thread, *next; - struct task_struct *tsk; - - spin_lock_irq(&pfault_lock); - list_for_each_entry_safe(thread, next, &pfault_list, list) { - thread->pfault_wait = 0; - list_del(&thread->list); - tsk = container_of(thread, struct task_struct, thread); - wake_up_process(tsk); - put_task_struct(tsk); - } - spin_unlock_irq(&pfault_lock); - return 0; -} - -static int __init pfault_irq_init(void) -{ - int rc; - - rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); - if (rc) - goto out_extint; - rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; - if (rc) - goto out_pfault; - irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); - cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead", - NULL, pfault_cpu_dead); - return 0; - -out_pfault: - unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); -out_extint: - pfault_disable = 1; - return rc; -} -early_initcall(pfault_irq_init); - -#endif /* CONFIG_PFAULT */ - #if IS_ENABLED(CONFIG_PGSTE) void do_secure_storage_access(struct pt_regs *regs) diff --git a/arch/s390/mm/maccess.c b/arch/s390/mm/maccess.c index cbe1df1e9c18..c805b3e2592b 100644 --- a/arch/s390/mm/maccess.c +++ b/arch/s390/mm/maccess.c @@ -86,11 +86,12 @@ size_t memcpy_real_iter(struct iov_iter *iter, unsigned long src, size_t count) void *chunk; pte_t pte; + BUILD_BUG_ON(MEMCPY_REAL_SIZE != PAGE_SIZE); while (count) { - phys = src & PAGE_MASK; - offset = src & ~PAGE_MASK; + phys = src & MEMCPY_REAL_MASK; + offset = src & ~MEMCPY_REAL_MASK; chunk = (void *)(__memcpy_real_area + offset); - len = min(count, PAGE_SIZE - offset); + len = min(count, MEMCPY_REAL_SIZE - offset); pte = mk_pte_phys(phys, PAGE_KERNEL_RO); mutex_lock(&memcpy_real_mutex); diff --git a/arch/s390/mm/pfault.c b/arch/s390/mm/pfault.c new file mode 100644 index 000000000000..1aac13bb8f53 --- /dev/null +++ b/arch/s390/mm/pfault.c @@ -0,0 +1,248 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 1999, 2023 + */ + +#include <linux/cpuhotplug.h> +#include <linux/sched/task.h> +#include <linux/errno.h> +#include <linux/init.h> +#include <linux/irq.h> +#include <asm/asm-extable.h> +#include <asm/pfault.h> +#include <asm/diag.h> + +#define __SUBCODE_MASK 0x0600 +#define __PF_RES_FIELD 0x8000000000000000UL + +/* + * 'pfault' pseudo page faults routines. + */ +static int pfault_disable; + +static int __init nopfault(char *str) +{ + pfault_disable = 1; + return 1; +} +early_param("nopfault", nopfault); + +struct pfault_refbk { + u16 refdiagc; + u16 reffcode; + u16 refdwlen; + u16 refversn; + u64 refgaddr; + u64 refselmk; + u64 refcmpmk; + u64 reserved; +}; + +static struct pfault_refbk pfault_init_refbk = { + .refdiagc = 0x258, + .reffcode = 0, + .refdwlen = 5, + .refversn = 2, + .refgaddr = __LC_LPP, + .refselmk = 1UL << 48, + .refcmpmk = 1UL << 48, + .reserved = __PF_RES_FIELD +}; + +int __pfault_init(void) +{ + int rc = -EOPNOTSUPP; + + if (pfault_disable) + return rc; + diag_stat_inc(DIAG_STAT_X258); + asm volatile( + " diag %[refbk],%[rc],0x258\n" + "0: nopr %%r7\n" + EX_TABLE(0b, 0b) + : [rc] "+d" (rc) + : [refbk] "a" (&pfault_init_refbk), "m" (pfault_init_refbk) + : "cc"); + return rc; +} + +static struct pfault_refbk pfault_fini_refbk = { + .refdiagc = 0x258, + .reffcode = 1, + .refdwlen = 5, + .refversn = 2, +}; + +void __pfault_fini(void) +{ + if (pfault_disable) + return; + diag_stat_inc(DIAG_STAT_X258); + asm volatile( + " diag %[refbk],0,0x258\n" + "0: nopr %%r7\n" + EX_TABLE(0b, 0b) + : + : [refbk] "a" (&pfault_fini_refbk), "m" (pfault_fini_refbk) + : "cc"); +} + +static DEFINE_SPINLOCK(pfault_lock); +static LIST_HEAD(pfault_list); + +#define PF_COMPLETE 0x0080 + +/* + * The mechanism of our pfault code: if Linux is running as guest, runs a user + * space process and the user space process accesses a page that the host has + * paged out we get a pfault interrupt. + * + * This allows us, within the guest, to schedule a different process. Without + * this mechanism the host would have to suspend the whole virtual cpu until + * the page has been paged in. + * + * So when we get such an interrupt then we set the state of the current task + * to uninterruptible and also set the need_resched flag. Both happens within + * interrupt context(!). If we later on want to return to user space we + * recognize the need_resched flag and then call schedule(). It's not very + * obvious how this works... + * + * Of course we have a lot of additional fun with the completion interrupt (-> + * host signals that a page of a process has been paged in and the process can + * continue to run). This interrupt can arrive on any cpu and, since we have + * virtual cpus, actually appear before the interrupt that signals that a page + * is missing. + */ +static void pfault_interrupt(struct ext_code ext_code, + unsigned int param32, unsigned long param64) +{ + struct task_struct *tsk; + __u16 subcode; + pid_t pid; + + /* + * Get the external interruption subcode & pfault initial/completion + * signal bit. VM stores this in the 'cpu address' field associated + * with the external interrupt. + */ + subcode = ext_code.subcode; + if ((subcode & 0xff00) != __SUBCODE_MASK) + return; + inc_irq_stat(IRQEXT_PFL); + /* Get the token (= pid of the affected task). */ + pid = param64 & LPP_PID_MASK; + rcu_read_lock(); + tsk = find_task_by_pid_ns(pid, &init_pid_ns); + if (tsk) + get_task_struct(tsk); + rcu_read_unlock(); + if (!tsk) + return; + spin_lock(&pfault_lock); + if (subcode & PF_COMPLETE) { + /* signal bit is set -> a page has been swapped in by VM */ + if (tsk->thread.pfault_wait == 1) { + /* + * Initial interrupt was faster than the completion + * interrupt. pfault_wait is valid. Set pfault_wait + * back to zero and wake up the process. This can + * safely be done because the task is still sleeping + * and can't produce new pfaults. + */ + tsk->thread.pfault_wait = 0; + list_del(&tsk->thread.list); + wake_up_process(tsk); + put_task_struct(tsk); + } else { + /* + * Completion interrupt was faster than initial + * interrupt. Set pfault_wait to -1 so the initial + * interrupt doesn't put the task to sleep. + * If the task is not running, ignore the completion + * interrupt since it must be a leftover of a PFAULT + * CANCEL operation which didn't remove all pending + * completion interrupts. + */ + if (task_is_running(tsk)) + tsk->thread.pfault_wait = -1; + } + } else { + /* signal bit not set -> a real page is missing. */ + if (WARN_ON_ONCE(tsk != current)) + goto out; + if (tsk->thread.pfault_wait == 1) { + /* Already on the list with a reference: put to sleep */ + goto block; + } else if (tsk->thread.pfault_wait == -1) { + /* + * Completion interrupt was faster than the initial + * interrupt (pfault_wait == -1). Set pfault_wait + * back to zero and exit. + */ + tsk->thread.pfault_wait = 0; + } else { + /* + * Initial interrupt arrived before completion + * interrupt. Let the task sleep. + * An extra task reference is needed since a different + * cpu may set the task state to TASK_RUNNING again + * before the scheduler is reached. + */ + get_task_struct(tsk); + tsk->thread.pfault_wait = 1; + list_add(&tsk->thread.list, &pfault_list); +block: + /* + * Since this must be a userspace fault, there + * is no kernel task state to trample. Rely on the + * return to userspace schedule() to block. + */ + __set_current_state(TASK_UNINTERRUPTIBLE); + set_tsk_need_resched(tsk); + set_preempt_need_resched(); + } + } +out: + spin_unlock(&pfault_lock); + put_task_struct(tsk); +} + +static int pfault_cpu_dead(unsigned int cpu) +{ + struct thread_struct *thread, *next; + struct task_struct *tsk; + + spin_lock_irq(&pfault_lock); + list_for_each_entry_safe(thread, next, &pfault_list, list) { + thread->pfault_wait = 0; + list_del(&thread->list); + tsk = container_of(thread, struct task_struct, thread); + wake_up_process(tsk); + put_task_struct(tsk); + } + spin_unlock_irq(&pfault_lock); + return 0; +} + +static int __init pfault_irq_init(void) +{ + int rc; + + rc = register_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); + if (rc) + goto out_extint; + rc = pfault_init() == 0 ? 0 : -EOPNOTSUPP; + if (rc) + goto out_pfault; + irq_subclass_register(IRQ_SUBCLASS_SERVICE_SIGNAL); + cpuhp_setup_state_nocalls(CPUHP_S390_PFAULT_DEAD, "s390/pfault:dead", + NULL, pfault_cpu_dead); + return 0; + +out_pfault: + unregister_external_irq(EXT_IRQ_CP_SERVICE, pfault_interrupt); +out_extint: + pfault_disable = 1; + return rc; +} +early_initcall(pfault_irq_init); diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index b26649233d12..e44243b9c0a4 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -36,7 +36,7 @@ static void vmem_free_pages(unsigned long addr, int order) { /* We don't expect boot memory to be removed ever. */ if (!slab_is_available() || - WARN_ON_ONCE(PageReserved(virt_to_page(addr)))) + WARN_ON_ONCE(PageReserved(virt_to_page((void *)addr)))) return; free_pages(addr, order); } @@ -531,7 +531,7 @@ struct range arch_get_mappable_range(void) struct range mhp_range; mhp_range.start = 0; - mhp_range.end = VMEM_MAX_PHYS - 1; + mhp_range.end = max_mappable - 1; return mhp_range; } @@ -763,6 +763,8 @@ void __init vmem_map_init(void) if (static_key_enabled(&cpu_has_bear)) set_memory_nx(0, 1); set_memory_nx(PAGE_SIZE, 1); + if (debug_pagealloc_enabled()) + set_memory_4k(0, ident_map_size >> PAGE_SHIFT); pr_info("Write protected kernel read-only data: %luk\n", (unsigned long)(__end_rodata - _stext) >> 10); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index ee367798e388..ee90a91ed888 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -666,9 +666,4 @@ static struct miscdevice clp_misc_device = { .fops = &clp_misc_fops, }; -static int __init clp_misc_init(void) -{ - return misc_register(&clp_misc_device); -} - -device_initcall(clp_misc_init); +builtin_misc_device(clp_misc_device); diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 97377e8c5025..e90d585c4d3e 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/arch/sparc/include/asm/processor_64.h b/arch/sparc/include/asm/processor_64.h index 2667f35d5ea5..0a0d5c3d184c 100644 --- a/arch/sparc/include/asm/processor_64.h +++ b/arch/sparc/include/asm/processor_64.h @@ -213,7 +213,6 @@ unsigned long __get_wchan(struct task_struct *task); */ #define ARCH_HAS_PREFETCH #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH static inline void prefetch(const void *x) { @@ -239,8 +238,6 @@ static inline void prefetchw(const void *x) : "r" (x)); } -#define spin_lock_prefetch(x) prefetchw(x) - #define HAVE_ARCH_PICK_MMAP_LAYOUT int do_mathemu(struct pt_regs *regs, struct fpustate *f, bool illegal_insn_trap); diff --git a/arch/sparc/include/uapi/asm/openpromio.h b/arch/sparc/include/uapi/asm/openpromio.h index d4494b679e99..2a73ec77aba6 100644 --- a/arch/sparc/include/uapi/asm/openpromio.h +++ b/arch/sparc/include/uapi/asm/openpromio.h @@ -10,10 +10,9 @@ * were chosen to be exactly equal to the SunOS equivalents. */ -struct openpromio -{ +struct openpromio { unsigned int oprom_size; /* Actual size of the oprom_array. */ - char oprom_array[1]; /* Holds property names and values. */ + char oprom_array[]; /* Holds property names and values. */ }; #define OPROMMAXPARAM 4096 /* Maximum size of oprom_array. */ diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index faa835f3c54a..4ed06c71c43f 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -497,3 +497,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 5026e7b9adfe..ff4bda95b9c7 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -554,7 +554,7 @@ struct mconsole_output { static DEFINE_SPINLOCK(client_lock); static LIST_HEAD(clients); -static char console_buf[MCONSOLE_MAX_DATA]; +static char console_buf[MCONSOLE_MAX_DATA] __nonstring; static void console_write(struct console *console, const char *string, unsigned int len) @@ -567,7 +567,7 @@ static void console_write(struct console *console, const char *string, while (len > 0) { n = min((size_t) len, ARRAY_SIZE(console_buf)); - strncpy(console_buf, string, n); + memcpy(console_buf, string, n); string += n; len -= n; diff --git a/arch/um/drivers/vector_user.c b/arch/um/drivers/vector_user.c index c650e428432b..c719e1ec4645 100644 --- a/arch/um/drivers/vector_user.c +++ b/arch/um/drivers/vector_user.c @@ -141,7 +141,7 @@ static int create_tap_fd(char *iface) } memset(&ifr, 0, sizeof(ifr)); ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR; - strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); + strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); err = ioctl(fd, TUNSETIFF, (void *) &ifr); if (err != 0) { @@ -171,7 +171,7 @@ static int create_raw_fd(char *iface, int flags, int proto) goto raw_fd_cleanup; } memset(&ifr, 0, sizeof(ifr)); - strncpy((char *)&ifr.ifr_name, iface, sizeof(ifr.ifr_name) - 1); + strscpy(ifr.ifr_name, iface, sizeof(ifr.ifr_name)); if (ioctl(fd, SIOCGIFINDEX, (void *) &ifr) < 0) { err = -errno; goto raw_fd_cleanup; diff --git a/arch/um/include/shared/user.h b/arch/um/include/shared/user.h index 0347a190429c..981e11d8e025 100644 --- a/arch/um/include/shared/user.h +++ b/arch/um/include/shared/user.h @@ -50,7 +50,6 @@ static inline int printk(const char *fmt, ...) #endif extern int in_aton(char *str); -extern size_t strlcpy(char *, const char *, size_t); extern size_t strlcat(char *, const char *, size_t); extern size_t strscpy(char *, const char *, size_t); diff --git a/arch/um/os-Linux/umid.c b/arch/um/os-Linux/umid.c index 7a1abb829930..288c422bfa96 100644 --- a/arch/um/os-Linux/umid.c +++ b/arch/um/os-Linux/umid.c @@ -40,7 +40,7 @@ static int __init make_uml_dir(void) __func__); goto err; } - strlcpy(dir, home, sizeof(dir)); + strscpy(dir, home, sizeof(dir)); uml_dir++; } strlcat(dir, uml_dir, sizeof(dir)); @@ -243,7 +243,7 @@ int __init set_umid(char *name) if (strlen(name) > UMID_LEN - 1) return -E2BIG; - strlcpy(umid, name, sizeof(umid)); + strscpy(umid, name, sizeof(umid)); return 0; } @@ -262,7 +262,7 @@ static int __init make_umid(void) make_uml_dir(); if (*umid == '\0') { - strlcpy(tmp, uml_dir, sizeof(tmp)); + strscpy(tmp, uml_dir, sizeof(tmp)); strlcat(tmp, "XXXXXX", sizeof(tmp)); fd = mkstemp(tmp); if (fd < 0) { diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d0258e92a8af..ad3a53e28aac 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1310,44 +1310,8 @@ config X86_REBOOTFIXUPS Say N otherwise. config MICROCODE - bool "CPU microcode loading support" - default y + def_bool y depends on CPU_SUP_AMD || CPU_SUP_INTEL - help - If you say Y here, you will be able to update the microcode on - Intel and AMD processors. The Intel support is for the IA32 family, - e.g. Pentium Pro, Pentium II, Pentium III, Pentium 4, Xeon etc. The - AMD support is for families 0x10 and later. You will obviously need - the actual microcode binary data itself which is not shipped with - the Linux kernel. - - The preferred method to load microcode from a detached initrd is described - in Documentation/arch/x86/microcode.rst. For that you need to enable - CONFIG_BLK_DEV_INITRD in order for the loader to be able to scan the - initrd for microcode blobs. - - In addition, you can build the microcode into the kernel. For that you - need to add the vendor-supplied microcode to the CONFIG_EXTRA_FIRMWARE - config option. - -config MICROCODE_INTEL - bool "Intel microcode loading support" - depends on CPU_SUP_INTEL && MICROCODE - default MICROCODE - help - This options enables microcode patch loading support for Intel - processors. - - For the current Intel microcode data package go to - <https://downloadcenter.intel.com> and search for - 'Linux Processor Microcode Data File'. - -config MICROCODE_AMD - bool "AMD microcode loading support" - depends on CPU_SUP_AMD && MICROCODE - help - If you select this option, microcode patch loading support for AMD - processors will be enabled. config MICROCODE_LATE_LOADING bool "Late microcode loading (DANGEROUS)" @@ -2595,6 +2559,13 @@ config CPU_IBRS_ENTRY This mitigates both spectre_v2 and retbleed at great cost to performance. +config CPU_SRSO + bool "Mitigate speculative RAS overflow on AMD" + depends on CPU_SUP_AMD && X86_64 && RETHUNK + default y + help + Enable the SRSO mitigation needed on AMD Zen1-4 machines. + config SLS bool "Mitigate Straight-Line-Speculation" depends on CC_HAS_SLS && X86_64 @@ -2605,6 +2576,25 @@ config SLS against straight line speculation. The kernel image might be slightly larger. +config GDS_FORCE_MITIGATION + bool "Force GDS Mitigation" + depends on CPU_SUP_INTEL + default n + help + Gather Data Sampling (GDS) is a hardware vulnerability which allows + unprivileged speculative access to data which was previously stored in + vector registers. + + This option is equivalent to setting gather_data_sampling=force on the + command line. The microcode mitigation is used if present, otherwise + AVX is disabled as a mitigation. On affected systems that are missing + the microcode any userspace code that unconditionally uses AVX will + break with this option set. + + Setting this option on systems not vulnerable to GDS has no effect. + + If in doubt, say N. + endif config ARCH_HAS_ADD_PAGES diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile index 40d2ff503079..71fc531b95b4 100644 --- a/arch/x86/boot/compressed/Makefile +++ b/arch/x86/boot/compressed/Makefile @@ -74,6 +74,11 @@ LDFLAGS_vmlinux += -z noexecstack ifeq ($(CONFIG_LD_IS_BFD),y) LDFLAGS_vmlinux += $(call ld-option,--no-warn-rwx-segments) endif +ifeq ($(CONFIG_EFI_STUB),y) +# ensure that the static EFI stub library will be pulled in, even if it is +# never referenced explicitly from the startup code +LDFLAGS_vmlinux += -u efi_pe_entry +endif LDFLAGS_vmlinux += -T hostprogs := mkpiggy diff --git a/arch/x86/boot/compressed/efi_mixed.S b/arch/x86/boot/compressed/efi_mixed.S index 4ca70bf93dc0..f4e22ef774ab 100644 --- a/arch/x86/boot/compressed/efi_mixed.S +++ b/arch/x86/boot/compressed/efi_mixed.S @@ -26,8 +26,8 @@ * When booting in 64-bit mode on 32-bit EFI firmware, startup_64_mixed_mode() * is the first thing that runs after switching to long mode. Depending on * whether the EFI handover protocol or the compat entry point was used to - * enter the kernel, it will either branch to the 64-bit EFI handover - * entrypoint at offset 0x390 in the image, or to the 64-bit EFI PE/COFF + * enter the kernel, it will either branch to the common 64-bit EFI stub + * entrypoint efi_stub_entry() directly, or via the 64-bit EFI PE/COFF * entrypoint efi_pe_entry(). In the former case, the bootloader must provide a * struct bootparams pointer as the third argument, so the presence of such a * pointer is used to disambiguate. @@ -37,21 +37,23 @@ * | efi32_pe_entry |---->| | | +-----------+--+ * +------------------+ | | +------+----------------+ | * | startup_32 |---->| startup_64_mixed_mode | | - * +------------------+ | | +------+----------------+ V - * | efi32_stub_entry |---->| | | +------------------+ - * +------------------+ +------------+ +---->| efi64_stub_entry | - * +-------------+----+ - * +------------+ +----------+ | - * | startup_64 |<----| efi_main |<--------------+ - * +------------+ +----------+ + * +------------------+ | | +------+----------------+ | + * | efi32_stub_entry |---->| | | | + * +------------------+ +------------+ | | + * V | + * +------------+ +----------------+ | + * | startup_64 |<----| efi_stub_entry |<--------+ + * +------------+ +----------------+ */ SYM_FUNC_START(startup_64_mixed_mode) lea efi32_boot_args(%rip), %rdx mov 0(%rdx), %edi mov 4(%rdx), %esi +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL mov 8(%rdx), %edx // saved bootparams pointer test %edx, %edx - jnz efi64_stub_entry + jnz efi_stub_entry +#endif /* * efi_pe_entry uses MS calling convention, which requires 32 bytes of * shadow space on the stack even if all arguments are passed in @@ -138,6 +140,28 @@ SYM_FUNC_START(__efi64_thunk) SYM_FUNC_END(__efi64_thunk) .code32 +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL +SYM_FUNC_START(efi32_stub_entry) + call 1f +1: popl %ecx + + /* Clear BSS */ + xorl %eax, %eax + leal (_bss - 1b)(%ecx), %edi + leal (_ebss - 1b)(%ecx), %ecx + subl %edi, %ecx + shrl $2, %ecx + cld + rep stosl + + add $0x4, %esp /* Discard return address */ + popl %ecx + popl %edx + popl %esi + jmp efi32_entry +SYM_FUNC_END(efi32_stub_entry) +#endif + /* * EFI service pointer must be in %edi. * @@ -218,7 +242,7 @@ SYM_FUNC_END(efi_enter32) * stub may still exit and return to the firmware using the Exit() EFI boot * service.] */ -SYM_FUNC_START(efi32_entry) +SYM_FUNC_START_LOCAL(efi32_entry) call 1f 1: pop %ebx @@ -245,10 +269,6 @@ SYM_FUNC_START(efi32_entry) jmp startup_32 SYM_FUNC_END(efi32_entry) -#define ST32_boottime 60 // offsetof(efi_system_table_32_t, boottime) -#define BS32_handle_protocol 88 // offsetof(efi_boot_services_32_t, handle_protocol) -#define LI32_image_base 32 // offsetof(efi_loaded_image_32_t, image_base) - /* * efi_status_t efi32_pe_entry(efi_handle_t image_handle, * efi_system_table_32_t *sys_table) @@ -256,8 +276,6 @@ SYM_FUNC_END(efi32_entry) SYM_FUNC_START(efi32_pe_entry) pushl %ebp movl %esp, %ebp - pushl %eax // dummy push to allocate loaded_image - pushl %ebx // save callee-save registers pushl %edi @@ -266,48 +284,8 @@ SYM_FUNC_START(efi32_pe_entry) movl $0x80000003, %eax // EFI_UNSUPPORTED jnz 2f - call 1f -1: pop %ebx - - /* Get the loaded image protocol pointer from the image handle */ - leal -4(%ebp), %eax - pushl %eax // &loaded_image - leal (loaded_image_proto - 1b)(%ebx), %eax - pushl %eax // pass the GUID address - pushl 8(%ebp) // pass the image handle - - /* - * Note the alignment of the stack frame. - * sys_table - * handle <-- 16-byte aligned on entry by ABI - * return address - * frame pointer - * loaded_image <-- local variable - * saved %ebx <-- 16-byte aligned here - * saved %edi - * &loaded_image - * &loaded_image_proto - * handle <-- 16-byte aligned for call to handle_protocol - */ - - movl 12(%ebp), %eax // sys_table - movl ST32_boottime(%eax), %eax // sys_table->boottime - call *BS32_handle_protocol(%eax) // sys_table->boottime->handle_protocol - addl $12, %esp // restore argument space - testl %eax, %eax - jnz 2f - movl 8(%ebp), %ecx // image_handle movl 12(%ebp), %edx // sys_table - movl -4(%ebp), %esi // loaded_image - movl LI32_image_base(%esi), %esi // loaded_image->image_base - leal (startup_32 - 1b)(%ebx), %ebp // runtime address of startup_32 - /* - * We need to set the image_offset variable here since startup_32() will - * use it before we get to the 64-bit efi_pe_entry() in C code. - */ - subl %esi, %ebp // calculate image_offset - movl %ebp, (image_offset - 1b)(%ebx) // save image_offset xorl %esi, %esi jmp efi32_entry // pass %ecx, %edx, %esi // no other registers remain live @@ -318,14 +296,13 @@ SYM_FUNC_START(efi32_pe_entry) RET SYM_FUNC_END(efi32_pe_entry) - .section ".rodata" - /* EFI loaded image protocol GUID */ - .balign 4 -SYM_DATA_START_LOCAL(loaded_image_proto) - .long 0x5b1b31a1 - .word 0x9562, 0x11d2 - .byte 0x8e, 0x3f, 0x00, 0xa0, 0xc9, 0x69, 0x72, 0x3b -SYM_DATA_END(loaded_image_proto) +#ifdef CONFIG_EFI_HANDOVER_PROTOCOL + .org efi32_stub_entry + 0x200 + .code64 +SYM_FUNC_START_NOALIGN(efi64_stub_entry) + jmp efi_handover_entry +SYM_FUNC_END(efi64_stub_entry) +#endif .data .balign 8 diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c index 5313c5cb2b80..19a8251de506 100644 --- a/arch/x86/boot/compressed/error.c +++ b/arch/x86/boot/compressed/error.c @@ -7,7 +7,7 @@ #include "misc.h" #include "error.h" -void warn(char *m) +void warn(const char *m) { error_putstr("\n\n"); error_putstr(m); diff --git a/arch/x86/boot/compressed/error.h b/arch/x86/boot/compressed/error.h index 86fe33b93715..31f9e080d61a 100644 --- a/arch/x86/boot/compressed/error.h +++ b/arch/x86/boot/compressed/error.h @@ -4,7 +4,7 @@ #include <linux/compiler.h> -void warn(char *m); +void warn(const char *m); void error(char *m) __noreturn; void panic(const char *fmt, ...) __noreturn __cold; diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 987ae727cf9f..1cfe9802a42f 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -84,19 +84,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE leal startup_32@GOTOFF(%edx), %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32() will be at an - * offset to the start of the space allocated for the image. efi_pe_entry() will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl image_offset@GOTOFF(%edx), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -150,17 +137,6 @@ SYM_FUNC_START(startup_32) jmp *%eax SYM_FUNC_END(startup_32) -#ifdef CONFIG_EFI_STUB -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp - movl 8(%esp), %esi /* save boot_params pointer */ - call efi_main - /* efi_main returns the possibly relocated address of startup_32 */ - jmp *%eax -SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) @@ -179,13 +155,7 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) */ /* push arguments for extract_kernel: */ - pushl output_len@GOTOFF(%ebx) /* decompressed length, end of relocs */ pushl %ebp /* output address */ - pushl input_len@GOTOFF(%ebx) /* input_len */ - leal input_data@GOTOFF(%ebx), %eax - pushl %eax /* input_data */ - leal boot_heap@GOTOFF(%ebx), %eax - pushl %eax /* heap area */ pushl %esi /* real mode pointer */ call extract_kernel /* returns kernel entry point in %eax */ addl $24, %esp @@ -213,8 +183,6 @@ SYM_DATA_END_LABEL(gdt, SYM_L_LOCAL, gdt_end) */ .bss .balign 4 -boot_heap: - .fill BOOT_HEAP_SIZE, 1, 0 boot_stack: .fill BOOT_STACK_SIZE, 1, 0 boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 03c4328a88cb..bf4a10a5794f 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -146,19 +146,6 @@ SYM_FUNC_START(startup_32) #ifdef CONFIG_RELOCATABLE movl %ebp, %ebx - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - subl rva(image_offset)(%ebp), %ebx -#endif - movl BP_kernel_alignment(%esi), %eax decl %eax addl %eax, %ebx @@ -294,17 +281,6 @@ SYM_FUNC_START(startup_32) lret SYM_FUNC_END(startup_32) -#if IS_ENABLED(CONFIG_EFI_MIXED) && IS_ENABLED(CONFIG_EFI_HANDOVER_PROTOCOL) - .org 0x190 -SYM_FUNC_START(efi32_stub_entry) - add $0x4, %esp /* Discard return address */ - popl %ecx - popl %edx - popl %esi - jmp efi32_entry -SYM_FUNC_END(efi32_stub_entry) -#endif - .code64 .org 0x200 SYM_CODE_START(startup_64) @@ -346,20 +322,6 @@ SYM_CODE_START(startup_64) /* Start with the delta to where the kernel will run at. */ #ifdef CONFIG_RELOCATABLE leaq startup_32(%rip) /* - $startup_32 */, %rbp - -#ifdef CONFIG_EFI_STUB -/* - * If we were loaded via the EFI LoadImage service, startup_32 will be at an - * offset to the start of the space allocated for the image. efi_pe_entry will - * set up image_offset to tell us where the image actually starts, so that we - * can use the full available buffer. - * image_offset = startup_32 - image_base - * Otherwise image_offset will be zero and has no effect on the calculations. - */ - movl image_offset(%rip), %eax - subq %rax, %rbp -#endif - movl BP_kernel_alignment(%rsi), %eax decl %eax addq %rax, %rbp @@ -398,10 +360,6 @@ SYM_CODE_START(startup_64) * For the trampoline, we need the top page table to reside in lower * memory as we don't have a way to load 64-bit values into CR3 in * 32-bit mode. - * - * We go though the trampoline even if we don't have to: if we're - * already in a desired paging mode. This way the trampoline code gets - * tested on every boot. */ /* Make sure we have GDT with 32-bit code segment */ @@ -416,10 +374,14 @@ SYM_CODE_START(startup_64) lretq .Lon_kernel_cs: + /* + * RSI holds a pointer to a boot_params structure provided by the + * loader, and this needs to be preserved across C function calls. So + * move it into a callee saved register. + */ + movq %rsi, %r15 - pushq %rsi call load_stage1_idt - popq %rsi #ifdef CONFIG_AMD_MEM_ENCRYPT /* @@ -430,63 +392,24 @@ SYM_CODE_START(startup_64) * CPUID instructions being issued, so go ahead and do that now via * sev_enable(), which will also handle the rest of the SEV-related * detection/setup to ensure that has been done in advance of any dependent - * code. + * code. Pass the boot_params pointer as the first argument. */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ + movq %r15, %rdi call sev_enable - popq %rsi #endif /* - * paging_prepare() sets up the trampoline and checks if we need to - * enable 5-level paging. + * configure_5level_paging() updates the number of paging levels using + * a trampoline in 32-bit addressable memory if the current number does + * not match the desired number. * - * paging_prepare() returns a two-quadword structure which lands - * into RDX:RAX: - * - Address of the trampoline is returned in RAX. - * - Non zero RDX means trampoline needs to enable 5-level - * paging. - * - * RSI holds real mode data and needs to be preserved across - * this function call. - */ - pushq %rsi - movq %rsi, %rdi /* real mode address */ - call paging_prepare - popq %rsi - - /* Save the trampoline address in RCX */ - movq %rax, %rcx - - /* - * Load the address of trampoline_return() into RDI. - * It will be used by the trampoline to return to the main code. + * Pass the boot_params pointer as the first argument. The second + * argument is the relocated address of the page table to use instead + * of the page table in trampoline memory (if required). */ - leaq trampoline_return(%rip), %rdi - - /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ - pushq $__KERNEL32_CS - leaq TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax - pushq %rax - lretq -trampoline_return: - /* Restore the stack, the 32-bit trampoline uses its own stack */ - leaq rva(boot_stack_end)(%rbx), %rsp - - /* - * cleanup_trampoline() would restore trampoline memory. - * - * RDI is address of the page table to use instead of page table - * in trampoline memory (if required). - * - * RSI holds real mode data and needs to be preserved across - * this function call. - */ - pushq %rsi - leaq rva(top_pgtable)(%rbx), %rdi - call cleanup_trampoline - popq %rsi + movq %r15, %rdi + leaq rva(top_pgtable)(%rbx), %rsi + call configure_5level_paging /* Zero EFLAGS */ pushq $0 @@ -496,7 +419,6 @@ trampoline_return: * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - pushq %rsi leaq (_bss-8)(%rip), %rsi leaq rva(_bss-8)(%rbx), %rdi movl $(_bss - startup_32), %ecx @@ -504,7 +426,6 @@ trampoline_return: std rep movsq cld - popq %rsi /* * The GDT may get overwritten either during the copy we just did or @@ -523,21 +444,6 @@ trampoline_return: jmp *%rax SYM_CODE_END(startup_64) -#ifdef CONFIG_EFI_STUB -#ifdef CONFIG_EFI_HANDOVER_PROTOCOL - .org 0x390 -#endif -SYM_FUNC_START(efi64_stub_entry) - and $~0xf, %rsp /* realign the stack */ - movq %rdx, %rbx /* save boot_params pointer */ - call efi_main - movq %rbx,%rsi - leaq rva(startup_64)(%rax), %rax - jmp *%rax -SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) -#endif - .text SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) @@ -551,128 +457,122 @@ SYM_FUNC_START_LOCAL_NOALIGN(.Lrelocated) shrq $3, %rcx rep stosq - pushq %rsi call load_stage2_idt /* Pass boot_params to initialize_identity_maps() */ - movq (%rsp), %rdi + movq %r15, %rdi call initialize_identity_maps - popq %rsi /* * Do the extraction, and jump to the new kernel.. */ - pushq %rsi /* Save the real mode argument */ - movq %rsi, %rdi /* real mode address */ - leaq boot_heap(%rip), %rsi /* malloc area for uncompression */ - leaq input_data(%rip), %rdx /* input_data */ - movl input_len(%rip), %ecx /* input_len */ - movq %rbp, %r8 /* output target address */ - movl output_len(%rip), %r9d /* decompressed length, end of relocs */ + /* pass struct boot_params pointer and output target address */ + movq %r15, %rdi + movq %rbp, %rsi call extract_kernel /* returns kernel entry point in %rax */ - popq %rsi /* * Jump to the decompressed kernel. */ + movq %r15, %rsi jmp *%rax SYM_FUNC_END(.Lrelocated) - .code32 /* - * This is the 32-bit trampoline that will be copied over to low memory. + * This is the 32-bit trampoline that will be copied over to low memory. It + * will be called using the ordinary 64-bit calling convention from code + * running in 64-bit mode. * - * RDI contains the return address (might be above 4G). - * ECX contains the base address of the trampoline memory. - * Non zero RDX means trampoline needs to enable 5-level paging. + * Return address is at the top of the stack (might be above 4G). + * The first argument (EDI) contains the address of the temporary PGD level + * page table in 32-bit addressable memory which will be programmed into + * register CR3. */ + .section ".rodata", "a", @progbits SYM_CODE_START(trampoline_32bit_src) - /* Set up data and stack segments */ - movl $__KERNEL_DS, %eax - movl %eax, %ds - movl %eax, %ss + /* + * Preserve callee save 64-bit registers on the stack: this is + * necessary because the architecture does not guarantee that GPRs will + * retain their full 64-bit values across a 32-bit mode switch. + */ + pushq %r15 + pushq %r14 + pushq %r13 + pushq %r12 + pushq %rbp + pushq %rbx + + /* Preserve top half of RSP in a legacy mode GPR to avoid truncation */ + movq %rsp, %rbx + shrq $32, %rbx + + /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */ + pushq $__KERNEL32_CS + leaq 0f(%rip), %rax + pushq %rax + lretq - /* Set up new stack */ - leal TRAMPOLINE_32BIT_STACK_END(%ecx), %esp + /* + * The 32-bit code below will do a far jump back to long mode and end + * up here after reconfiguring the number of paging levels. First, the + * stack pointer needs to be restored to its full 64-bit value before + * the callee save register contents can be popped from the stack. + */ +.Lret: + shlq $32, %rbx + orq %rbx, %rsp + + /* Restore the preserved 64-bit registers */ + popq %rbx + popq %rbp + popq %r12 + popq %r13 + popq %r14 + popq %r15 + retq + .code32 +0: /* Disable paging */ movl %cr0, %eax btrl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - /* Check what paging mode we want to be in after the trampoline */ - testl %edx, %edx - jz 1f - - /* We want 5-level paging: don't touch CR3 if it already points to 5-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jnz 3f - jmp 2f -1: - /* We want 4-level paging: don't touch CR3 if it already points to 4-level page tables */ - movl %cr4, %eax - testl $X86_CR4_LA57, %eax - jz 3f -2: /* Point CR3 to the trampoline's new top level page table */ - leal TRAMPOLINE_32BIT_PGTABLE_OFFSET(%ecx), %eax - movl %eax, %cr3 -3: + movl %edi, %cr3 + /* Set EFER.LME=1 as a precaution in case hypervsior pulls the rug */ - pushl %ecx - pushl %edx movl $MSR_EFER, %ecx rdmsr btsl $_EFER_LME, %eax /* Avoid writing EFER if no change was made (for TDX guest) */ jc 1f wrmsr -1: popl %edx - popl %ecx - -#ifdef CONFIG_X86_MCE - /* - * Preserve CR4.MCE if the kernel will enable #MC support. - * Clearing MCE may fault in some environments (that also force #MC - * support). Any machine check that occurs before #MC support is fully - * configured will crash the system regardless of the CR4.MCE value set - * here. - */ - movl %cr4, %eax - andl $X86_CR4_MCE, %eax -#else - movl $0, %eax -#endif - - /* Enable PAE and LA57 (if required) paging modes */ - orl $X86_CR4_PAE, %eax - testl %edx, %edx - jz 1f - orl $X86_CR4_LA57, %eax 1: + /* Toggle CR4.LA57 */ + movl %cr4, %eax + btcl $X86_CR4_LA57_BIT, %eax movl %eax, %cr4 - /* Calculate address of paging_enabled() once we are executing in the trampoline */ - leal .Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%ecx), %eax - - /* Prepare the stack for far return to Long Mode */ - pushl $__KERNEL_CS - pushl %eax - /* Enable paging again. */ movl %cr0, %eax btsl $X86_CR0_PG_BIT, %eax movl %eax, %cr0 - lret + /* + * Return to the 64-bit calling code using LJMP rather than LRET, to + * avoid the need for a 32-bit addressable stack. The destination + * address will be adjusted after the template code is copied into a + * 32-bit addressable buffer. + */ +.Ljmp: ljmpl $__KERNEL_CS, $(.Lret - trampoline_32bit_src) SYM_CODE_END(trampoline_32bit_src) - .code64 -SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled) - /* Return from the trampoline */ - jmp *%rdi -SYM_FUNC_END(.Lpaging_enabled) +/* + * This symbol is placed right after trampoline_32bit_src() so its address can + * be used to infer the size of the trampoline code. + */ +SYM_DATA(trampoline_ljmp_imm_offset, .word .Ljmp + 1 - trampoline_32bit_src) /* * The trampoline code has a size limit. @@ -681,7 +581,7 @@ SYM_FUNC_END(.Lpaging_enabled) */ .org trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE - .code32 + .text SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode) /* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */ 1: @@ -726,8 +626,6 @@ SYM_DATA_END_LABEL(boot_idt, SYM_L_GLOBAL, boot_idt_end) */ .bss .balign 4 -SYM_DATA_LOCAL(boot_heap, .fill BOOT_HEAP_SIZE, 1, 0) - SYM_DATA_START_LOCAL(boot_stack) .fill BOOT_STACK_SIZE, 1, 0 .balign 16 diff --git a/arch/x86/boot/compressed/idt_64.c b/arch/x86/boot/compressed/idt_64.c index 6debb816e83d..3cdf94b41456 100644 --- a/arch/x86/boot/compressed/idt_64.c +++ b/arch/x86/boot/compressed/idt_64.c @@ -63,7 +63,14 @@ void load_stage2_idt(void) set_idt_entry(X86_TRAP_PF, boot_page_fault); #ifdef CONFIG_AMD_MEM_ENCRYPT - set_idt_entry(X86_TRAP_VC, boot_stage2_vc); + /* + * Clear the second stage #VC handler in case guest types + * needing #VC have not been detected. + */ + if (sev_status & BIT(1)) + set_idt_entry(X86_TRAP_VC, boot_stage2_vc); + else + set_idt_entry(X86_TRAP_VC, NULL); #endif load_boot_idt(&boot_idt_desc); diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index 94b7abcf624b..f711f2a85862 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -330,6 +330,33 @@ static size_t parse_elf(void *output) return ehdr.e_entry - LOAD_PHYSICAL_ADDR; } +const unsigned long kernel_total_size = VO__end - VO__text; + +static u8 boot_heap[BOOT_HEAP_SIZE] __aligned(4); + +extern unsigned char input_data[]; +extern unsigned int input_len, output_len; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)) +{ + unsigned long entry; + + if (!free_mem_ptr) { + free_mem_ptr = (unsigned long)boot_heap; + free_mem_end_ptr = (unsigned long)boot_heap + sizeof(boot_heap); + } + + if (__decompress(input_data, input_len, NULL, NULL, outbuf, output_len, + NULL, error) < 0) + return ULONG_MAX; + + entry = parse_elf(outbuf); + handle_relocations(outbuf, output_len, virt_addr); + + return entry; +} + /* * The compressed kernel image (ZO), has been moved so that its position * is against the end of the buffer used to hold the uncompressed kernel @@ -347,14 +374,10 @@ static size_t parse_elf(void *output) * |-------uncompressed kernel image---------| * */ -asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, - unsigned char *input_data, - unsigned long input_len, - unsigned char *output, - unsigned long output_len) +asmlinkage __visible void *extract_kernel(void *rmode, unsigned char *output) { - const unsigned long kernel_total_size = VO__end - VO__text; unsigned long virt_addr = LOAD_PHYSICAL_ADDR; + memptr heap = (memptr)boot_heap; unsigned long needed_size; size_t entry_offset; @@ -412,7 +435,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, * entries. This ensures the full mapped area is usable RAM * and doesn't include any reserved areas. */ - needed_size = max(output_len, kernel_total_size); + needed_size = max_t(unsigned long, output_len, kernel_total_size); #ifdef CONFIG_X86_64 needed_size = ALIGN(needed_size, MIN_KERNEL_ALIGN); #endif @@ -443,7 +466,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, #ifdef CONFIG_X86_64 if (heap > 0x3fffffffffffUL) error("Destination address too large"); - if (virt_addr + max(output_len, kernel_total_size) > KERNEL_IMAGE_SIZE) + if (virt_addr + needed_size > KERNEL_IMAGE_SIZE) error("Destination virtual address is beyond the kernel mapping area"); #else if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff)) @@ -461,10 +484,7 @@ asmlinkage __visible void *extract_kernel(void *rmode, memptr heap, accept_memory(__pa(output), __pa(output) + needed_size); } - __decompress(input_data, input_len, NULL, NULL, output, output_len, - NULL, error); - entry_offset = parse_elf(output); - handle_relocations(output, output_len, virt_addr); + entry_offset = decompress_kernel(output, virt_addr, error); debug_putstr("done.\nBooting the kernel (entry_offset: 0x"); debug_puthex(entry_offset); diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 964fe903a1cd..cc70d3fb9049 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -179,9 +179,7 @@ static inline int count_immovable_mem_regions(void) { return 0; } #endif /* ident_map_64.c */ -#ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled, pgdir_shift, ptrs_per_p4d; -#endif extern void kernel_add_identity_map(unsigned long start, unsigned long end); /* Used by PAGE_KERN* macros: */ diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h index cc9b2529a086..6d595abe06b3 100644 --- a/arch/x86/boot/compressed/pgtable.h +++ b/arch/x86/boot/compressed/pgtable.h @@ -3,18 +3,16 @@ #define TRAMPOLINE_32BIT_SIZE (2 * PAGE_SIZE) -#define TRAMPOLINE_32BIT_PGTABLE_OFFSET 0 - #define TRAMPOLINE_32BIT_CODE_OFFSET PAGE_SIZE -#define TRAMPOLINE_32BIT_CODE_SIZE 0x80 - -#define TRAMPOLINE_32BIT_STACK_END TRAMPOLINE_32BIT_SIZE +#define TRAMPOLINE_32BIT_CODE_SIZE 0xA0 #ifndef __ASSEMBLER__ extern unsigned long *trampoline_32bit; -extern void trampoline_32bit_src(void *return_ptr); +extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl); + +extern const u16 trampoline_ljmp_imm_offset; #endif /* __ASSEMBLER__ */ #endif /* BOOT_COMPRESSED_PAGETABLE_H */ diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 2ac12ff4111b..7939eb6e6ce9 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -16,11 +16,6 @@ unsigned int __section(".data") pgdir_shift = 39; unsigned int __section(".data") ptrs_per_p4d = 1; #endif -struct paging_config { - unsigned long trampoline_start; - unsigned long l5_required; -}; - /* Buffer to preserve trampoline memory */ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; @@ -29,7 +24,7 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; * purposes. * * Avoid putting the pointer into .bss as it will be cleared between - * paging_prepare() and extract_kernel(). + * configure_5level_paging() and extract_kernel(). */ unsigned long *trampoline_32bit __section(".data"); @@ -106,12 +101,13 @@ static unsigned long find_trampoline_placement(void) return bios_start - TRAMPOLINE_32BIT_SIZE; } -struct paging_config paging_prepare(void *rmode) +asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) { - struct paging_config paging_config = {}; + void (*toggle_la57)(void *cr3); + bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ - boot_params = rmode; + boot_params = bp; /* * Check if LA57 is desired and supported. @@ -129,12 +125,22 @@ struct paging_config paging_prepare(void *rmode) !cmdline_find_option_bool("no5lvl") && native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { - paging_config.l5_required = 1; + l5_required = true; + + /* Initialize variables for 5-level paging */ + __pgtable_l5_enabled = 1; + pgdir_shift = 48; + ptrs_per_p4d = 512; } - paging_config.trampoline_start = find_trampoline_placement(); + /* + * The trampoline will not be used if the paging mode is already set to + * the desired one. + */ + if (l5_required == !!(native_read_cr4() & X86_CR4_LA57)) + return; - trampoline_32bit = (unsigned long *)paging_config.trampoline_start; + trampoline_32bit = (unsigned long *)find_trampoline_placement(); /* Preserve trampoline memory */ memcpy(trampoline_save, trampoline_32bit, TRAMPOLINE_32BIT_SIZE); @@ -143,32 +149,32 @@ struct paging_config paging_prepare(void *rmode) memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE); /* Copy trampoline code in place */ - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), + toggle_la57 = memcpy(trampoline_32bit + + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long), &trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE); /* + * Avoid the need for a stack in the 32-bit trampoline code, by using + * LJMP rather than LRET to return back to long mode. LJMP takes an + * immediate absolute address, which needs to be adjusted based on the + * placement of the trampoline. + */ + *(u32 *)((u8 *)toggle_la57 + trampoline_ljmp_imm_offset) += + (unsigned long)toggle_la57; + + /* * The code below prepares page table in trampoline memory. * * The new page table will be used by trampoline code for switching * from 4- to 5-level paging or vice versa. - * - * If switching is not required, the page table is unused: trampoline - * code wouldn't touch CR3. - */ - - /* - * We are not going to use the page table in trampoline memory if we - * are already in the desired paging mode. */ - if (paging_config.l5_required == !!(native_read_cr4() & X86_CR4_LA57)) - goto out; - if (paging_config.l5_required) { + if (l5_required) { /* * For 4- to 5-level paging transition, set up current CR3 as * the first and the only entry in a new top-level page table. */ - trampoline_32bit[TRAMPOLINE_32BIT_PGTABLE_OFFSET] = __native_read_cr3() | _PAGE_TABLE_NOENC; + *trampoline_32bit = __native_read_cr3() | _PAGE_TABLE_NOENC; } else { unsigned long src; @@ -181,38 +187,17 @@ struct paging_config paging_prepare(void *rmode) * may be above 4G. */ src = *(unsigned long *)__native_read_cr3() & PAGE_MASK; - memcpy(trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long), - (void *)src, PAGE_SIZE); + memcpy(trampoline_32bit, (void *)src, PAGE_SIZE); } -out: - return paging_config; -} - -void cleanup_trampoline(void *pgtable) -{ - void *trampoline_pgtable; - - trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long); + toggle_la57(trampoline_32bit); /* - * Move the top level page table out of trampoline memory, - * if it's there. + * Move the top level page table out of trampoline memory. */ - if ((void *)__native_read_cr3() == trampoline_pgtable) { - memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); - native_write_cr3((unsigned long)pgtable); - } + memcpy(pgtable, trampoline_32bit, PAGE_SIZE); + native_write_cr3((unsigned long)pgtable); /* Restore trampoline memory */ memcpy(trampoline_32bit, trampoline_save, TRAMPOLINE_32BIT_SIZE); - - /* Initialize variables for 5-level paging */ -#ifdef CONFIG_X86_5LEVEL - if (__read_cr4() & X86_CR4_LA57) { - __pgtable_l5_enabled = 1; - pgdir_shift = 48; - ptrs_per_p4d = 512; - } -#endif } diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c index 09dc8c187b3c..dc8c876fbd8f 100644 --- a/arch/x86/boot/compressed/sev.c +++ b/arch/x86/boot/compressed/sev.c @@ -365,22 +365,27 @@ static void enforce_vmpl0(void) * by the guest kernel. As and when a new feature is implemented in the * guest kernel, a corresponding bit should be added to the mask. */ -#define SNP_FEATURES_PRESENT (0) +#define SNP_FEATURES_PRESENT MSR_AMD64_SNP_DEBUG_SWAP + +u64 snp_get_unsupported_features(u64 status) +{ + if (!(status & MSR_AMD64_SEV_SNP_ENABLED)) + return 0; + + return status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; +} void snp_check_features(void) { u64 unsupported; - if (!(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) - return; - /* * Terminate the boot if hypervisor has enabled any feature lacking * guest side implementation. Pass on the unsupported features mask through * EXIT_INFO_2 of the GHCB protocol so that those features can be reported * as part of the guest boot failure. */ - unsupported = sev_status & SNP_FEATURES_IMPL_REQ & ~SNP_FEATURES_PRESENT; + unsupported = snp_get_unsupported_features(sev_status); if (unsupported) { if (ghcb_version < 2 || (!boot_ghcb && !early_setup_ghcb())) sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SNP_UNSUPPORTED); @@ -390,32 +395,22 @@ void snp_check_features(void) } } -void sev_enable(struct boot_params *bp) +/* + * sev_check_cpu_support - Check for SEV support in the CPU capabilities + * + * Returns < 0 if SEV is not supported, otherwise the position of the + * encryption bit in the page table descriptors. + */ +static int sev_check_cpu_support(void) { unsigned int eax, ebx, ecx, edx; - struct msr m; - bool snp; - - /* - * bp->cc_blob_address should only be set by boot/compressed kernel. - * Initialize it to 0 to ensure that uninitialized values from - * buggy bootloaders aren't propagated. - */ - if (bp) - bp->cc_blob_address = 0; - - /* - * Setup/preliminary detection of SNP. This will be sanity-checked - * against CPUID/MSR values later. - */ - snp = snp_init(bp); /* Check for the SME/SEV support leaf */ eax = 0x80000000; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); if (eax < 0x8000001f) - return; + return -ENODEV; /* * Check for the SME/SEV feature: @@ -429,7 +424,48 @@ void sev_enable(struct boot_params *bp) ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); /* Check whether SEV is supported */ - if (!(eax & BIT(1))) { + if (!(eax & BIT(1))) + return -ENODEV; + + return ebx & 0x3f; +} + +void sev_enable(struct boot_params *bp) +{ + struct msr m; + int bitpos; + bool snp; + + /* + * bp->cc_blob_address should only be set by boot/compressed kernel. + * Initialize it to 0 to ensure that uninitialized values from + * buggy bootloaders aren't propagated. + */ + if (bp) + bp->cc_blob_address = 0; + + /* + * Do an initial SEV capability check before snp_init() which + * loads the CPUID page and the same checks afterwards are done + * without the hypervisor and are trustworthy. + * + * If the HV fakes SEV support, the guest will crash'n'burn + * which is good enough. + */ + + if (sev_check_cpu_support() < 0) + return; + + /* + * Setup/preliminary detection of SNP. This will be sanity-checked + * against CPUID/MSR values later. + */ + snp = snp_init(bp); + + /* Now repeat the checks with the SNP CPUID table. */ + + bitpos = sev_check_cpu_support(); + if (bitpos < 0) { if (snp) error("SEV-SNP support indicated by CC blob, but not CPUID."); return; @@ -461,7 +497,24 @@ void sev_enable(struct boot_params *bp) if (snp && !(sev_status & MSR_AMD64_SEV_SNP_ENABLED)) error("SEV-SNP supported indicated by CC blob, but not SEV status MSR."); - sme_me_mask = BIT_ULL(ebx & 0x3f); + sme_me_mask = BIT_ULL(bitpos); +} + +/* + * sev_get_status - Retrieve the SEV status mask + * + * Returns 0 if the CPU is not SEV capable, otherwise the value of the + * AMD64_SEV MSR. + */ +u64 sev_get_status(void) +{ + struct msr m; + + if (sev_check_cpu_support() < 0) + return 0; + + boot_rdmsr(MSR_AMD64_SEV, &m); + return m.q; } /* Search for Confidential Computing blob in the EFI config table. */ diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 75a343f10e58..1b411bbf3cb0 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -33,7 +33,6 @@ CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y CONFIG_NR_CPUS=8 CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -CONFIG_MICROCODE_AMD=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y CONFIG_X86_CHECK_BIOS_CORRUPTION=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 0902518e9b93..409e9182bd29 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -31,7 +31,6 @@ CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y CONFIG_PARAVIRT=y CONFIG_X86_REROUTE_FOR_BROKEN_BOOT_IRQS=y -CONFIG_MICROCODE_AMD=y CONFIG_X86_MSR=y CONFIG_X86_CPUID=y CONFIG_NUMA=y diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index a5b0cb3efeba..39d6a62ac627 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -229,10 +229,9 @@ static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) return (struct crypto_aes_ctx *)ALIGN(addr, align); } -static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, +static int aes_set_key_common(struct crypto_aes_ctx *ctx, const u8 *in_key, unsigned int key_len) { - struct crypto_aes_ctx *ctx = aes_ctx(raw_ctx); int err; if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 && @@ -253,7 +252,8 @@ static int aes_set_key_common(struct crypto_tfm *tfm, void *raw_ctx, static int aes_set_key(struct crypto_tfm *tfm, const u8 *in_key, unsigned int key_len) { - return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len); + return aes_set_key_common(aes_ctx(crypto_tfm_ctx(tfm)), in_key, + key_len); } static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) @@ -285,8 +285,7 @@ static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) static int aesni_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key, unsigned int len) { - return aes_set_key_common(crypto_skcipher_tfm(tfm), - crypto_skcipher_ctx(tfm), key, len); + return aes_set_key_common(aes_ctx(crypto_skcipher_ctx(tfm)), key, len); } static int ecb_encrypt(struct skcipher_request *req) @@ -627,8 +626,7 @@ static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key, memcpy(ctx->nonce, key + key_len, sizeof(ctx->nonce)); - return aes_set_key_common(crypto_aead_tfm(aead), - &ctx->aes_key_expanded, key, key_len) ?: + return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?: rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); } @@ -893,14 +891,13 @@ static int xts_aesni_setkey(struct crypto_skcipher *tfm, const u8 *key, keylen /= 2; /* first half of xts-key is for crypt */ - err = aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_crypt_ctx, - key, keylen); + err = aes_set_key_common(aes_ctx(ctx->raw_crypt_ctx), key, keylen); if (err) return err; /* second half of xts-key is for tweak */ - return aes_set_key_common(crypto_skcipher_tfm(tfm), ctx->raw_tweak_ctx, - key + keylen, keylen); + return aes_set_key_common(aes_ctx(ctx->raw_tweak_ctx), key + keylen, + keylen); } static int xts_crypt(struct skcipher_request *req, bool encrypt) @@ -1150,8 +1147,7 @@ static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, { struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead); - return aes_set_key_common(crypto_aead_tfm(aead), - &ctx->aes_key_expanded, key, key_len) ?: + return aes_set_key_common(&ctx->aes_key_expanded, key, key_len) ?: rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); } diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index bc0a3c941b35..2d0b1bd866ea 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -456,3 +456,4 @@ 449 i386 futex_waitv sys_futex_waitv 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node 451 i386 cachestat sys_cachestat +452 i386 fchmodat2 sys_fchmodat2 diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 227538b0ce80..814768249eae 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -373,6 +373,7 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 11a5c68d1218..7645730dc228 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -299,8 +299,8 @@ static unsigned long vdso_addr(unsigned long start, unsigned len) /* Round the lowest possible end address up to a PMD boundary. */ end = (start + len + PMD_SIZE - 1) & PMD_MASK; - if (end >= TASK_SIZE_MAX) - end = TASK_SIZE_MAX; + if (end >= DEFAULT_MAP_WINDOW) + end = DEFAULT_MAP_WINDOW; end -= len; if (end > start) { diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 371014802191..6911c5399d02 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -156,8 +156,8 @@ perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) * count to the generic event atomically: */ prev_raw_count = local64_read(&hwc->prev_count); - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) + if (!local64_try_cmpxchg(&hwc->prev_count, + &prev_raw_count, new_raw_count)) return 0; /* @@ -247,11 +247,33 @@ int forward_event_to_ibs(struct perf_event *event) return -ENOENT; } +/* + * Grouping of IBS events is not possible since IBS can have only + * one event active at any point in time. + */ +static int validate_group(struct perf_event *event) +{ + struct perf_event *sibling; + + if (event->group_leader == event) + return 0; + + if (event->group_leader->pmu == event->pmu) + return -EINVAL; + + for_each_sibling_event(sibling, event->group_leader) { + if (sibling->pmu == event->pmu) + return -EINVAL; + } + return 0; +} + static int perf_ibs_init(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; struct perf_ibs *perf_ibs; u64 max_cnt, config; + int ret; perf_ibs = get_ibs_pmu(event->attr.type); if (!perf_ibs) @@ -265,6 +287,10 @@ static int perf_ibs_init(struct perf_event *event) if (config & ~perf_ibs->config_mask) return -EINVAL; + ret = validate_group(event); + if (ret) + return ret; + if (hwc->sample_period) { if (config & perf_ibs->cnt_mask) /* raw max_cnt may not be set */ @@ -702,38 +728,63 @@ static u8 perf_ibs_data_src(union ibs_op_data2 *op_data2) return op_data2->data_src_lo; } -static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, - union ibs_op_data3 *op_data3, - struct perf_sample_data *data) +#define L(x) (PERF_MEM_S(LVL, x) | PERF_MEM_S(LVL, HIT)) +#define LN(x) PERF_MEM_S(LVLNUM, x) +#define REM PERF_MEM_S(REMOTE, REMOTE) +#define HOPS(x) PERF_MEM_S(HOPS, x) + +static u64 g_data_src[8] = { + [IBS_DATA_SRC_LOC_CACHE] = L(L3) | L(REM_CCE1) | LN(ANY_CACHE) | HOPS(0), + [IBS_DATA_SRC_DRAM] = L(LOC_RAM) | LN(RAM), + [IBS_DATA_SRC_REM_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1), + [IBS_DATA_SRC_IO] = L(IO) | LN(IO), +}; + +#define RMT_NODE_BITS (1 << IBS_DATA_SRC_DRAM) +#define RMT_NODE_APPLICABLE(x) (RMT_NODE_BITS & (1 << x)) + +static u64 g_zen4_data_src[32] = { + [IBS_DATA_SRC_EXT_LOC_CACHE] = L(L3) | LN(L3), + [IBS_DATA_SRC_EXT_NEAR_CCX_CACHE] = L(REM_CCE1) | LN(ANY_CACHE) | REM | HOPS(0), + [IBS_DATA_SRC_EXT_DRAM] = L(LOC_RAM) | LN(RAM), + [IBS_DATA_SRC_EXT_FAR_CCX_CACHE] = L(REM_CCE2) | LN(ANY_CACHE) | REM | HOPS(1), + [IBS_DATA_SRC_EXT_PMEM] = LN(PMEM), + [IBS_DATA_SRC_EXT_IO] = L(IO) | LN(IO), + [IBS_DATA_SRC_EXT_EXT_MEM] = LN(CXL), +}; + +#define ZEN4_RMT_NODE_BITS ((1 << IBS_DATA_SRC_EXT_DRAM) | \ + (1 << IBS_DATA_SRC_EXT_PMEM) | \ + (1 << IBS_DATA_SRC_EXT_EXT_MEM)) +#define ZEN4_RMT_NODE_APPLICABLE(x) (ZEN4_RMT_NODE_BITS & (1 << x)) + +static __u64 perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, + union ibs_op_data3 *op_data3, + struct perf_sample_data *data) { union perf_mem_data_src *data_src = &data->data_src; u8 ibs_data_src = perf_ibs_data_src(op_data2); data_src->mem_lvl = 0; + data_src->mem_lvl_num = 0; /* * DcMiss, L2Miss, DataSrc, DcMissLat etc. are all invalid for Uncached * memory accesses. So, check DcUcMemAcc bit early. */ - if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) { - data_src->mem_lvl = PERF_MEM_LVL_UNC | PERF_MEM_LVL_HIT; - return; - } + if (op_data3->dc_uc_mem_acc && ibs_data_src != IBS_DATA_SRC_EXT_IO) + return L(UNC) | LN(UNC); /* L1 Hit */ - if (op_data3->dc_miss == 0) { - data_src->mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; - return; - } + if (op_data3->dc_miss == 0) + return L(L1) | LN(L1); /* L2 Hit */ if (op_data3->l2_miss == 0) { /* Erratum #1293 */ if (boot_cpu_data.x86 != 0x19 || boot_cpu_data.x86_model > 0xF || - !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) { - data_src->mem_lvl = PERF_MEM_LVL_L2 | PERF_MEM_LVL_HIT; - return; - } + !(op_data3->sw_pf || op_data3->dc_miss_no_mab_alloc)) + return L(L2) | LN(L2); } /* @@ -743,82 +794,36 @@ static void perf_ibs_get_mem_lvl(union ibs_op_data2 *op_data2, if (data_src->mem_op != PERF_MEM_OP_LOAD) goto check_mab; - /* L3 Hit */ if (ibs_caps & IBS_CAPS_ZEN4) { - if (ibs_data_src == IBS_DATA_SRC_EXT_LOC_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_HIT; - return; - } - } else { - if (ibs_data_src == IBS_DATA_SRC_LOC_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_L3 | PERF_MEM_LVL_REM_CCE1 | - PERF_MEM_LVL_HIT; - return; - } - } + u64 val = g_zen4_data_src[ibs_data_src]; - /* A peer cache in a near CCX */ - if (ibs_caps & IBS_CAPS_ZEN4 && - ibs_data_src == IBS_DATA_SRC_EXT_NEAR_CCX_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE1 | PERF_MEM_LVL_HIT; - return; - } + if (!val) + goto check_mab; - /* A peer cache in a far CCX */ - if (ibs_caps & IBS_CAPS_ZEN4) { - if (ibs_data_src == IBS_DATA_SRC_EXT_FAR_CCX_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; - return; + /* HOPS_1 because IBS doesn't provide remote socket detail */ + if (op_data2->rmt_node && ZEN4_RMT_NODE_APPLICABLE(ibs_data_src)) { + if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) + val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1); + else + val |= REM | HOPS(1); } - } else { - if (ibs_data_src == IBS_DATA_SRC_REM_CACHE) { - data_src->mem_lvl = PERF_MEM_LVL_REM_CCE2 | PERF_MEM_LVL_HIT; - return; - } - } - /* DRAM */ - if (ibs_data_src == IBS_DATA_SRC_EXT_DRAM) { - if (op_data2->rmt_node == 0) - data_src->mem_lvl = PERF_MEM_LVL_LOC_RAM | PERF_MEM_LVL_HIT; - else - data_src->mem_lvl = PERF_MEM_LVL_REM_RAM1 | PERF_MEM_LVL_HIT; - return; - } + return val; + } else { + u64 val = g_data_src[ibs_data_src]; - /* PMEM */ - if (ibs_caps & IBS_CAPS_ZEN4 && ibs_data_src == IBS_DATA_SRC_EXT_PMEM) { - data_src->mem_lvl_num = PERF_MEM_LVLNUM_PMEM; - if (op_data2->rmt_node) { - data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; - /* IBS doesn't provide Remote socket detail */ - data_src->mem_hops = PERF_MEM_HOPS_1; - } - return; - } + if (!val) + goto check_mab; - /* Extension Memory */ - if (ibs_caps & IBS_CAPS_ZEN4 && - ibs_data_src == IBS_DATA_SRC_EXT_EXT_MEM) { - data_src->mem_lvl_num = PERF_MEM_LVLNUM_CXL; - if (op_data2->rmt_node) { - data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; - /* IBS doesn't provide Remote socket detail */ - data_src->mem_hops = PERF_MEM_HOPS_1; + /* HOPS_1 because IBS doesn't provide remote socket detail */ + if (op_data2->rmt_node && RMT_NODE_APPLICABLE(ibs_data_src)) { + if (ibs_data_src == IBS_DATA_SRC_DRAM) + val = L(REM_RAM1) | LN(RAM) | REM | HOPS(1); + else + val |= REM | HOPS(1); } - return; - } - /* IO */ - if (ibs_data_src == IBS_DATA_SRC_EXT_IO) { - data_src->mem_lvl = PERF_MEM_LVL_IO; - data_src->mem_lvl_num = PERF_MEM_LVLNUM_IO; - if (op_data2->rmt_node) { - data_src->mem_remote = PERF_MEM_REMOTE_REMOTE; - /* IBS doesn't provide Remote socket detail */ - data_src->mem_hops = PERF_MEM_HOPS_1; - } - return; + return val; } check_mab: @@ -829,12 +834,11 @@ check_mab: * DataSrc simultaneously. Prioritize DataSrc over MAB, i.e. set * MAB only when IBS fails to provide DataSrc. */ - if (op_data3->dc_miss_no_mab_alloc) { - data_src->mem_lvl = PERF_MEM_LVL_LFB | PERF_MEM_LVL_HIT; - return; - } + if (op_data3->dc_miss_no_mab_alloc) + return L(LFB) | LN(LFB); - data_src->mem_lvl = PERF_MEM_LVL_NA; + /* Don't set HIT with NA */ + return PERF_MEM_S(LVL, NA) | LN(NA); } static bool perf_ibs_cache_hit_st_valid(void) @@ -924,7 +928,9 @@ static void perf_ibs_get_data_src(struct perf_ibs_data *ibs_data, union ibs_op_data2 *op_data2, union ibs_op_data3 *op_data3) { - perf_ibs_get_mem_lvl(op_data2, op_data3, data); + union perf_mem_data_src *data_src = &data->data_src; + + data_src->val |= perf_ibs_get_mem_lvl(op_data2, op_data3, data); perf_ibs_get_mem_snoop(op_data2, data); perf_ibs_get_tlb_lvl(op_data3, data); perf_ibs_get_mem_lock(op_data3, data); diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 9d248703cbdd..185f902e5f28 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -129,13 +129,11 @@ u64 x86_perf_event_update(struct perf_event *event) * exchange a new raw count - then add that new-prev delta * count to the generic event atomically: */ -again: prev_raw_count = local64_read(&hwc->prev_count); - rdpmcl(hwc->event_base_rdpmc, new_raw_count); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; + do { + rdpmcl(hwc->event_base_rdpmc, new_raw_count); + } while (!local64_try_cmpxchg(&hwc->prev_count, + &prev_raw_count, new_raw_count)); /* * Now we have the new raw value and have updated the prev @@ -2168,7 +2166,6 @@ static int __init init_hw_perf_events(void) hybrid_pmu->pmu = pmu; hybrid_pmu->pmu.type = -1; hybrid_pmu->pmu.attr_update = x86_pmu.attr_update; - hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_HETEROGENEOUS_CPUS; hybrid_pmu->pmu.capabilities |= PERF_PMU_CAP_EXTENDED_HW_TYPE; err = perf_pmu_register(&hybrid_pmu->pmu, hybrid_pmu->name, diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 2a284ba951b7..fa355d3658a6 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2129,6 +2129,17 @@ static struct extra_reg intel_grt_extra_regs[] __read_mostly = { EVENT_EXTRA_END }; +EVENT_ATTR_STR(topdown-retiring, td_retiring_cmt, "event=0x72,umask=0x0"); +EVENT_ATTR_STR(topdown-bad-spec, td_bad_spec_cmt, "event=0x73,umask=0x0"); + +static struct attribute *cmt_events_attrs[] = { + EVENT_PTR(td_fe_bound_tnt), + EVENT_PTR(td_retiring_cmt), + EVENT_PTR(td_bad_spec_cmt), + EVENT_PTR(td_be_bound_tnt), + NULL +}; + static struct extra_reg intel_cmt_extra_regs[] __read_mostly = { /* must define OFFCORE_RSP_X first, see intel_fixup_er() */ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x800ff3ffffffffffull, RSP_0), @@ -4847,6 +4858,8 @@ PMU_FORMAT_ATTR(ldlat, "config1:0-15"); PMU_FORMAT_ATTR(frontend, "config1:0-23"); +PMU_FORMAT_ATTR(snoop_rsp, "config1:0-63"); + static struct attribute *intel_arch3_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -4877,6 +4890,13 @@ static struct attribute *slm_format_attr[] = { NULL }; +static struct attribute *cmt_format_attr[] = { + &format_attr_offcore_rsp.attr, + &format_attr_ldlat.attr, + &format_attr_snoop_rsp.attr, + NULL +}; + static struct attribute *skl_format_attr[] = { &format_attr_frontend.attr, NULL, @@ -5656,7 +5676,6 @@ static struct attribute *adl_hybrid_extra_attr[] = { NULL }; -PMU_FORMAT_ATTR_SHOW(snoop_rsp, "config1:0-63"); FORMAT_ATTR_HYBRID(snoop_rsp, hybrid_small); static struct attribute *mtl_hybrid_extra_attr_rtm[] = { @@ -6174,7 +6193,7 @@ __init int intel_pmu_init(void) name = "Tremont"; break; - case INTEL_FAM6_ALDERLAKE_N: + case INTEL_FAM6_ATOM_GRACEMONT: x86_pmu.mid_ack = true; memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -6204,6 +6223,37 @@ __init int intel_pmu_init(void) name = "gracemont"; break; + case INTEL_FAM6_ATOM_CRESTMONT: + case INTEL_FAM6_ATOM_CRESTMONT_X: + x86_pmu.mid_ack = true; + memcpy(hw_cache_event_ids, glp_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + memcpy(hw_cache_extra_regs, tnt_hw_cache_extra_regs, + sizeof(hw_cache_extra_regs)); + hw_cache_event_ids[C(ITLB)][C(OP_READ)][C(RESULT_ACCESS)] = -1; + + x86_pmu.event_constraints = intel_slm_event_constraints; + x86_pmu.pebs_constraints = intel_grt_pebs_event_constraints; + x86_pmu.extra_regs = intel_cmt_extra_regs; + + x86_pmu.pebs_aliases = NULL; + x86_pmu.pebs_prec_dist = true; + x86_pmu.lbr_pt_coexist = true; + x86_pmu.pebs_block = true; + x86_pmu.flags |= PMU_FL_HAS_RSP_1; + x86_pmu.flags |= PMU_FL_INSTR_LATENCY; + + intel_pmu_pebs_data_source_cmt(); + x86_pmu.pebs_latency_data = mtl_latency_data_small; + x86_pmu.get_event_constraints = cmt_get_event_constraints; + x86_pmu.limit_period = spr_limit_period; + td_attr = cmt_events_attrs; + mem_attr = grt_mem_attrs; + extra_attr = cmt_format_attr; + pr_cont("Crestmont events, "); + name = "crestmont"; + break; + case INTEL_FAM6_WESTMERE: case INTEL_FAM6_WESTMERE_EP: case INTEL_FAM6_WESTMERE_EX: diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 835862c548cc..96fffb2d521d 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -365,13 +365,11 @@ static void cstate_pmu_event_update(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; u64 prev_raw_count, new_raw_count; -again: prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = cstate_pmu_read_counter(event); - - if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, - new_raw_count) != prev_raw_count) - goto again; + do { + new_raw_count = cstate_pmu_read_counter(event); + } while (!local64_try_cmpxchg(&hwc->prev_count, + &prev_raw_count, new_raw_count)); local64_add(new_raw_count - prev_raw_count, &event->count); } @@ -671,6 +669,7 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &glm_cstates), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT, &glm_cstates), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &glm_cstates), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, &icl_cstates), @@ -686,7 +685,6 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &icl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_cstates), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_cstates), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_cstates), diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index df88576d6b2a..eb8dd8b8a1e8 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -144,7 +144,7 @@ void __init intel_pmu_pebs_data_source_adl(void) __intel_pmu_pebs_data_source_grt(data_source); } -static void __init intel_pmu_pebs_data_source_cmt(u64 *data_source) +static void __init __intel_pmu_pebs_data_source_cmt(u64 *data_source) { data_source[0x07] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOPX, FWD); data_source[0x08] = OP_LH | P(LVL, L3) | LEVEL(L3) | P(SNOOP, HITM); @@ -164,7 +164,12 @@ void __init intel_pmu_pebs_data_source_mtl(void) data_source = x86_pmu.hybrid_pmu[X86_HYBRID_PMU_ATOM_IDX].pebs_data_source; memcpy(data_source, pebs_data_source, sizeof(pebs_data_source)); - intel_pmu_pebs_data_source_cmt(data_source); + __intel_pmu_pebs_data_source_cmt(data_source); +} + +void __init intel_pmu_pebs_data_source_cmt(void) +{ + __intel_pmu_pebs_data_source_cmt(pebs_data_source); } static u64 precise_store_data(u64 status) diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index bc226603ef3e..69043e02e8a7 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1858,7 +1858,6 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &rkl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &adl_uncore_init), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &adl_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_S, &adl_uncore_init), @@ -1867,6 +1866,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &spr_uncore_init), X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D, &snr_uncore_init), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &adl_uncore_init), {}, }; MODULE_DEVICE_TABLE(x86cpu, intel_uncore_match); diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index d49e90dc04a4..4d349986f76a 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -1502,7 +1502,7 @@ static int snbep_pci2phy_map_init(int devid, int nodeid_loc, int idmap_loc, bool pci_dev_put(ubox_dev); - return err ? pcibios_err_to_errno(err) : 0; + return pcibios_err_to_errno(err); } int snbep_uncore_pci_init(void) diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index 0feaaa571303..9e237b30f017 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -106,7 +106,7 @@ static bool test_intel(int idx, void *data) case INTEL_FAM6_ROCKETLAKE: case INTEL_FAM6_ALDERLAKE: case INTEL_FAM6_ALDERLAKE_L: - case INTEL_FAM6_ALDERLAKE_N: + case INTEL_FAM6_ATOM_GRACEMONT: case INTEL_FAM6_RAPTORLAKE: case INTEL_FAM6_RAPTORLAKE_P: case INTEL_FAM6_RAPTORLAKE_S: @@ -244,12 +244,10 @@ static void msr_event_update(struct perf_event *event) s64 delta; /* Careful, an NMI might modify the previous event value: */ -again: prev = local64_read(&event->hw.prev_count); - now = msr_read_counter(event); - - if (local64_cmpxchg(&event->hw.prev_count, prev, now) != prev) - goto again; + do { + now = msr_read_counter(event); + } while (!local64_try_cmpxchg(&event->hw.prev_count, &prev, now)); delta = now - prev; if (unlikely(event->hw.event_base == MSR_SMI_COUNT)) { diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index d6de4487348c..c8ba2be7585d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1606,6 +1606,8 @@ void intel_pmu_pebs_data_source_grt(void); void intel_pmu_pebs_data_source_mtl(void); +void intel_pmu_pebs_data_source_cmt(void); + int intel_pmu_setup_lbr_filter(struct perf_event *event); void intel_pt_interrupt(void); diff --git a/arch/x86/events/rapl.c b/arch/x86/events/rapl.c index 52e6e7ed4f78..1579429846cc 100644 --- a/arch/x86/events/rapl.c +++ b/arch/x86/events/rapl.c @@ -804,7 +804,7 @@ static const struct x86_cpu_id rapl_model_match[] __initconst = { X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &model_skl), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &model_skl), + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, &model_skl), X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &model_spr), X86_MATCH_INTEL_FAM6_MODEL(EMERALDRAPIDS_X, &model_spr), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, &model_skl), diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c index 1fbda2f94184..b21335e6a210 100644 --- a/arch/x86/hyperv/hv_apic.c +++ b/arch/x86/hyperv/hv_apic.c @@ -107,7 +107,6 @@ static bool cpu_is_self(int cpu) static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, bool exclude_self) { - struct hv_send_ipi_ex **arg; struct hv_send_ipi_ex *ipi_arg; unsigned long flags; int nr_bank = 0; @@ -117,9 +116,8 @@ static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector, return false; local_irq_save(flags); - arg = (struct hv_send_ipi_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + ipi_arg = *this_cpu_ptr(hyperv_pcpu_input_arg); - ipi_arg = *arg; if (unlikely(!ipi_arg)) goto ipi_mask_ex_done; diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 6c04b52f139b..953e280c07c3 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -14,6 +14,7 @@ #include <asm/apic.h> #include <asm/desc.h> #include <asm/sev.h> +#include <asm/ibt.h> #include <asm/hypervisor.h> #include <asm/hyperv-tlfs.h> #include <asm/mshyperv.h> @@ -472,6 +473,26 @@ void __init hyperv_init(void) } /* + * Some versions of Hyper-V that provide IBT in guest VMs have a bug + * in that there's no ENDBR64 instruction at the entry to the + * hypercall page. Because hypercalls are invoked via an indirect call + * to the hypercall page, all hypercall attempts fail when IBT is + * enabled, and Linux panics. For such buggy versions, disable IBT. + * + * Fixed versions of Hyper-V always provide ENDBR64 on the hypercall + * page, so if future Linux kernel versions enable IBT for 32-bit + * builds, additional hypercall page hackery will be required here + * to provide an ENDBR32. + */ +#ifdef CONFIG_X86_KERNEL_IBT + if (cpu_feature_enabled(X86_FEATURE_IBT) && + *(u32 *)hv_hypercall_pg != gen_endbr()) { + setup_clear_cpu_cap(X86_FEATURE_IBT); + pr_warn("Hyper-V: Disabling IBT because of Hyper-V bug\n"); + } +#endif + + /* * hyperv_init() is called before LAPIC is initialized: see * apic_intr_mode_init() -> x86_platform.apic_post_init() and * apic_bsp_setup() -> setup_local_APIC(). The direct-mode STIMER diff --git a/arch/x86/hyperv/hv_vtl.c b/arch/x86/hyperv/hv_vtl.c index 85d38b9f3586..db5d2ea39fc0 100644 --- a/arch/x86/hyperv/hv_vtl.c +++ b/arch/x86/hyperv/hv_vtl.c @@ -25,6 +25,10 @@ void __init hv_vtl_init_platform(void) x86_init.irqs.pre_vector_init = x86_init_noop; x86_init.timers.timer_init = x86_init_noop; + /* Avoid searching for BIOS MP tables */ + x86_init.mpparse.find_smp_config = x86_init_noop; + x86_init.mpparse.get_smp_config = x86_init_uint_noop; + x86_platform.get_wallclock = get_rtc_noop; x86_platform.set_wallclock = set_rtc_noop; x86_platform.get_nmi_reason = hv_get_nmi_reason; diff --git a/arch/x86/hyperv/ivm.c b/arch/x86/hyperv/ivm.c index 14f46ad2ca64..28be6df88063 100644 --- a/arch/x86/hyperv/ivm.c +++ b/arch/x86/hyperv/ivm.c @@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(hv_ghcb_msr_read); static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], enum hv_mem_host_visibility visibility) { - struct hv_gpa_range_for_visibility **input_pcpu, *input; + struct hv_gpa_range_for_visibility *input; u16 pages_processed; u64 hv_status; unsigned long flags; @@ -263,9 +263,8 @@ static int hv_mark_gpa_visibility(u16 count, const u64 pfn[], } local_irq_save(flags); - input_pcpu = (struct hv_gpa_range_for_visibility **) - this_cpu_ptr(hyperv_pcpu_input_arg); - input = *input_pcpu; + input = *this_cpu_ptr(hyperv_pcpu_input_arg); + if (unlikely(!input)) { local_irq_restore(flags); return -EINVAL; diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 8460bd35e10c..1cc113200ff5 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -61,7 +61,6 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus, const struct flush_tlb_info *info) { int cpu, vcpu, gva_n, max_gvas; - struct hv_tlb_flush **flush_pcpu; struct hv_tlb_flush *flush; u64 status; unsigned long flags; @@ -74,10 +73,7 @@ static void hyperv_flush_tlb_multi(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = (struct hv_tlb_flush **) - this_cpu_ptr(hyperv_pcpu_input_arg); - - flush = *flush_pcpu; + flush = *this_cpu_ptr(hyperv_pcpu_input_arg); if (unlikely(!flush)) { local_irq_restore(flags); @@ -178,17 +174,13 @@ static u64 hyperv_flush_tlb_others_ex(const struct cpumask *cpus, const struct flush_tlb_info *info) { int nr_bank = 0, max_gvas, gva_n; - struct hv_tlb_flush_ex **flush_pcpu; struct hv_tlb_flush_ex *flush; u64 status; if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) return HV_STATUS_INVALID_PARAMETER; - flush_pcpu = (struct hv_tlb_flush_ex **) - this_cpu_ptr(hyperv_pcpu_input_arg); - - flush = *flush_pcpu; + flush = *this_cpu_ptr(hyperv_pcpu_input_arg); if (info->mm) { /* diff --git a/arch/x86/hyperv/nested.c b/arch/x86/hyperv/nested.c index 5d70968c8538..9dc259fa322e 100644 --- a/arch/x86/hyperv/nested.c +++ b/arch/x86/hyperv/nested.c @@ -19,7 +19,6 @@ int hyperv_flush_guest_mapping(u64 as) { - struct hv_guest_mapping_flush **flush_pcpu; struct hv_guest_mapping_flush *flush; u64 status; unsigned long flags; @@ -30,10 +29,7 @@ int hyperv_flush_guest_mapping(u64 as) local_irq_save(flags); - flush_pcpu = (struct hv_guest_mapping_flush **) - this_cpu_ptr(hyperv_pcpu_input_arg); - - flush = *flush_pcpu; + flush = *this_cpu_ptr(hyperv_pcpu_input_arg); if (unlikely(!flush)) { local_irq_restore(flags); @@ -90,7 +86,6 @@ EXPORT_SYMBOL_GPL(hyperv_fill_flush_guest_mapping_list); int hyperv_flush_guest_mapping_range(u64 as, hyperv_fill_flush_list_func fill_flush_list_func, void *data) { - struct hv_guest_mapping_flush_list **flush_pcpu; struct hv_guest_mapping_flush_list *flush; u64 status; unsigned long flags; @@ -102,10 +97,8 @@ int hyperv_flush_guest_mapping_range(u64 as, local_irq_save(flags); - flush_pcpu = (struct hv_guest_mapping_flush_list **) - this_cpu_ptr(hyperv_pcpu_input_arg); + flush = *this_cpu_ptr(hyperv_pcpu_input_arg); - flush = *flush_pcpu; if (unlikely(!flush)) { local_irq_restore(flags); goto fault; diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index 8eb74cf386db..c8a7fc23f63c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -6,7 +6,7 @@ * Copyright (C) 2001 Paul Diefenbaugh <paul.s.diefenbaugh@intel.com> * Copyright (C) 2001 Patrick Mochel <mochel@osdl.org> */ -#include <acpi/pdc_intel.h> +#include <acpi/proc_cap_intel.h> #include <asm/numa.h> #include <asm/fixmap.h> @@ -15,6 +15,7 @@ #include <asm/mpspec.h> #include <asm/x86_init.h> #include <asm/cpufeature.h> +#include <asm/irq_vectors.h> #ifdef CONFIG_ACPI_APEI # include <asm/pgtable_types.h> @@ -31,6 +32,7 @@ extern int acpi_skip_timer_override; extern int acpi_use_timer_override; extern int acpi_fix_pin2_polarity; extern int acpi_disable_cmcff; +extern bool acpi_int_src_ovr[NR_IRQS_LEGACY]; extern u8 acpi_sci_flags; extern u32 acpi_sci_override_gsi; @@ -100,23 +102,31 @@ static inline bool arch_has_acpi_pdc(void) c->x86_vendor == X86_VENDOR_CENTAUR); } -static inline void arch_acpi_set_pdc_bits(u32 *buf) +static inline void arch_acpi_set_proc_cap_bits(u32 *cap) { struct cpuinfo_x86 *c = &cpu_data(0); - buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + *cap |= ACPI_PROC_CAP_C_CAPABILITY_SMP; + + /* Enable coordination with firmware's _TSD info */ + *cap |= ACPI_PROC_CAP_SMP_T_SWCOORD; if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SWSMP; if (cpu_has(c, X86_FEATURE_ACPI)) - buf[2] |= ACPI_PDC_T_FFH; + *cap |= ACPI_PROC_CAP_T_FFH; + + if (cpu_has(c, X86_FEATURE_HWP)) + *cap |= ACPI_PROC_CAP_COLLAB_PROC_PERF; /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + * If mwait/monitor is unsupported, C_C1_FFH and + * C2/C3_FFH will be disabled. */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); + if (!cpu_has(c, X86_FEATURE_MWAIT) || + boot_option_idle_override == IDLE_NOMWAIT) + *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); } static inline bool acpi_has_cpu_in_madt(void) diff --git a/arch/x86/include/asm/boot.h b/arch/x86/include/asm/boot.h index 9191280d9ea3..4ae14339cb8c 100644 --- a/arch/x86/include/asm/boot.h +++ b/arch/x86/include/asm/boot.h @@ -62,4 +62,12 @@ # define BOOT_STACK_SIZE 0x1000 #endif +#ifndef __ASSEMBLY__ +extern unsigned int output_len; +extern const unsigned long kernel_total_size; + +unsigned long decompress_kernel(unsigned char *outbuf, unsigned long virt_addr, + void (*error)(char *x)); +#endif + #endif /* _ASM_X86_BOOT_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index cb8ca46213be..b69b0d7756aa 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -14,7 +14,7 @@ * Defines x86 CPU feature bits */ #define NCAPINTS 21 /* N 32-bit words worth of info */ -#define NBUGINTS 1 /* N 32-bit bug flags */ +#define NBUGINTS 2 /* N 32-bit bug flags */ /* * Note: If the comment begins with a quoted string, that string is used @@ -309,6 +309,10 @@ #define X86_FEATURE_SMBA (11*32+21) /* "" Slow Memory Bandwidth Allocation */ #define X86_FEATURE_BMEC (11*32+22) /* "" Bandwidth Monitoring Event Configuration */ +#define X86_FEATURE_SRSO (11*32+24) /* "" AMD BTB untrain RETs */ +#define X86_FEATURE_SRSO_ALIAS (11*32+25) /* "" AMD BTB untrain RETs through aliasing */ +#define X86_FEATURE_IBPB_ON_VMEXIT (11*32+26) /* "" Issue an IBPB only on VMEXIT */ + /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ @@ -442,6 +446,10 @@ #define X86_FEATURE_AUTOIBRS (20*32+ 8) /* "" Automatic IBRS */ #define X86_FEATURE_NO_SMM_CTL_MSR (20*32+ 9) /* "" SMM_CTL MSR is not present */ +#define X86_FEATURE_SBPB (20*32+27) /* "" Selective Branch Prediction Barrier */ +#define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* "" MSR_PRED_CMD[IBPB] flushes all branch type predictions */ +#define X86_FEATURE_SRSO_NO (20*32+29) /* "" CPU is not affected by SRSO */ + /* * BUG word(s) */ @@ -483,5 +491,9 @@ #define X86_BUG_RETBLEED X86_BUG(27) /* CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* CPU is vulnerable to Cross-Thread Return Address Predictions */ +#define X86_BUG_GDS X86_BUG(30) /* CPU is affected by Gather Data Sampling */ +/* BUG word 2 */ +#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* AMD DIV0 speculation bug */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index b8f1dc0761e4..9931e4c7d73f 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -71,6 +71,12 @@ static inline u64 mul_u32_u32(u32 a, u32 b) } #define mul_u32_u32 mul_u32_u32 +/* + * __div64_32() is never called on x86, so prevent the + * generic definition from getting built. + */ +#define __div64_32 + #else # include <asm-generic/div64.h> diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index 8b4be7cecdb8..b0994ae3bc23 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -90,6 +90,8 @@ static inline void efi_fpu_end(void) } #ifdef CONFIG_X86_32 +#define EFI_X86_KERNEL_ALLOC_LIMIT (SZ_512M - 1) + #define arch_efi_call_virt_setup() \ ({ \ efi_fpu_begin(); \ @@ -103,8 +105,7 @@ static inline void efi_fpu_end(void) }) #else /* !CONFIG_X86_32 */ - -#define EFI_LOADER_SIGNATURE "EL64" +#define EFI_X86_KERNEL_ALLOC_LIMIT EFI_ALLOC_LIMIT extern asmlinkage u64 __efi_call(void *fp, ...); @@ -218,6 +219,8 @@ efi_status_t efi_set_virtual_address_map(unsigned long memory_map_size, #ifdef CONFIG_EFI_MIXED +#define EFI_ALLOC_LIMIT (efi_is_64bit() ? ULONG_MAX : U32_MAX) + #define ARCH_HAS_EFISTUB_WRAPPERS static inline bool efi_is_64bit(void) diff --git a/arch/x86/include/asm/entry-common.h b/arch/x86/include/asm/entry-common.h index 117903881fe4..ce8f50192ae3 100644 --- a/arch/x86/include/asm/entry-common.h +++ b/arch/x86/include/asm/entry-common.h @@ -92,6 +92,7 @@ static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs, static __always_inline void arch_exit_to_user_mode(void) { mds_user_clear_cpu_buffers(); + amd_clear_divider(); } #define arch_exit_to_user_mode arch_exit_to_user_mode diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index b3af2d45bbbb..5fcd85fd64fd 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -98,8 +98,6 @@ #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ -#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ - #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ @@ -112,21 +110,24 @@ #define INTEL_FAM6_GRANITERAPIDS_X 0xAD #define INTEL_FAM6_GRANITERAPIDS_D 0xAE +/* "Hybrid" Processors (P-Core/E-Core) */ + +#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ + #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ -#define INTEL_FAM6_ALDERLAKE_N 0xBE -#define INTEL_FAM6_RAPTORLAKE 0xB7 +#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ #define INTEL_FAM6_RAPTORLAKE_P 0xBA #define INTEL_FAM6_RAPTORLAKE_S 0xBF #define INTEL_FAM6_METEORLAKE 0xAC #define INTEL_FAM6_METEORLAKE_L 0xAA -#define INTEL_FAM6_LUNARLAKE_M 0xBD - #define INTEL_FAM6_ARROWLAKE 0xC6 +#define INTEL_FAM6_LUNARLAKE_M 0xBD + /* "Small Core" Processors (Atom/E-Core) */ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ @@ -154,9 +155,10 @@ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ -#define INTEL_FAM6_SIERRAFOREST_X 0xAF +#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ -#define INTEL_FAM6_GRANDRIDGE 0xB6 +#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ +#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ /* Xeon Phi */ diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index 0953aa32a324..97a3de7892d3 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -21,7 +21,7 @@ #define FUNCTION_PADDING #endif -#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BULID_VDSO) +#if (CONFIG_FUNCTION_ALIGNMENT > 8) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO) # define __FUNC_ALIGN __ALIGN; FUNCTION_PADDING #else # define __FUNC_ALIGN __ALIGN diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 56d4ef604b91..635132a12778 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -127,8 +127,8 @@ static inline long local_cmpxchg(local_t *l, long old, long new) static inline bool local_try_cmpxchg(local_t *l, long *old, long new) { - typeof(l->a.counter) *__old = (typeof(l->a.counter) *) old; - return try_cmpxchg_local(&l->a.counter, __old, new); + return try_cmpxchg_local(&l->a.counter, + (typeof(l->a.counter) *) old, new); } /* Always has a lock prefix */ diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h index 7f97a8a97e24..473b16d73b47 100644 --- a/arch/x86/include/asm/mem_encrypt.h +++ b/arch/x86/include/asm/mem_encrypt.h @@ -50,8 +50,8 @@ void __init sme_enable(struct boot_params *bp); int __init early_set_memory_decrypted(unsigned long vaddr, unsigned long size); int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size); -void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, - bool enc); +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, + unsigned long size, bool enc); void __init mem_encrypt_free_decrypted_mem(void); @@ -85,7 +85,7 @@ early_set_memory_decrypted(unsigned long vaddr, unsigned long size) { return 0; static inline int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } static inline void __init -early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) {} +early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) {} static inline void mem_encrypt_free_decrypted_mem(void) { } diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 66dbba181bd9..bbbe9d744977 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -2,138 +2,77 @@ #ifndef _ASM_X86_MICROCODE_H #define _ASM_X86_MICROCODE_H -#include <asm/cpu.h> -#include <linux/earlycpio.h> -#include <linux/initrd.h> -#include <asm/microcode_amd.h> - -struct ucode_patch { - struct list_head plist; - void *data; /* Intel uses only this one */ - unsigned int size; - u32 patch_id; - u16 equiv_cpu; -}; - -extern struct list_head microcode_cache; - struct cpu_signature { unsigned int sig; unsigned int pf; unsigned int rev; }; -struct device; - -enum ucode_state { - UCODE_OK = 0, - UCODE_NEW, - UCODE_UPDATED, - UCODE_NFOUND, - UCODE_ERROR, -}; - -struct microcode_ops { - enum ucode_state (*request_microcode_fw) (int cpu, struct device *); - - void (*microcode_fini_cpu) (int cpu); - - /* - * The generic 'microcode_core' part guarantees that - * the callbacks below run on a target cpu when they - * are being called. - * See also the "Synchronization" section in microcode_core.c. - */ - enum ucode_state (*apply_microcode) (int cpu); - int (*collect_cpu_info) (int cpu, struct cpu_signature *csig); -}; - struct ucode_cpu_info { struct cpu_signature cpu_sig; void *mc; }; -extern struct ucode_cpu_info ucode_cpu_info[]; -struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa); - -#ifdef CONFIG_MICROCODE_INTEL -extern struct microcode_ops * __init init_intel_microcode(void); -#else -static inline struct microcode_ops * __init init_intel_microcode(void) -{ - return NULL; -} -#endif /* CONFIG_MICROCODE_INTEL */ -#ifdef CONFIG_MICROCODE_AMD -extern struct microcode_ops * __init init_amd_microcode(void); -extern void __exit exit_amd_microcode(void); +#ifdef CONFIG_MICROCODE +void load_ucode_bsp(void); +void load_ucode_ap(void); +void microcode_bsp_resume(void); #else -static inline struct microcode_ops * __init init_amd_microcode(void) -{ - return NULL; -} -static inline void __exit exit_amd_microcode(void) {} +static inline void load_ucode_bsp(void) { } +static inline void load_ucode_ap(void) { } +static inline void microcode_bsp_resume(void) { } #endif -#define MAX_UCODE_COUNT 128 +#ifdef CONFIG_CPU_SUP_INTEL +/* Intel specific microcode defines. Public for IFS */ +struct microcode_header_intel { + unsigned int hdrver; + unsigned int rev; + unsigned int date; + unsigned int sig; + unsigned int cksum; + unsigned int ldrver; + unsigned int pf; + unsigned int datasize; + unsigned int totalsize; + unsigned int metasize; + unsigned int reserved[2]; +}; -#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) -#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') -#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') -#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') -#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') -#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') -#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') +struct microcode_intel { + struct microcode_header_intel hdr; + unsigned int bits[]; +}; -#define CPUID_IS(a, b, c, ebx, ecx, edx) \ - (!((ebx ^ (a))|(edx ^ (b))|(ecx ^ (c)))) +#define DEFAULT_UCODE_DATASIZE (2000) +#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) +#define MC_HEADER_TYPE_MICROCODE 1 +#define MC_HEADER_TYPE_IFS 2 -/* - * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. - * x86_cpuid_vendor() gets vendor id for BSP. - * - * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify - * coding, we still use x86_cpuid_vendor() to get vendor id for AP. - * - * x86_cpuid_vendor() gets vendor information directly from CPUID. - */ -static inline int x86_cpuid_vendor(void) +static inline int intel_microcode_get_datasize(struct microcode_header_intel *hdr) { - u32 eax = 0x00000000; - u32 ebx, ecx = 0, edx; - - native_cpuid(&eax, &ebx, &ecx, &edx); - - if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) - return X86_VENDOR_INTEL; - - if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) - return X86_VENDOR_AMD; - - return X86_VENDOR_UNKNOWN; + return hdr->datasize ? : DEFAULT_UCODE_DATASIZE; } -static inline unsigned int x86_cpuid_family(void) +static inline u32 intel_get_microcode_revision(void) { - u32 eax = 0x00000001; - u32 ebx, ecx = 0, edx; + u32 rev, dummy; + + native_wrmsrl(MSR_IA32_UCODE_REV, 0); - native_cpuid(&eax, &ebx, &ecx, &edx); + /* As documented in the SDM: Do a CPUID 1 here */ + native_cpuid_eax(1); - return x86_family(eax); + /* get the current revision from MSR 0x8B */ + native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); + + return rev; } -#ifdef CONFIG_MICROCODE -extern void __init load_ucode_bsp(void); -extern void load_ucode_ap(void); -void reload_early_microcode(unsigned int cpu); -extern bool initrd_gone; -void microcode_bsp_resume(void); -#else -static inline void __init load_ucode_bsp(void) { } -static inline void load_ucode_ap(void) { } -static inline void reload_early_microcode(unsigned int cpu) { } -static inline void microcode_bsp_resume(void) { } -#endif +void show_ucode_info_early(void); + +#else /* CONFIG_CPU_SUP_INTEL */ +static inline void show_ucode_info_early(void) { } +#endif /* !CONFIG_CPU_SUP_INTEL */ #endif /* _ASM_X86_MICROCODE_H */ diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h deleted file mode 100644 index 9675c621c1ca..000000000000 --- a/arch/x86/include/asm/microcode_amd.h +++ /dev/null @@ -1,60 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MICROCODE_AMD_H -#define _ASM_X86_MICROCODE_AMD_H - -#include <asm/microcode.h> - -#define UCODE_MAGIC 0x00414d44 -#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 -#define UCODE_UCODE_TYPE 0x00000001 - -#define SECTION_HDR_SIZE 8 -#define CONTAINER_HDR_SZ 12 - -struct equiv_cpu_entry { - u32 installed_cpu; - u32 fixed_errata_mask; - u32 fixed_errata_compare; - u16 equiv_cpu; - u16 res; -} __attribute__((packed)); - -struct microcode_header_amd { - u32 data_code; - u32 patch_id; - u16 mc_patch_data_id; - u8 mc_patch_data_len; - u8 init_flag; - u32 mc_patch_data_checksum; - u32 nb_dev_id; - u32 sb_dev_id; - u16 processor_rev_id; - u8 nb_rev_id; - u8 sb_rev_id; - u8 bios_api_rev; - u8 reserved1[3]; - u32 match_reg[8]; -} __attribute__((packed)); - -struct microcode_amd { - struct microcode_header_amd hdr; - unsigned int mpb[]; -}; - -#define PATCH_MAX_SIZE (3 * PAGE_SIZE) - -#ifdef CONFIG_MICROCODE_AMD -extern void __init load_ucode_amd_bsp(unsigned int family); -extern void load_ucode_amd_ap(unsigned int family); -extern int __init save_microcode_in_initrd_amd(unsigned int family); -void reload_ucode_amd(unsigned int cpu); -extern void amd_check_microcode(void); -#else -static inline void __init load_ucode_amd_bsp(unsigned int family) {} -static inline void load_ucode_amd_ap(unsigned int family) {} -static inline int __init -save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } -static inline void reload_ucode_amd(unsigned int cpu) {} -static inline void amd_check_microcode(void) {} -#endif -#endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/include/asm/microcode_intel.h b/arch/x86/include/asm/microcode_intel.h deleted file mode 100644 index f1fa979e05bf..000000000000 --- a/arch/x86/include/asm/microcode_intel.h +++ /dev/null @@ -1,88 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _ASM_X86_MICROCODE_INTEL_H -#define _ASM_X86_MICROCODE_INTEL_H - -#include <asm/microcode.h> - -struct microcode_header_intel { - unsigned int hdrver; - unsigned int rev; - unsigned int date; - unsigned int sig; - unsigned int cksum; - unsigned int ldrver; - unsigned int pf; - unsigned int datasize; - unsigned int totalsize; - unsigned int metasize; - unsigned int reserved[2]; -}; - -struct microcode_intel { - struct microcode_header_intel hdr; - unsigned int bits[]; -}; - -/* microcode format is extended from prescott processors */ -struct extended_signature { - unsigned int sig; - unsigned int pf; - unsigned int cksum; -}; - -struct extended_sigtable { - unsigned int count; - unsigned int cksum; - unsigned int reserved[3]; - struct extended_signature sigs[]; -}; - -#define DEFAULT_UCODE_DATASIZE (2000) -#define MC_HEADER_SIZE (sizeof(struct microcode_header_intel)) -#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) -#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) -#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) -#define MC_HEADER_TYPE_MICROCODE 1 -#define MC_HEADER_TYPE_IFS 2 - -#define get_totalsize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.totalsize : \ - DEFAULT_UCODE_TOTALSIZE) - -#define get_datasize(mc) \ - (((struct microcode_intel *)mc)->hdr.datasize ? \ - ((struct microcode_intel *)mc)->hdr.datasize : DEFAULT_UCODE_DATASIZE) - -#define exttable_size(et) ((et)->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE) - -static inline u32 intel_get_microcode_revision(void) -{ - u32 rev, dummy; - - native_wrmsrl(MSR_IA32_UCODE_REV, 0); - - /* As documented in the SDM: Do a CPUID 1 here */ - native_cpuid_eax(1); - - /* get the current revision from MSR 0x8B */ - native_rdmsr(MSR_IA32_UCODE_REV, dummy, rev); - - return rev; -} - -#ifdef CONFIG_MICROCODE_INTEL -extern void __init load_ucode_intel_bsp(void); -extern void load_ucode_intel_ap(void); -extern void show_ucode_info_early(void); -extern int __init save_microcode_in_initrd_intel(void); -void reload_ucode_intel(void); -#else -static inline __init void load_ucode_intel_bsp(void) {} -static inline void load_ucode_intel_ap(void) {} -static inline void show_ucode_info_early(void) {} -static inline int __init save_microcode_in_initrd_intel(void) { return -EINVAL; } -static inline void reload_ucode_intel(void) {} -#endif - -#endif /* _ASM_X86_MICROCODE_INTEL_H */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 88d9ef98e087..fa83d88e4c99 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -5,7 +5,7 @@ #include <linux/types.h> #include <linux/nmi.h> #include <linux/msi.h> -#include <asm/io.h> +#include <linux/io.h> #include <asm/hyperv-tlfs.h> #include <asm/nospec-branch.h> #include <asm/paravirt.h> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a00a53e15ab7..1d111350197f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -57,6 +57,7 @@ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */ +#define PRED_CMD_SBPB BIT(7) /* Selective Branch Prediction Barrier */ #define MSR_PPIN_CTL 0x0000004e #define MSR_PPIN 0x0000004f @@ -155,6 +156,15 @@ * Not susceptible to Post-Barrier * Return Stack Buffer Predictions. */ +#define ARCH_CAP_GDS_CTRL BIT(25) /* + * CPU is vulnerable to Gather + * Data Sampling (GDS) and + * has controls for mitigation. + */ +#define ARCH_CAP_GDS_NO BIT(26) /* + * CPU is not vulnerable to Gather + * Data Sampling (GDS). + */ #define ARCH_CAP_XAPIC_DISABLE BIT(21) /* * IA32_XAPIC_DISABLE_STATUS MSR @@ -178,6 +188,8 @@ #define RNGDS_MITG_DIS BIT(0) /* SRBDS support */ #define RTM_ALLOW BIT(1) /* TSX development mode */ #define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */ +#define GDS_MITG_DIS BIT(4) /* Disable GDS mitigation */ +#define GDS_MITG_LOCKED BIT(5) /* GDS mitigation locked */ #define MSR_IA32_SYSENTER_CS 0x00000174 #define MSR_IA32_SYSENTER_ESP 0x00000175 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1a65cf4acb2b..c55cc243592e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -211,7 +211,8 @@ * eventually turn into it's own annotation. */ .macro VALIDATE_UNRET_END -#if defined(CONFIG_NOINSTR_VALIDATION) && defined(CONFIG_CPU_UNRET_ENTRY) +#if defined(CONFIG_NOINSTR_VALIDATION) && \ + (defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_SRSO)) ANNOTATE_RETPOLINE_SAFE nop #endif @@ -271,9 +272,9 @@ .endm #ifdef CONFIG_CPU_UNRET_ENTRY -#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret" +#define CALL_UNTRAIN_RET "call entry_untrain_ret" #else -#define CALL_ZEN_UNTRAIN_RET "" +#define CALL_UNTRAIN_RET "" #endif /* @@ -281,7 +282,7 @@ * return thunk isn't mapped into the userspace tables (then again, AMD * typically has NO_MELTDOWN). * - * While zen_untrain_ret() doesn't clobber anything but requires stack, + * While retbleed_untrain_ret() doesn't clobber anything but requires stack, * entry_ibpb() will clobber AX, CX, DX. * * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point @@ -289,21 +290,32 @@ */ .macro UNTRAIN_RET #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ - defined(CONFIG_CALL_DEPTH_TRACKING) + defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) VALIDATE_UNRET_END ALTERNATIVE_3 "", \ - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH #endif .endm +.macro UNTRAIN_RET_VM +#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ + defined(CONFIG_CALL_DEPTH_TRACKING) || defined(CONFIG_CPU_SRSO) + VALIDATE_UNRET_END + ALTERNATIVE_3 "", \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ + "call entry_ibpb", X86_FEATURE_IBPB_ON_VMEXIT, \ + __stringify(RESET_CALL_DEPTH), X86_FEATURE_CALL_DEPTH +#endif +.endm + .macro UNTRAIN_RET_FROM_CALL #if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY) || \ defined(CONFIG_CALL_DEPTH_TRACKING) VALIDATE_UNRET_END ALTERNATIVE_3 "", \ - CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \ + CALL_UNTRAIN_RET, X86_FEATURE_UNRET, \ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB, \ __stringify(RESET_CALL_DEPTH_FROM_CALL), X86_FEATURE_CALL_DEPTH #endif @@ -330,15 +342,24 @@ extern retpoline_thunk_t __x86_indirect_thunk_array[]; extern retpoline_thunk_t __x86_indirect_call_thunk_array[]; extern retpoline_thunk_t __x86_indirect_jump_thunk_array[]; +#ifdef CONFIG_RETHUNK extern void __x86_return_thunk(void); -extern void zen_untrain_ret(void); +#else +static inline void __x86_return_thunk(void) {} +#endif + +extern void retbleed_return_thunk(void); +extern void srso_return_thunk(void); +extern void srso_alias_return_thunk(void); + +extern void retbleed_untrain_ret(void); +extern void srso_untrain_ret(void); +extern void srso_alias_untrain_ret(void); + +extern void entry_untrain_ret(void); extern void entry_ibpb(void); -#ifdef CONFIG_CALL_THUNKS extern void (*x86_return_thunk)(void); -#else -#define x86_return_thunk (&__x86_return_thunk) -#endif #ifdef CONFIG_CALL_DEPTH_TRACKING extern void __x86_return_skl(void); @@ -465,9 +486,6 @@ enum ssb_mitigation { SPEC_STORE_BYPASS_SECCOMP, }; -extern char __indirect_thunk_start[]; -extern char __indirect_thunk_end[]; - static __always_inline void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) { @@ -479,11 +497,11 @@ void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) : "memory"); } +extern u64 x86_pred_cmd; + static inline void indirect_branch_prediction_barrier(void) { - u64 val = PRED_CMD_IBPB; - - alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); + alternative_msr_write(MSR_IA32_PRED_CMD, x86_pred_cmd, X86_FEATURE_USE_IBPB); } /* The Intel SPEC CTRL MSR base value cache */ diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index b49778664d2b..6c8ff12140ae 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -739,6 +739,7 @@ static __always_inline unsigned long arch_local_irq_save(void) ".popsection") extern void default_banner(void); +void native_pv_lock_init(void) __init; #else /* __ASSEMBLY__ */ @@ -778,6 +779,12 @@ extern void default_banner(void); #endif /* __ASSEMBLY__ */ #else /* CONFIG_PARAVIRT */ # define default_banner x86_init_noop + +#ifndef __ASSEMBLY__ +static inline void native_pv_lock_init(void) +{ +} +#endif #endif /* !CONFIG_PARAVIRT */ #ifndef __ASSEMBLY__ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index d46300e94f85..861e53e201e9 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -586,7 +586,6 @@ extern char ignore_fpu_irq; #define HAVE_ARCH_PICK_MMAP_LAYOUT 1 #define ARCH_HAS_PREFETCHW -#define ARCH_HAS_SPINLOCK_PREFETCH #ifdef CONFIG_X86_32 # define BASE_PREFETCH "" @@ -620,11 +619,6 @@ static __always_inline void prefetchw(const void *x) "m" (*(const char *)x)); } -static inline void spin_lock_prefetch(const void *x) -{ - prefetchw(x); -} - #define TOP_OF_INIT_STACK ((unsigned long)&init_stack + sizeof(init_stack) - \ TOP_OF_KERNEL_STACK_PADDING) @@ -682,9 +676,15 @@ extern u16 get_llc_id(unsigned int cpu); #ifdef CONFIG_CPU_SUP_AMD extern u32 amd_get_nodes_per_socket(void); extern u32 amd_get_highest_perf(void); +extern bool cpu_has_ibpb_brtype_microcode(void); +extern void amd_clear_divider(void); +extern void amd_check_microcode(void); #else static inline u32 amd_get_nodes_per_socket(void) { return 0; } static inline u32 amd_get_highest_perf(void) { return 0; } +static inline bool cpu_has_ibpb_brtype_microcode(void) { return false; } +static inline void amd_clear_divider(void) { } +static inline void amd_check_microcode(void) { } #endif extern unsigned long arch_align_stack(unsigned long sp); @@ -727,4 +727,6 @@ bool arch_is_platform_page(u64 paddr); #define arch_is_platform_page arch_is_platform_page #endif +extern bool gds_ucode_mitigated(void); + #endif /* _ASM_X86_PROCESSOR_H */ diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index d87451df480b..cde8357bb226 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -74,8 +74,6 @@ static inline bool vcpu_is_preempted(long cpu) */ DECLARE_STATIC_KEY_TRUE(virt_spin_lock_key); -void native_pv_lock_init(void) __init; - /* * Shortcut for the queued_spin_lock_slowpath() function that allows * virt to hijack it. @@ -103,10 +101,7 @@ static inline bool virt_spin_lock(struct qspinlock *lock) return true; } -#else -static inline void native_pv_lock_init(void) -{ -} + #endif /* CONFIG_PARAVIRT */ #include <asm-generic/qspinlock.h> diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 42b17cf10b10..85b6e3609cb9 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -4,6 +4,8 @@ #include <asm/ibt.h> +void __lockfunc __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked); + /* * For x86-64, PV_CALLEE_SAVE_REGS_THUNK() saves and restores 8 64-bit * registers. For i386, however, only 1 32-bit register needs to be saved diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 794f69625780..9d6411c65920 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -56,7 +56,7 @@ #define GDT_ENTRY_INVALID_SEG 0 -#ifdef CONFIG_X86_32 +#if defined(CONFIG_X86_32) && !defined(BUILD_VDSO32_64) /* * The layout of the per-CPU GDT under Linux: * diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 66c806784c52..5b4a1ce3d368 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -164,6 +164,7 @@ static __always_inline void sev_es_nmi_complete(void) __sev_es_nmi_complete(); } extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd); +extern void sev_enable(struct boot_params *bp); static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { @@ -210,12 +211,15 @@ bool snp_init(struct boot_params *bp); void __init __noreturn snp_abort(void); int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct snp_guest_request_ioctl *rio); void snp_accept_memory(phys_addr_t start, phys_addr_t end); +u64 snp_get_unsupported_features(u64 status); +u64 sev_get_status(void); #else static inline void sev_es_ist_enter(struct pt_regs *regs) { } static inline void sev_es_ist_exit(void) { } static inline int sev_es_setup_ap_jump_table(struct real_mode_header *rmh) { return 0; } static inline void sev_es_nmi_complete(void) { } static inline int sev_es_efi_map_ghcbs(pgd_t *pgd) { return 0; } +static inline void sev_enable(struct boot_params *bp) { } static inline int pvalidate(unsigned long vaddr, bool rmp_psize, bool validate) { return 0; } static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs) { return 0; } static inline void setup_ghcb(void) { } @@ -235,6 +239,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in } static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { } +static inline u64 snp_get_unsupported_features(u64 status) { return 0; } +static inline u64 sev_get_status(void) { return 0; } #endif #endif diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index caf41c4869a0..3235ba1e5b06 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -136,10 +136,11 @@ static inline int topology_max_smt_threads(void) return __max_smt_threads; } +#include <linux/cpu_smt.h> + int topology_update_package_map(unsigned int apicid, unsigned int cpu); int topology_update_die_map(unsigned int dieid, unsigned int cpu); int topology_phys_to_logical_pkg(unsigned int pkg); -bool topology_smt_supported(void); extern struct cpumask __cpu_primary_thread_mask; #define cpu_primary_thread_mask ((const struct cpumask *)&__cpu_primary_thread_mask) @@ -162,7 +163,6 @@ static inline int topology_phys_to_logical_pkg(unsigned int pkg) { return 0; } static inline int topology_max_die_per_package(void) { return 1; } static inline int topology_max_smt_threads(void) { return 1; } static inline bool topology_is_primary_thread(unsigned int cpu) { return true; } -static inline bool topology_smt_supported(void) { return false; } #endif /* !CONFIG_SMP */ static inline void arch_fix_phys_package_id(int num, u32 slot) diff --git a/arch/x86/include/asm/uv/bios.h b/arch/x86/include/asm/uv/bios.h index 1b6455f881f9..6989b824fd32 100644 --- a/arch/x86/include/asm/uv/bios.h +++ b/arch/x86/include/asm/uv/bios.h @@ -10,6 +10,7 @@ * Copyright (c) Russ Anderson <rja@sgi.com> */ +#include <linux/efi.h> #include <linux/rtc.h> /* @@ -115,7 +116,8 @@ struct uv_arch_type_entry { struct uv_systab { char signature[4]; /* must be UV_SYSTAB_SIG */ u32 revision; /* distinguish different firmware revs */ - u64 function; /* BIOS runtime callback function ptr */ + u64 (__efiapi *function)(enum uv_bios_cmd, ...); + /* BIOS runtime callback function ptr */ u32 size; /* systab size (starting with _VERSION_UV4) */ struct { u32 type:8; /* type of entry */ diff --git a/arch/x86/include/asm/xen/page.h b/arch/x86/include/asm/xen/page.h index fa9ec20783fa..85e63d58c074 100644 --- a/arch/x86/include/asm/xen/page.h +++ b/arch/x86/include/asm/xen/page.h @@ -295,7 +295,10 @@ static inline unsigned long bfn_to_local_pfn(unsigned long mfn) /* VIRT <-> MACHINE conversion */ #define virt_to_machine(v) (phys_to_machine(XPADDR(__pa(v)))) -#define virt_to_pfn(v) (PFN_DOWN(__pa(v))) +static inline unsigned long virt_to_pfn(const void *v) +{ + return PFN_DOWN(__pa(v)); +} #define virt_to_mfn(v) (pfn_to_mfn(virt_to_pfn(v))) #define mfn_to_virt(m) (__va(mfn_to_pfn(m) << PAGE_SHIFT)) diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 21b542a6866c..53369c57751e 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -52,6 +52,7 @@ int acpi_lapic; int acpi_ioapic; int acpi_strict; int acpi_disable_cmcff; +bool acpi_int_src_ovr[NR_IRQS_LEGACY]; /* ACPI SCI override configuration */ u8 acpi_sci_flags __initdata; @@ -588,6 +589,9 @@ acpi_parse_int_src_ovr(union acpi_subtable_headers * header, acpi_table_print_madt_entry(&header->common); + if (intsrc->source_irq < NR_IRQS_LEGACY) + acpi_int_src_ovr[intsrc->source_irq] = true; + if (intsrc->source_irq == acpi_gbl_FADT.sci_interrupt) { acpi_sci_ioapic_setup(intsrc->source_irq, intsrc->inti_flags & ACPI_MADT_POLARITY_MASK, diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 2dcf3a06af09..a5ead6a6d233 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -687,10 +687,6 @@ void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) #ifdef CONFIG_RETHUNK -#ifdef CONFIG_CALL_THUNKS -void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; -#endif - /* * Rewrite the compiler generated return thunk tail-calls. * @@ -1531,6 +1527,7 @@ static noinline void __init int3_selftest(void) static __initdata int __alt_reloc_selftest_addr; +extern void __init __alt_reloc_selftest(void *arg); __visible noinline void __init __alt_reloc_selftest(void *arg) { WARN_ON(arg != &__alt_reloc_selftest_addr); diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 035a3db5330b..356de955e78d 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -24,6 +24,8 @@ #define PCI_DEVICE_ID_AMD_19H_M40H_ROOT 0x14b5 #define PCI_DEVICE_ID_AMD_19H_M60H_ROOT 0x14d8 #define PCI_DEVICE_ID_AMD_19H_M70H_ROOT 0x14e8 +#define PCI_DEVICE_ID_AMD_1AH_M00H_ROOT 0x153a +#define PCI_DEVICE_ID_AMD_1AH_M20H_ROOT 0x1507 #define PCI_DEVICE_ID_AMD_MI200_ROOT 0x14bb #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 @@ -39,6 +41,7 @@ #define PCI_DEVICE_ID_AMD_19H_M60H_DF_F4 0x14e4 #define PCI_DEVICE_ID_AMD_19H_M70H_DF_F4 0x14f4 #define PCI_DEVICE_ID_AMD_19H_M78H_DF_F4 0x12fc +#define PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4 0x12c4 #define PCI_DEVICE_ID_AMD_MI200_DF_F4 0x14d4 /* Protect the PCI config register pairs used for SMN. */ @@ -56,6 +59,8 @@ static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_ROOT) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_ROOT) }, {} }; @@ -85,6 +90,8 @@ static const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M60H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M70H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M78H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M20H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F3) }, {} }; @@ -106,6 +113,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M40H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_19H_M50H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_1AH_M00H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_MI200_DF_F4) }, {} }; diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 2a6509e8c840..9bfd6e397384 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c @@ -301,6 +301,7 @@ void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector) local_irq_restore(flags); } +#ifdef CONFIG_SMP /* must come after the send_IPI functions above for inlining */ static int convert_apicid_to_cpu(int apic_id) { @@ -329,3 +330,4 @@ int safe_smp_processor_id(void) return cpuid >= 0 ? cpuid : 0; } #endif +#endif diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d9384d5b4b8e..b524dee1cbbb 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -294,8 +294,7 @@ static void __init early_get_apic_socketid_shift(void) static void __init uv_stringify(int len, char *to, char *from) { - /* Relies on 'to' being NULL chars so result will be NULL terminated */ - strncpy(to, from, len-1); + strscpy(to, from, len); /* Trim trailing spaces */ (void)strim(to); @@ -1013,7 +1012,7 @@ static void __init calc_mmioh_map(enum mmioh_arch index, /* One (UV2) mapping */ if (index == UV2_MMIOH) { - strncpy(id, "MMIOH", sizeof(id)); + strscpy(id, "MMIOH", sizeof(id)); max_io = max_pnode; mapped = 0; goto map_exit; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 26ad7ca423e7..7eca6a8abbb1 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -73,8 +73,13 @@ static const int amd_erratum_1054[] = static const int amd_zenbleed[] = AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x30, 0x0, 0x4f, 0xf), AMD_MODEL_RANGE(0x17, 0x60, 0x0, 0x7f, 0xf), + AMD_MODEL_RANGE(0x17, 0x90, 0x0, 0x91, 0xf), AMD_MODEL_RANGE(0x17, 0xa0, 0x0, 0xaf, 0xf)); +static const int amd_div0[] = + AMD_LEGACY_ERRATUM(AMD_MODEL_RANGE(0x17, 0x00, 0x0, 0x2f, 0xf), + AMD_MODEL_RANGE(0x17, 0x50, 0x0, 0x5f, 0xf)); + static bool cpu_has_amd_erratum(struct cpuinfo_x86 *cpu, const int *erratum) { int osvw_id = *erratum++; @@ -1130,6 +1135,11 @@ static void init_amd(struct cpuinfo_x86 *c) WARN_ON_ONCE(msr_set_bit(MSR_EFER, _EFER_AUTOIBRS)); zenbleed_check(c); + + if (cpu_has_amd_erratum(c, amd_div0)) { + pr_notice_once("AMD Zen1 DIV0 bug detected. Disable SMT for full protection.\n"); + setup_force_cpu_bug(X86_BUG_DIV0); + } } #ifdef CONFIG_X86_32 @@ -1290,3 +1300,33 @@ void amd_check_microcode(void) { on_each_cpu(zenbleed_check_cpu, NULL, 1); } + +bool cpu_has_ibpb_brtype_microcode(void) +{ + switch (boot_cpu_data.x86) { + /* Zen1/2 IBPB flushes branch type predictions too. */ + case 0x17: + return boot_cpu_has(X86_FEATURE_AMD_IBPB); + case 0x19: + /* Poke the MSR bit on Zen3/4 to check its presence. */ + if (!wrmsrl_safe(MSR_IA32_PRED_CMD, PRED_CMD_SBPB)) { + setup_force_cpu_cap(X86_FEATURE_SBPB); + return true; + } else { + return false; + } + default: + return false; + } +} + +/* + * Issue a DIV 0/1 insn to clear any division data from previous DIV + * operations. + */ +void noinstr amd_clear_divider(void) +{ + asm volatile(ALTERNATIVE("", "div %2\n\t", X86_BUG_DIV0) + :: "a" (0), "d" (0), "r" (1)); +} +EXPORT_SYMBOL_GPL(amd_clear_divider); diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 95507448e781..f081d26616ac 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -47,6 +47,8 @@ static void __init taa_select_mitigation(void); static void __init mmio_select_mitigation(void); static void __init srbds_select_mitigation(void); static void __init l1d_flush_select_mitigation(void); +static void __init srso_select_mitigation(void); +static void __init gds_select_mitigation(void); /* The base value of the SPEC_CTRL MSR without task-specific bits set */ u64 x86_spec_ctrl_base; @@ -56,8 +58,13 @@ EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); DEFINE_PER_CPU(u64, x86_spec_ctrl_current); EXPORT_SYMBOL_GPL(x86_spec_ctrl_current); +u64 x86_pred_cmd __ro_after_init = PRED_CMD_IBPB; +EXPORT_SYMBOL_GPL(x86_pred_cmd); + static DEFINE_MUTEX(spec_ctrl_mutex); +void (*x86_return_thunk)(void) __ro_after_init = &__x86_return_thunk; + /* Update SPEC_CTRL MSR and its cached copy unconditionally */ static void update_spec_ctrl(u64 val) { @@ -160,6 +167,13 @@ void __init cpu_select_mitigations(void) md_clear_select_mitigation(); srbds_select_mitigation(); l1d_flush_select_mitigation(); + + /* + * srso_select_mitigation() depends and must run after + * retbleed_select_mitigation(). + */ + srso_select_mitigation(); + gds_select_mitigation(); } /* @@ -646,6 +660,149 @@ static int __init l1d_flush_parse_cmdline(char *str) early_param("l1d_flush", l1d_flush_parse_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "GDS: " fmt + +enum gds_mitigations { + GDS_MITIGATION_OFF, + GDS_MITIGATION_UCODE_NEEDED, + GDS_MITIGATION_FORCE, + GDS_MITIGATION_FULL, + GDS_MITIGATION_FULL_LOCKED, + GDS_MITIGATION_HYPERVISOR, +}; + +#if IS_ENABLED(CONFIG_GDS_FORCE_MITIGATION) +static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FORCE; +#else +static enum gds_mitigations gds_mitigation __ro_after_init = GDS_MITIGATION_FULL; +#endif + +static const char * const gds_strings[] = { + [GDS_MITIGATION_OFF] = "Vulnerable", + [GDS_MITIGATION_UCODE_NEEDED] = "Vulnerable: No microcode", + [GDS_MITIGATION_FORCE] = "Mitigation: AVX disabled, no microcode", + [GDS_MITIGATION_FULL] = "Mitigation: Microcode", + [GDS_MITIGATION_FULL_LOCKED] = "Mitigation: Microcode (locked)", + [GDS_MITIGATION_HYPERVISOR] = "Unknown: Dependent on hypervisor status", +}; + +bool gds_ucode_mitigated(void) +{ + return (gds_mitigation == GDS_MITIGATION_FULL || + gds_mitigation == GDS_MITIGATION_FULL_LOCKED); +} +EXPORT_SYMBOL_GPL(gds_ucode_mitigated); + +void update_gds_msr(void) +{ + u64 mcu_ctrl_after; + u64 mcu_ctrl; + + switch (gds_mitigation) { + case GDS_MITIGATION_OFF: + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + mcu_ctrl |= GDS_MITG_DIS; + break; + case GDS_MITIGATION_FULL_LOCKED: + /* + * The LOCKED state comes from the boot CPU. APs might not have + * the same state. Make sure the mitigation is enabled on all + * CPUs. + */ + case GDS_MITIGATION_FULL: + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + mcu_ctrl &= ~GDS_MITG_DIS; + break; + case GDS_MITIGATION_FORCE: + case GDS_MITIGATION_UCODE_NEEDED: + case GDS_MITIGATION_HYPERVISOR: + return; + }; + + wrmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + + /* + * Check to make sure that the WRMSR value was not ignored. Writes to + * GDS_MITG_DIS will be ignored if this processor is locked but the boot + * processor was not. + */ + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl_after); + WARN_ON_ONCE(mcu_ctrl != mcu_ctrl_after); +} + +static void __init gds_select_mitigation(void) +{ + u64 mcu_ctrl; + + if (!boot_cpu_has_bug(X86_BUG_GDS)) + return; + + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { + gds_mitigation = GDS_MITIGATION_HYPERVISOR; + goto out; + } + + if (cpu_mitigations_off()) + gds_mitigation = GDS_MITIGATION_OFF; + /* Will verify below that mitigation _can_ be disabled */ + + /* No microcode */ + if (!(x86_read_arch_cap_msr() & ARCH_CAP_GDS_CTRL)) { + if (gds_mitigation == GDS_MITIGATION_FORCE) { + /* + * This only needs to be done on the boot CPU so do it + * here rather than in update_gds_msr() + */ + setup_clear_cpu_cap(X86_FEATURE_AVX); + pr_warn("Microcode update needed! Disabling AVX as mitigation.\n"); + } else { + gds_mitigation = GDS_MITIGATION_UCODE_NEEDED; + } + goto out; + } + + /* Microcode has mitigation, use it */ + if (gds_mitigation == GDS_MITIGATION_FORCE) + gds_mitigation = GDS_MITIGATION_FULL; + + rdmsrl(MSR_IA32_MCU_OPT_CTRL, mcu_ctrl); + if (mcu_ctrl & GDS_MITG_LOCKED) { + if (gds_mitigation == GDS_MITIGATION_OFF) + pr_warn("Mitigation locked. Disable failed.\n"); + + /* + * The mitigation is selected from the boot CPU. All other CPUs + * _should_ have the same state. If the boot CPU isn't locked + * but others are then update_gds_msr() will WARN() of the state + * mismatch. If the boot CPU is locked update_gds_msr() will + * ensure the other CPUs have the mitigation enabled. + */ + gds_mitigation = GDS_MITIGATION_FULL_LOCKED; + } + + update_gds_msr(); +out: + pr_info("%s\n", gds_strings[gds_mitigation]); +} + +static int __init gds_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!boot_cpu_has_bug(X86_BUG_GDS)) + return 0; + + if (!strcmp(str, "off")) + gds_mitigation = GDS_MITIGATION_OFF; + else if (!strcmp(str, "force")) + gds_mitigation = GDS_MITIGATION_FORCE; + + return 0; +} +early_param("gather_data_sampling", gds_parse_cmdline); + +#undef pr_fmt #define pr_fmt(fmt) "Spectre V1 : " fmt enum spectre_v1_mitigation { @@ -885,6 +1042,9 @@ do_cmd_auto: setup_force_cpu_cap(X86_FEATURE_RETHUNK); setup_force_cpu_cap(X86_FEATURE_UNRET); + if (IS_ENABLED(CONFIG_RETHUNK)) + x86_return_thunk = retbleed_return_thunk; + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD && boot_cpu_data.x86_vendor != X86_VENDOR_HYGON) pr_err(RETBLEED_UNTRAIN_MSG); @@ -894,6 +1054,7 @@ do_cmd_auto: case RETBLEED_MITIGATION_IBPB: setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); mitigate_smt = true; break; @@ -2188,6 +2349,170 @@ static int __init l1tf_cmdline(char *str) early_param("l1tf", l1tf_cmdline); #undef pr_fmt +#define pr_fmt(fmt) "Speculative Return Stack Overflow: " fmt + +enum srso_mitigation { + SRSO_MITIGATION_NONE, + SRSO_MITIGATION_MICROCODE, + SRSO_MITIGATION_SAFE_RET, + SRSO_MITIGATION_IBPB, + SRSO_MITIGATION_IBPB_ON_VMEXIT, +}; + +enum srso_mitigation_cmd { + SRSO_CMD_OFF, + SRSO_CMD_MICROCODE, + SRSO_CMD_SAFE_RET, + SRSO_CMD_IBPB, + SRSO_CMD_IBPB_ON_VMEXIT, +}; + +static const char * const srso_strings[] = { + [SRSO_MITIGATION_NONE] = "Vulnerable", + [SRSO_MITIGATION_MICROCODE] = "Mitigation: microcode", + [SRSO_MITIGATION_SAFE_RET] = "Mitigation: safe RET", + [SRSO_MITIGATION_IBPB] = "Mitigation: IBPB", + [SRSO_MITIGATION_IBPB_ON_VMEXIT] = "Mitigation: IBPB on VMEXIT only" +}; + +static enum srso_mitigation srso_mitigation __ro_after_init = SRSO_MITIGATION_NONE; +static enum srso_mitigation_cmd srso_cmd __ro_after_init = SRSO_CMD_SAFE_RET; + +static int __init srso_parse_cmdline(char *str) +{ + if (!str) + return -EINVAL; + + if (!strcmp(str, "off")) + srso_cmd = SRSO_CMD_OFF; + else if (!strcmp(str, "microcode")) + srso_cmd = SRSO_CMD_MICROCODE; + else if (!strcmp(str, "safe-ret")) + srso_cmd = SRSO_CMD_SAFE_RET; + else if (!strcmp(str, "ibpb")) + srso_cmd = SRSO_CMD_IBPB; + else if (!strcmp(str, "ibpb-vmexit")) + srso_cmd = SRSO_CMD_IBPB_ON_VMEXIT; + else + pr_err("Ignoring unknown SRSO option (%s).", str); + + return 0; +} +early_param("spec_rstack_overflow", srso_parse_cmdline); + +#define SRSO_NOTICE "WARNING: See https://kernel.org/doc/html/latest/admin-guide/hw-vuln/srso.html for mitigation options." + +static void __init srso_select_mitigation(void) +{ + bool has_microcode; + + if (!boot_cpu_has_bug(X86_BUG_SRSO) || cpu_mitigations_off()) + goto pred_cmd; + + /* + * The first check is for the kernel running as a guest in order + * for guests to verify whether IBPB is a viable mitigation. + */ + has_microcode = boot_cpu_has(X86_FEATURE_IBPB_BRTYPE) || cpu_has_ibpb_brtype_microcode(); + if (!has_microcode) { + pr_warn("IBPB-extending microcode not applied!\n"); + pr_warn(SRSO_NOTICE); + } else { + /* + * Enable the synthetic (even if in a real CPUID leaf) + * flags for guests. + */ + setup_force_cpu_cap(X86_FEATURE_IBPB_BRTYPE); + + /* + * Zen1/2 with SMT off aren't vulnerable after the right + * IBPB microcode has been applied. + */ + if (boot_cpu_data.x86 < 0x19 && !cpu_smt_possible()) { + setup_force_cpu_cap(X86_FEATURE_SRSO_NO); + return; + } + } + + if (retbleed_mitigation == RETBLEED_MITIGATION_IBPB) { + if (has_microcode) { + pr_err("Retbleed IBPB mitigation enabled, using same for SRSO\n"); + srso_mitigation = SRSO_MITIGATION_IBPB; + goto pred_cmd; + } + } + + switch (srso_cmd) { + case SRSO_CMD_OFF: + return; + + case SRSO_CMD_MICROCODE: + if (has_microcode) { + srso_mitigation = SRSO_MITIGATION_MICROCODE; + pr_warn(SRSO_NOTICE); + } + break; + + case SRSO_CMD_SAFE_RET: + if (IS_ENABLED(CONFIG_CPU_SRSO)) { + /* + * Enable the return thunk for generated code + * like ftrace, static_call, etc. + */ + setup_force_cpu_cap(X86_FEATURE_RETHUNK); + setup_force_cpu_cap(X86_FEATURE_UNRET); + + if (boot_cpu_data.x86 == 0x19) { + setup_force_cpu_cap(X86_FEATURE_SRSO_ALIAS); + x86_return_thunk = srso_alias_return_thunk; + } else { + setup_force_cpu_cap(X86_FEATURE_SRSO); + x86_return_thunk = srso_return_thunk; + } + srso_mitigation = SRSO_MITIGATION_SAFE_RET; + } else { + pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + goto pred_cmd; + } + break; + + case SRSO_CMD_IBPB: + if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) { + if (has_microcode) { + setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB); + srso_mitigation = SRSO_MITIGATION_IBPB; + } + } else { + pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n"); + goto pred_cmd; + } + break; + + case SRSO_CMD_IBPB_ON_VMEXIT: + if (IS_ENABLED(CONFIG_CPU_SRSO)) { + if (!boot_cpu_has(X86_FEATURE_ENTRY_IBPB) && has_microcode) { + setup_force_cpu_cap(X86_FEATURE_IBPB_ON_VMEXIT); + srso_mitigation = SRSO_MITIGATION_IBPB_ON_VMEXIT; + } + } else { + pr_err("WARNING: kernel not compiled with CPU_SRSO.\n"); + goto pred_cmd; + } + break; + + default: + break; + } + + pr_info("%s%s\n", srso_strings[srso_mitigation], (has_microcode ? "" : ", no microcode")); + +pred_cmd: + if ((boot_cpu_has(X86_FEATURE_SRSO_NO) || srso_cmd == SRSO_CMD_OFF) && + boot_cpu_has(X86_FEATURE_SBPB)) + x86_pred_cmd = PRED_CMD_SBPB; +} + +#undef pr_fmt #define pr_fmt(fmt) fmt #ifdef CONFIG_SYSFS @@ -2385,6 +2710,21 @@ static ssize_t retbleed_show_state(char *buf) return sysfs_emit(buf, "%s\n", retbleed_strings[retbleed_mitigation]); } +static ssize_t srso_show_state(char *buf) +{ + if (boot_cpu_has(X86_FEATURE_SRSO_NO)) + return sysfs_emit(buf, "Mitigation: SMT disabled\n"); + + return sysfs_emit(buf, "%s%s\n", + srso_strings[srso_mitigation], + (cpu_has_ibpb_brtype_microcode() ? "" : ", no microcode")); +} + +static ssize_t gds_show_state(char *buf) +{ + return sysfs_emit(buf, "%s\n", gds_strings[gds_mitigation]); +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -2434,6 +2774,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_RETBLEED: return retbleed_show_state(buf); + case X86_BUG_SRSO: + return srso_show_state(buf); + + case X86_BUG_GDS: + return gds_show_state(buf); + default: break; } @@ -2498,4 +2844,14 @@ ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, cha { return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED); } + +ssize_t cpu_show_spec_rstack_overflow(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SRSO); +} + +ssize_t cpu_show_gds(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_GDS); +} #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 0ba1067f4e5f..41b573f34a10 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -59,7 +59,6 @@ #include <asm/cacheinfo.h> #include <asm/memtype.h> #include <asm/microcode.h> -#include <asm/microcode_intel.h> #include <asm/intel-family.h> #include <asm/cpu_device_id.h> #include <asm/uv/uv.h> @@ -1250,6 +1249,10 @@ static const __initconst struct x86_cpu_id cpu_vuln_whitelist[] = { #define RETBLEED BIT(3) /* CPU is affected by SMT (cross-thread) return predictions */ #define SMT_RSB BIT(4) +/* CPU is affected by SRSO */ +#define SRSO BIT(5) +/* CPU is affected by GDS */ +#define GDS BIT(6) static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS), @@ -1262,27 +1265,30 @@ static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = { VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS), VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO), - VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO | GDS), + VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO | GDS), + VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED), - VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), + VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED | GDS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE_L, X86_STEPPING_ANY, GDS), + VULNBL_INTEL_STEPPINGS(TIGERLAKE, X86_STEPPING_ANY, GDS), VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED), - VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED), + VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED | GDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO), VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS), VULNBL_AMD(0x15, RETBLEED), VULNBL_AMD(0x16, RETBLEED), - VULNBL_AMD(0x17, RETBLEED | SMT_RSB), + VULNBL_AMD(0x17, RETBLEED | SMT_RSB | SRSO), VULNBL_HYGON(0x18, RETBLEED | SMT_RSB), + VULNBL_AMD(0x19, SRSO), {} }; @@ -1406,6 +1412,21 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) if (cpu_matches(cpu_vuln_blacklist, SMT_RSB)) setup_force_cpu_bug(X86_BUG_SMT_RSB); + if (!cpu_has(c, X86_FEATURE_SRSO_NO)) { + if (cpu_matches(cpu_vuln_blacklist, SRSO)) + setup_force_cpu_bug(X86_BUG_SRSO); + } + + /* + * Check if CPU is vulnerable to GDS. If running in a virtual machine on + * an affected processor, the VMM may have disabled the use of GATHER by + * disabling AVX2. The only way to do this in HW is to clear XCR0[2], + * which means that AVX will be disabled. + */ + if (cpu_matches(cpu_vuln_blacklist, GDS) && !(ia32_cap & ARCH_CAP_GDS_NO) && + boot_cpu_has(X86_FEATURE_AVX)) + setup_force_cpu_bug(X86_BUG_GDS); + if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN)) return; @@ -1962,6 +1983,8 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) validate_apic_and_package_id(c); x86_spec_ctrl_setup_ap(); update_srbds_msr(); + if (boot_cpu_has_bug(X86_BUG_GDS)) + update_gds_msr(); tsx_ap_init(); } @@ -2276,8 +2299,7 @@ void store_cpu_caps(struct cpuinfo_x86 *curr_info) * @prev_info: CPU capabilities stored before an update. * * The microcode loader calls this upon late microcode load to recheck features, - * only when microcode has been updated. Caller holds microcode_mutex and CPU - * hotplug lock. + * only when microcode has been updated. Caller holds and CPU hotplug lock. * * Return: None */ @@ -2319,7 +2341,7 @@ void __init arch_cpu_finalize_init(void) * identify_boot_cpu() initialized SMT support information, let the * core code know. */ - cpu_smt_check_topology(); + cpu_smt_set_num_threads(smp_num_siblings, smp_num_siblings); if (!IS_ENABLED(CONFIG_SMP)) { pr_info("CPU: "); diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 1c44630d4789..1dcd7d4e38ef 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -83,6 +83,7 @@ void cpu_select_mitigations(void); extern void x86_spec_ctrl_setup_ap(void); extern void update_srbds_msr(void); +extern void update_gds_msr(void); extern enum spectre_v2_mitigation spectre_v2_enabled; diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 1c4639588ff9..be4045628fd3 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -20,7 +20,7 @@ #include <asm/bugs.h> #include <asm/cpu.h> #include <asm/intel-family.h> -#include <asm/microcode_intel.h> +#include <asm/microcode.h> #include <asm/hwcap2.h> #include <asm/elf.h> #include <asm/cpu_device_id.h> @@ -184,180 +184,6 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c) return false; } -int intel_cpu_collect_info(struct ucode_cpu_info *uci) -{ - unsigned int val[2]; - unsigned int family, model; - struct cpu_signature csig = { 0 }; - unsigned int eax, ebx, ecx, edx; - - memset(uci, 0, sizeof(*uci)); - - eax = 0x00000001; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - csig.sig = eax; - - family = x86_family(eax); - model = x86_model(eax); - - if (model >= 5 || family > 6) { - /* get processor flags from MSR 0x17 */ - native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); - csig.pf = 1 << ((val[1] >> 18) & 7); - } - - csig.rev = intel_get_microcode_revision(); - - uci->cpu_sig = csig; - - return 0; -} -EXPORT_SYMBOL_GPL(intel_cpu_collect_info); - -/* - * Returns 1 if update has been found, 0 otherwise. - */ -int intel_find_matching_signature(void *mc, unsigned int csig, int cpf) -{ - struct microcode_header_intel *mc_hdr = mc; - struct extended_sigtable *ext_hdr; - struct extended_signature *ext_sig; - int i; - - if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf)) - return 1; - - /* Look for ext. headers: */ - if (get_totalsize(mc_hdr) <= get_datasize(mc_hdr) + MC_HEADER_SIZE) - return 0; - - ext_hdr = mc + get_datasize(mc_hdr) + MC_HEADER_SIZE; - ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE; - - for (i = 0; i < ext_hdr->count; i++) { - if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf)) - return 1; - ext_sig++; - } - return 0; -} -EXPORT_SYMBOL_GPL(intel_find_matching_signature); - -/** - * intel_microcode_sanity_check() - Sanity check microcode file. - * @mc: Pointer to the microcode file contents. - * @print_err: Display failure reason if true, silent if false. - * @hdr_type: Type of file, i.e. normal microcode file or In Field Scan file. - * Validate if the microcode header type matches with the type - * specified here. - * - * Validate certain header fields and verify if computed checksum matches - * with the one specified in the header. - * - * Return: 0 if the file passes all the checks, -EINVAL if any of the checks - * fail. - */ -int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type) -{ - unsigned long total_size, data_size, ext_table_size; - struct microcode_header_intel *mc_header = mc; - struct extended_sigtable *ext_header = NULL; - u32 sum, orig_sum, ext_sigcount = 0, i; - struct extended_signature *ext_sig; - - total_size = get_totalsize(mc_header); - data_size = get_datasize(mc_header); - - if (data_size + MC_HEADER_SIZE > total_size) { - if (print_err) - pr_err("Error: bad microcode data file size.\n"); - return -EINVAL; - } - - if (mc_header->ldrver != 1 || mc_header->hdrver != hdr_type) { - if (print_err) - pr_err("Error: invalid/unknown microcode update format. Header type %d\n", - mc_header->hdrver); - return -EINVAL; - } - - ext_table_size = total_size - (MC_HEADER_SIZE + data_size); - if (ext_table_size) { - u32 ext_table_sum = 0; - u32 *ext_tablep; - - if (ext_table_size < EXT_HEADER_SIZE || - ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { - if (print_err) - pr_err("Error: truncated extended signature table.\n"); - return -EINVAL; - } - - ext_header = mc + MC_HEADER_SIZE + data_size; - if (ext_table_size != exttable_size(ext_header)) { - if (print_err) - pr_err("Error: extended signature table size mismatch.\n"); - return -EFAULT; - } - - ext_sigcount = ext_header->count; - - /* - * Check extended table checksum: the sum of all dwords that - * comprise a valid table must be 0. - */ - ext_tablep = (u32 *)ext_header; - - i = ext_table_size / sizeof(u32); - while (i--) - ext_table_sum += ext_tablep[i]; - - if (ext_table_sum) { - if (print_err) - pr_warn("Bad extended signature table checksum, aborting.\n"); - return -EINVAL; - } - } - - /* - * Calculate the checksum of update data and header. The checksum of - * valid update data and header including the extended signature table - * must be 0. - */ - orig_sum = 0; - i = (MC_HEADER_SIZE + data_size) / sizeof(u32); - while (i--) - orig_sum += ((u32 *)mc)[i]; - - if (orig_sum) { - if (print_err) - pr_err("Bad microcode data checksum, aborting.\n"); - return -EINVAL; - } - - if (!ext_table_size) - return 0; - - /* - * Check extended signature checksum: 0 => valid. - */ - for (i = 0; i < ext_sigcount; i++) { - ext_sig = (void *)ext_header + EXT_HEADER_SIZE + - EXT_SIGNATURE_SIZE * i; - - sum = (mc_header->sig + mc_header->pf + mc_header->cksum) - - (ext_sig->sig + ext_sig->pf + ext_sig->cksum); - if (sum) { - if (print_err) - pr_err("Bad extended signature checksum, aborting.\n"); - return -EINVAL; - } - } - return 0; -} -EXPORT_SYMBOL_GPL(intel_microcode_sanity_check); - static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; diff --git a/arch/x86/kernel/cpu/intel_epb.c b/arch/x86/kernel/cpu/intel_epb.c index 3b8476158236..e4c3ba91321c 100644 --- a/arch/x86/kernel/cpu/intel_epb.c +++ b/arch/x86/kernel/cpu/intel_epb.c @@ -206,7 +206,7 @@ static int intel_epb_offline(unsigned int cpu) static const struct x86_cpu_id intel_epb_normal[] = { X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, ENERGY_PERF_BIAS_NORMAL_POWERSAVE), - X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, + X86_MATCH_INTEL_FAM6_MODEL(ATOM_GRACEMONT, ENERGY_PERF_BIAS_NORMAL_POWERSAVE), X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, ENERGY_PERF_BIAS_NORMAL_POWERSAVE), diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 89e2aab5d34d..6f35f724cc14 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -843,6 +843,26 @@ static noinstr bool quirk_skylake_repmov(void) } /* + * Some Zen-based Instruction Fetch Units set EIPV=RIPV=0 on poison consumption + * errors. This means mce_gather_info() will not save the "ip" and "cs" registers. + * + * However, the context is still valid, so save the "cs" register for later use. + * + * The "ip" register is truly unknown, so don't save it or fixup EIPV/RIPV. + * + * The Instruction Fetch Unit is at MCA bank 1 for all affected systems. + */ +static __always_inline void quirk_zen_ifu(int bank, struct mce *m, struct pt_regs *regs) +{ + if (bank != 1) + return; + if (!(m->status & MCI_STATUS_POISON)) + return; + + m->cs = regs->cs; +} + +/* * Do a quick check if any of the events requires a panic. * This decides if we keep the events around or clear them. */ @@ -861,6 +881,9 @@ static __always_inline int mce_no_way_out(struct mce *m, char **msg, unsigned lo if (mce_flags.snb_ifu_quirk) quirk_sandybridge_ifu(i, m, regs); + if (mce_flags.zen_ifu_quirk) + quirk_zen_ifu(i, m, regs); + m->bank = i; if (mce_severity(m, regs, &tmp, true) >= MCE_PANIC_SEVERITY) { mce_read_aux(m, i); @@ -1608,6 +1631,13 @@ static void __start_timer(struct timer_list *t, unsigned long interval) local_irq_restore(flags); } +static void mc_poll_banks_default(void) +{ + machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); +} + +void (*mc_poll_banks)(void) = mc_poll_banks_default; + static void mce_timer_fn(struct timer_list *t) { struct timer_list *cpu_t = this_cpu_ptr(&mce_timer); @@ -1618,7 +1648,7 @@ static void mce_timer_fn(struct timer_list *t) iv = __this_cpu_read(mce_next_interval); if (mce_available(this_cpu_ptr(&cpu_info))) { - machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); + mc_poll_banks(); if (mce_intel_cmci_poll()) { iv = mce_adjust_timer(iv); @@ -1842,6 +1872,9 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) if (c->x86 == 0x15 && c->x86_model <= 0xf) mce_flags.overflow_recov = 1; + if (c->x86 >= 0x17 && c->x86 <= 0x1A) + mce_flags.zen_ifu_quirk = 1; + } if (c->x86_vendor == X86_VENDOR_INTEL) { diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index 95275a5e57e0..f5323551c1a9 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -56,6 +56,13 @@ static DEFINE_PER_CPU(int, cmci_backoff_cnt); */ static DEFINE_RAW_SPINLOCK(cmci_discover_lock); +/* + * On systems that do support CMCI but it's disabled, polling for MCEs can + * cause the same event to be reported multiple times because IA32_MCi_STATUS + * is shared by the same package. + */ +static DEFINE_SPINLOCK(cmci_poll_lock); + #define CMCI_THRESHOLD 1 #define CMCI_POLL_INTERVAL (30 * HZ) #define CMCI_STORM_INTERVAL (HZ) @@ -426,12 +433,22 @@ void cmci_disable_bank(int bank) raw_spin_unlock_irqrestore(&cmci_discover_lock, flags); } +/* Bank polling function when CMCI is disabled. */ +static void cmci_mc_poll_banks(void) +{ + spin_lock(&cmci_poll_lock); + machine_check_poll(0, this_cpu_ptr(&mce_poll_banks)); + spin_unlock(&cmci_poll_lock); +} + void intel_init_cmci(void) { int banks; - if (!cmci_supported(&banks)) + if (!cmci_supported(&banks)) { + mc_poll_banks = cmci_mc_poll_banks; return; + } mce_threshold_vector = intel_threshold_interrupt; cmci_discover(banks); diff --git a/arch/x86/kernel/cpu/mce/internal.h b/arch/x86/kernel/cpu/mce/internal.h index d2412ce2d312..bcf1b3c66c9c 100644 --- a/arch/x86/kernel/cpu/mce/internal.h +++ b/arch/x86/kernel/cpu/mce/internal.h @@ -157,6 +157,9 @@ struct mce_vendor_flags { */ smca : 1, + /* Zen IFU quirk */ + zen_ifu_quirk : 1, + /* AMD-style error thresholding banks present. */ amd_threshold : 1, @@ -172,7 +175,7 @@ struct mce_vendor_flags { /* Skylake, Cascade Lake, Cooper Lake REP;MOVS* quirk */ skx_repmov_quirk : 1, - __reserved_0 : 56; + __reserved_0 : 55; }; extern struct mce_vendor_flags mce_flags; @@ -274,4 +277,5 @@ static __always_inline u32 mca_msr_reg(int bank, enum mca_msr reg) return 0; } +extern void (*mc_poll_banks)(void); #endif /* __X86_MCE_INTERNAL_H__ */ diff --git a/arch/x86/kernel/cpu/microcode/Makefile b/arch/x86/kernel/cpu/microcode/Makefile index 34098d48c48f..193d98b33a0a 100644 --- a/arch/x86/kernel/cpu/microcode/Makefile +++ b/arch/x86/kernel/cpu/microcode/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only microcode-y := core.o obj-$(CONFIG_MICROCODE) += microcode.o -microcode-$(CONFIG_MICROCODE_INTEL) += intel.o -microcode-$(CONFIG_MICROCODE_AMD) += amd.o +microcode-$(CONFIG_CPU_SUP_INTEL) += intel.o +microcode-$(CONFIG_CPU_SUP_AMD) += amd.o diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 87208e46f7ed..bbd1dc38ea03 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -29,13 +29,53 @@ #include <linux/kernel.h> #include <linux/pci.h> -#include <asm/microcode_amd.h> #include <asm/microcode.h> #include <asm/processor.h> #include <asm/setup.h> #include <asm/cpu.h> #include <asm/msr.h> +#include "internal.h" + +#define UCODE_MAGIC 0x00414d44 +#define UCODE_EQUIV_CPU_TABLE_TYPE 0x00000000 +#define UCODE_UCODE_TYPE 0x00000001 + +#define SECTION_HDR_SIZE 8 +#define CONTAINER_HDR_SZ 12 + +struct equiv_cpu_entry { + u32 installed_cpu; + u32 fixed_errata_mask; + u32 fixed_errata_compare; + u16 equiv_cpu; + u16 res; +} __packed; + +struct microcode_header_amd { + u32 data_code; + u32 patch_id; + u16 mc_patch_data_id; + u8 mc_patch_data_len; + u8 init_flag; + u32 mc_patch_data_checksum; + u32 nb_dev_id; + u32 sb_dev_id; + u16 processor_rev_id; + u8 nb_rev_id; + u8 sb_rev_id; + u8 bios_api_rev; + u8 reserved1[3]; + u32 match_reg[8]; +} __packed; + +struct microcode_amd { + struct microcode_header_amd hdr; + unsigned int mpb[]; +}; + +#define PATCH_MAX_SIZE (3 * PAGE_SIZE) + static struct equiv_cpu_table { unsigned int num_entries; struct equiv_cpu_entry *entry; @@ -56,9 +96,6 @@ struct cont_desc { static u32 ucode_new_rev; -/* One blob per node. */ -static u8 amd_ucode_patch[MAX_NUMNODES][PATCH_MAX_SIZE]; - /* * Microcode patch container file is prepended to the initrd in cpio * format. See Documentation/arch/x86/microcode.rst @@ -415,20 +452,17 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * * Returns true if container found (sets @desc), false otherwise. */ -static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch) +static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) { struct cont_desc desc = { 0 }; - u8 (*patch)[PATCH_MAX_SIZE]; struct microcode_amd *mc; u32 rev, dummy, *new_rev; bool ret = false; #ifdef CONFIG_X86_32 new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); - patch = (u8 (*)[PATCH_MAX_SIZE])__pa_nodebug(&amd_ucode_patch); #else new_rev = &ucode_new_rev; - patch = &amd_ucode_patch[0]; #endif desc.cpuid_1_eax = cpuid_1_eax; @@ -452,9 +486,6 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size, boo if (!__apply_microcode_amd(mc)) { *new_rev = mc->hdr.patch_id; ret = true; - - if (save_patch) - memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE)); } return ret; @@ -507,7 +538,7 @@ static void find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpio_data *ret = cp; } -void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) +static void apply_ucode_from_containers(unsigned int cpuid_1_eax) { struct cpio_data cp = { }; @@ -515,42 +546,12 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) if (!(cp.data && cp.size)) return; - early_apply_microcode(cpuid_1_eax, cp.data, cp.size, true); + early_apply_microcode(cpuid_1_eax, cp.data, cp.size); } -void load_ucode_amd_ap(unsigned int cpuid_1_eax) +void load_ucode_amd_early(unsigned int cpuid_1_eax) { - struct microcode_amd *mc; - struct cpio_data cp; - u32 *new_rev, rev, dummy; - - if (IS_ENABLED(CONFIG_X86_32)) { - mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); - new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); - } else { - mc = (struct microcode_amd *)amd_ucode_patch; - new_rev = &ucode_new_rev; - } - - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - - /* - * Check whether a new patch has been saved already. Also, allow application of - * the same revision in order to pick up SMT-thread-specific configuration even - * if the sibling SMT thread already has an up-to-date revision. - */ - if (*new_rev && rev <= mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) { - *new_rev = mc->hdr.patch_id; - return; - } - } - - find_blobs_in_containers(cpuid_1_eax, &cp); - if (!(cp.data && cp.size)) - return; - - early_apply_microcode(cpuid_1_eax, cp.data, cp.size, false); + return apply_ucode_from_containers(cpuid_1_eax); } static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); @@ -578,23 +579,6 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) return 0; } -void reload_ucode_amd(unsigned int cpu) -{ - u32 rev, dummy __always_unused; - struct microcode_amd *mc; - - mc = (struct microcode_amd *)amd_ucode_patch[cpu_to_node(cpu)]; - - rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - - if (rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - pr_info("reload patch_level=0x%08x\n", ucode_new_rev); - } - } -} - /* * a small, trivial cache of per-family ucode patches */ @@ -655,6 +639,28 @@ static struct ucode_patch *find_patch(unsigned int cpu) return cache_find_patch(equiv_id); } +void reload_ucode_amd(unsigned int cpu) +{ + u32 rev, dummy __always_unused; + struct microcode_amd *mc; + struct ucode_patch *p; + + p = find_patch(cpu); + if (!p) + return; + + mc = p->data; + + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + + if (rev < mc->hdr.patch_id) { + if (!__apply_microcode_amd(mc)) { + ucode_new_rev = mc->hdr.patch_id; + pr_info("reload patch_level=0x%08x\n", ucode_new_rev); + } + } +} + static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) { struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -875,9 +881,6 @@ static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t siz continue; ret = UCODE_NEW; - - memset(&amd_ucode_patch[nid], 0, PATCH_MAX_SIZE); - memcpy(&amd_ucode_patch[nid], p->data, min_t(u32, p->size, PATCH_MAX_SIZE)); } return ret; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 3afcf3de0dd4..6cc7a2c181da 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -31,15 +31,14 @@ #include <linux/fs.h> #include <linux/mm.h> -#include <asm/microcode_intel.h> #include <asm/cpu_device_id.h> -#include <asm/microcode_amd.h> #include <asm/perf_event.h> -#include <asm/microcode.h> #include <asm/processor.h> #include <asm/cmdline.h> #include <asm/setup.h> +#include "internal.h" + #define DRIVER_VERSION "2.2" static struct microcode_ops *microcode_ops; @@ -54,15 +53,12 @@ LIST_HEAD(microcode_cache); * * All non cpu-hotplug-callback call sites use: * - * - microcode_mutex to synchronize with each other; * - cpus_read_lock/unlock() to synchronize with * the cpu-hotplug-callback call sites. * * We guarantee that only a single cpu is being * updated at any particular moment of time. */ -static DEFINE_MUTEX(microcode_mutex); - struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; struct cpu_info_ctx { @@ -172,7 +168,7 @@ void __init load_ucode_bsp(void) if (intel) load_ucode_intel_bsp(); else - load_ucode_amd_bsp(cpuid_1_eax); + load_ucode_amd_early(cpuid_1_eax); } static bool check_loader_disabled_ap(void) @@ -200,7 +196,7 @@ void load_ucode_ap(void) break; case X86_VENDOR_AMD: if (x86_family(cpuid_1_eax) >= 0x10) - load_ucode_amd_ap(cpuid_1_eax); + load_ucode_amd_early(cpuid_1_eax); break; default: break; @@ -298,7 +294,7 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) #endif } -void reload_early_microcode(unsigned int cpu) +static void reload_early_microcode(unsigned int cpu) { int vendor, family; @@ -488,10 +484,7 @@ static ssize_t reload_store(struct device *dev, if (tmp_ret != UCODE_NEW) goto put; - mutex_lock(µcode_mutex); ret = microcode_reload_late(); - mutex_unlock(µcode_mutex); - put: cpus_read_unlock(); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 467cf37ea90a..94dd6af9c963 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -10,15 +10,7 @@ * Copyright (C) 2012 Fenghua Yu <fenghua.yu@intel.com> * H Peter Anvin" <hpa@zytor.com> */ - -/* - * This needs to be before all headers so that pr_debug in printk.h doesn't turn - * printk calls into no_printk(). - * - *#define DEBUG - */ #define pr_fmt(fmt) "microcode: " fmt - #include <linux/earlycpio.h> #include <linux/firmware.h> #include <linux/uaccess.h> @@ -30,13 +22,14 @@ #include <linux/uio.h> #include <linux/mm.h> -#include <asm/microcode_intel.h> #include <asm/intel-family.h> #include <asm/processor.h> #include <asm/tlbflush.h> #include <asm/setup.h> #include <asm/msr.h> +#include "internal.h" + static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; /* Current microcode patch used in early patching on the APs. */ @@ -45,6 +38,208 @@ static struct microcode_intel *intel_ucode_patch; /* last level cache size per core */ static int llc_size_per_core; +/* microcode format is extended from prescott processors */ +struct extended_signature { + unsigned int sig; + unsigned int pf; + unsigned int cksum; +}; + +struct extended_sigtable { + unsigned int count; + unsigned int cksum; + unsigned int reserved[3]; + struct extended_signature sigs[]; +}; + +#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE) +#define EXT_HEADER_SIZE (sizeof(struct extended_sigtable)) +#define EXT_SIGNATURE_SIZE (sizeof(struct extended_signature)) + +static inline unsigned int get_totalsize(struct microcode_header_intel *hdr) +{ + return hdr->datasize ? hdr->totalsize : DEFAULT_UCODE_TOTALSIZE; +} + +static inline unsigned int exttable_size(struct extended_sigtable *et) +{ + return et->count * EXT_SIGNATURE_SIZE + EXT_HEADER_SIZE; +} + +int intel_cpu_collect_info(struct ucode_cpu_info *uci) +{ + unsigned int val[2]; + unsigned int family, model; + struct cpu_signature csig = { 0 }; + unsigned int eax, ebx, ecx, edx; + + memset(uci, 0, sizeof(*uci)); + + eax = 0x00000001; + ecx = 0; + native_cpuid(&eax, &ebx, &ecx, &edx); + csig.sig = eax; + + family = x86_family(eax); + model = x86_model(eax); + + if (model >= 5 || family > 6) { + /* get processor flags from MSR 0x17 */ + native_rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]); + csig.pf = 1 << ((val[1] >> 18) & 7); + } + + csig.rev = intel_get_microcode_revision(); + + uci->cpu_sig = csig; + + return 0; +} +EXPORT_SYMBOL_GPL(intel_cpu_collect_info); + +/* + * Returns 1 if update has been found, 0 otherwise. + */ +int intel_find_matching_signature(void *mc, unsigned int csig, int cpf) +{ + struct microcode_header_intel *mc_hdr = mc; + struct extended_sigtable *ext_hdr; + struct extended_signature *ext_sig; + int i; + + if (intel_cpu_signatures_match(csig, cpf, mc_hdr->sig, mc_hdr->pf)) + return 1; + + /* Look for ext. headers: */ + if (get_totalsize(mc_hdr) <= intel_microcode_get_datasize(mc_hdr) + MC_HEADER_SIZE) + return 0; + + ext_hdr = mc + intel_microcode_get_datasize(mc_hdr) + MC_HEADER_SIZE; + ext_sig = (void *)ext_hdr + EXT_HEADER_SIZE; + + for (i = 0; i < ext_hdr->count; i++) { + if (intel_cpu_signatures_match(csig, cpf, ext_sig->sig, ext_sig->pf)) + return 1; + ext_sig++; + } + return 0; +} +EXPORT_SYMBOL_GPL(intel_find_matching_signature); + +/** + * intel_microcode_sanity_check() - Sanity check microcode file. + * @mc: Pointer to the microcode file contents. + * @print_err: Display failure reason if true, silent if false. + * @hdr_type: Type of file, i.e. normal microcode file or In Field Scan file. + * Validate if the microcode header type matches with the type + * specified here. + * + * Validate certain header fields and verify if computed checksum matches + * with the one specified in the header. + * + * Return: 0 if the file passes all the checks, -EINVAL if any of the checks + * fail. + */ +int intel_microcode_sanity_check(void *mc, bool print_err, int hdr_type) +{ + unsigned long total_size, data_size, ext_table_size; + struct microcode_header_intel *mc_header = mc; + struct extended_sigtable *ext_header = NULL; + u32 sum, orig_sum, ext_sigcount = 0, i; + struct extended_signature *ext_sig; + + total_size = get_totalsize(mc_header); + data_size = intel_microcode_get_datasize(mc_header); + + if (data_size + MC_HEADER_SIZE > total_size) { + if (print_err) + pr_err("Error: bad microcode data file size.\n"); + return -EINVAL; + } + + if (mc_header->ldrver != 1 || mc_header->hdrver != hdr_type) { + if (print_err) + pr_err("Error: invalid/unknown microcode update format. Header type %d\n", + mc_header->hdrver); + return -EINVAL; + } + + ext_table_size = total_size - (MC_HEADER_SIZE + data_size); + if (ext_table_size) { + u32 ext_table_sum = 0; + u32 *ext_tablep; + + if (ext_table_size < EXT_HEADER_SIZE || + ((ext_table_size - EXT_HEADER_SIZE) % EXT_SIGNATURE_SIZE)) { + if (print_err) + pr_err("Error: truncated extended signature table.\n"); + return -EINVAL; + } + + ext_header = mc + MC_HEADER_SIZE + data_size; + if (ext_table_size != exttable_size(ext_header)) { + if (print_err) + pr_err("Error: extended signature table size mismatch.\n"); + return -EFAULT; + } + + ext_sigcount = ext_header->count; + + /* + * Check extended table checksum: the sum of all dwords that + * comprise a valid table must be 0. + */ + ext_tablep = (u32 *)ext_header; + + i = ext_table_size / sizeof(u32); + while (i--) + ext_table_sum += ext_tablep[i]; + + if (ext_table_sum) { + if (print_err) + pr_warn("Bad extended signature table checksum, aborting.\n"); + return -EINVAL; + } + } + + /* + * Calculate the checksum of update data and header. The checksum of + * valid update data and header including the extended signature table + * must be 0. + */ + orig_sum = 0; + i = (MC_HEADER_SIZE + data_size) / sizeof(u32); + while (i--) + orig_sum += ((u32 *)mc)[i]; + + if (orig_sum) { + if (print_err) + pr_err("Bad microcode data checksum, aborting.\n"); + return -EINVAL; + } + + if (!ext_table_size) + return 0; + + /* + * Check extended signature checksum: 0 => valid. + */ + for (i = 0; i < ext_sigcount; i++) { + ext_sig = (void *)ext_header + EXT_HEADER_SIZE + + EXT_SIGNATURE_SIZE * i; + + sum = (mc_header->sig + mc_header->pf + mc_header->cksum) - + (ext_sig->sig + ext_sig->pf + ext_sig->cksum); + if (sum) { + if (print_err) + pr_err("Bad extended signature checksum, aborting.\n"); + return -EINVAL; + } + } + return 0; +} +EXPORT_SYMBOL_GPL(intel_microcode_sanity_check); + /* * Returns 1 if update has been found, 0 otherwise. */ @@ -202,86 +397,6 @@ next: return patch; } -static void show_saved_mc(void) -{ -#ifdef DEBUG - int i = 0, j; - unsigned int sig, pf, rev, total_size, data_size, date; - struct ucode_cpu_info uci; - struct ucode_patch *p; - - if (list_empty(µcode_cache)) { - pr_debug("no microcode data saved.\n"); - return; - } - - intel_cpu_collect_info(&uci); - - sig = uci.cpu_sig.sig; - pf = uci.cpu_sig.pf; - rev = uci.cpu_sig.rev; - pr_debug("CPU: sig=0x%x, pf=0x%x, rev=0x%x\n", sig, pf, rev); - - list_for_each_entry(p, µcode_cache, plist) { - struct microcode_header_intel *mc_saved_header; - struct extended_sigtable *ext_header; - struct extended_signature *ext_sig; - int ext_sigcount; - - mc_saved_header = (struct microcode_header_intel *)p->data; - - sig = mc_saved_header->sig; - pf = mc_saved_header->pf; - rev = mc_saved_header->rev; - date = mc_saved_header->date; - - total_size = get_totalsize(mc_saved_header); - data_size = get_datasize(mc_saved_header); - - pr_debug("mc_saved[%d]: sig=0x%x, pf=0x%x, rev=0x%x, total size=0x%x, date = %04x-%02x-%02x\n", - i++, sig, pf, rev, total_size, - date & 0xffff, - date >> 24, - (date >> 16) & 0xff); - - /* Look for ext. headers: */ - if (total_size <= data_size + MC_HEADER_SIZE) - continue; - - ext_header = (void *)mc_saved_header + data_size + MC_HEADER_SIZE; - ext_sigcount = ext_header->count; - ext_sig = (void *)ext_header + EXT_HEADER_SIZE; - - for (j = 0; j < ext_sigcount; j++) { - sig = ext_sig->sig; - pf = ext_sig->pf; - - pr_debug("\tExtended[%d]: sig=0x%x, pf=0x%x\n", - j, sig, pf); - - ext_sig++; - } - } -#endif -} - -/* - * Save this microcode patch. It will be loaded early when a CPU is - * hot-added or resumes. - */ -static void save_mc_for_early(struct ucode_cpu_info *uci, u8 *mc, unsigned int size) -{ - /* Synchronization during CPU hotplug. */ - static DEFINE_MUTEX(x86_cpu_microcode_mutex); - - mutex_lock(&x86_cpu_microcode_mutex); - - save_microcode_patch(uci, mc, size); - show_saved_mc(); - - mutex_unlock(&x86_cpu_microcode_mutex); -} - static bool load_builtin_intel_microcode(struct cpio_data *cp) { unsigned int eax = 1, ebx, ecx = 0, edx; @@ -428,9 +543,6 @@ int __init save_microcode_in_initrd_intel(void) intel_cpu_collect_info(&uci); scan_microcode(cp.data, cp.size, &uci, true); - - show_saved_mc(); - return 0; } @@ -701,12 +813,8 @@ static enum ucode_state generic_load_microcode(int cpu, struct iov_iter *iter) vfree(uci->mc); uci->mc = (struct microcode_intel *)new_mc; - /* - * If early loading microcode is supported, save this mc into - * permanent memory. So it will be loaded early when a CPU is hot added - * or resumes. - */ - save_mc_for_early(uci, new_mc, new_mc_size); + /* Save for CPU hotplug */ + save_microcode_patch(uci, new_mc, new_mc_size); pr_debug("CPU%d found a matching microcode update with version 0x%x (current=0x%x)\n", cpu, new_rev, uci->cpu_sig.rev); diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h new file mode 100644 index 000000000000..bf883aa71233 --- /dev/null +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _X86_MICROCODE_INTERNAL_H +#define _X86_MICROCODE_INTERNAL_H + +#include <linux/earlycpio.h> +#include <linux/initrd.h> + +#include <asm/cpu.h> +#include <asm/microcode.h> + +struct ucode_patch { + struct list_head plist; + void *data; /* Intel uses only this one */ + unsigned int size; + u32 patch_id; + u16 equiv_cpu; +}; + +extern struct list_head microcode_cache; + +struct device; + +enum ucode_state { + UCODE_OK = 0, + UCODE_NEW, + UCODE_UPDATED, + UCODE_NFOUND, + UCODE_ERROR, +}; + +struct microcode_ops { + enum ucode_state (*request_microcode_fw)(int cpu, struct device *dev); + + void (*microcode_fini_cpu)(int cpu); + + /* + * The generic 'microcode_core' part guarantees that + * the callbacks below run on a target cpu when they + * are being called. + * See also the "Synchronization" section in microcode_core.c. + */ + enum ucode_state (*apply_microcode)(int cpu); + int (*collect_cpu_info)(int cpu, struct cpu_signature *csig); +}; + +extern struct ucode_cpu_info ucode_cpu_info[]; +struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa); + +#define MAX_UCODE_COUNT 128 + +#define QCHAR(a, b, c, d) ((a) + ((b) << 8) + ((c) << 16) + ((d) << 24)) +#define CPUID_INTEL1 QCHAR('G', 'e', 'n', 'u') +#define CPUID_INTEL2 QCHAR('i', 'n', 'e', 'I') +#define CPUID_INTEL3 QCHAR('n', 't', 'e', 'l') +#define CPUID_AMD1 QCHAR('A', 'u', 't', 'h') +#define CPUID_AMD2 QCHAR('e', 'n', 't', 'i') +#define CPUID_AMD3 QCHAR('c', 'A', 'M', 'D') + +#define CPUID_IS(a, b, c, ebx, ecx, edx) \ + (!(((ebx) ^ (a)) | ((edx) ^ (b)) | ((ecx) ^ (c)))) + +/* + * In early loading microcode phase on BSP, boot_cpu_data is not set up yet. + * x86_cpuid_vendor() gets vendor id for BSP. + * + * In 32 bit AP case, accessing boot_cpu_data needs linear address. To simplify + * coding, we still use x86_cpuid_vendor() to get vendor id for AP. + * + * x86_cpuid_vendor() gets vendor information directly from CPUID. + */ +static inline int x86_cpuid_vendor(void) +{ + u32 eax = 0x00000000; + u32 ebx, ecx = 0, edx; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + if (CPUID_IS(CPUID_INTEL1, CPUID_INTEL2, CPUID_INTEL3, ebx, ecx, edx)) + return X86_VENDOR_INTEL; + + if (CPUID_IS(CPUID_AMD1, CPUID_AMD2, CPUID_AMD3, ebx, ecx, edx)) + return X86_VENDOR_AMD; + + return X86_VENDOR_UNKNOWN; +} + +static inline unsigned int x86_cpuid_family(void) +{ + u32 eax = 0x00000001; + u32 ebx, ecx = 0, edx; + + native_cpuid(&eax, &ebx, &ecx, &edx); + + return x86_family(eax); +} + +extern bool initrd_gone; + +#ifdef CONFIG_CPU_SUP_AMD +void load_ucode_amd_bsp(unsigned int family); +void load_ucode_amd_ap(unsigned int family); +void load_ucode_amd_early(unsigned int cpuid_1_eax); +int save_microcode_in_initrd_amd(unsigned int family); +void reload_ucode_amd(unsigned int cpu); +struct microcode_ops *init_amd_microcode(void); +void exit_amd_microcode(void); +#else /* CONFIG_CPU_SUP_AMD */ +static inline void load_ucode_amd_bsp(unsigned int family) { } +static inline void load_ucode_amd_ap(unsigned int family) { } +static inline void load_ucode_amd_early(unsigned int family) { } +static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } +static inline void reload_ucode_amd(unsigned int cpu) { } +static inline struct microcode_ops *init_amd_microcode(void) { return NULL; } +static inline void exit_amd_microcode(void) { } +#endif /* !CONFIG_CPU_SUP_AMD */ + +#ifdef CONFIG_CPU_SUP_INTEL +void load_ucode_intel_bsp(void); +void load_ucode_intel_ap(void); +int save_microcode_in_initrd_intel(void); +void reload_ucode_intel(void); +struct microcode_ops *init_intel_microcode(void); +#else /* CONFIG_CPU_SUP_INTEL */ +static inline void load_ucode_intel_bsp(void) { } +static inline void load_ucode_intel_ap(void) { } +static inline int save_microcode_in_initrd_intel(void) { return -EINVAL; } +static inline void reload_ucode_intel(void) { } +static inline struct microcode_ops *init_intel_microcode(void) { return NULL; } +#endif /* !CONFIG_CPU_SUP_INTEL */ + +#endif /* _X86_MICROCODE_INTERNAL_H */ diff --git a/arch/x86/kernel/fpu/context.h b/arch/x86/kernel/fpu/context.h index af5cbdd9bd29..f6d856bd50bc 100644 --- a/arch/x86/kernel/fpu/context.h +++ b/arch/x86/kernel/fpu/context.h @@ -19,8 +19,7 @@ * FPU state for a task MUST let the rest of the kernel know that the * FPU registers are no longer valid for this task. * - * Either one of these invalidation functions is enough. Invalidate - * a resource you control: CPU if using the CPU for something else + * Invalidate a resource you control: CPU if using the CPU for something else * (with preemption disabled), FPU for the current task, or a task that * is prevented from running by the current task. */ diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 1015af1ae562..98e507cc7d34 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -679,7 +679,7 @@ static void fpu_reset_fpregs(void) struct fpu *fpu = ¤t->thread.fpu; fpregs_lock(); - fpu__drop(fpu); + __fpu_invalidate_fpregs_state(fpu); /* * This does not change the actual hardware registers. It just * resets the memory image and sets TIF_NEED_FPU_LOAD so a diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 0bab497c9436..1afbc4866b10 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -882,6 +882,13 @@ void __init fpu__init_system_xstate(unsigned int legacy_size) goto out_disable; } + /* + * CPU capabilities initialization runs before FPU init. So + * X86_FEATURE_OSXSAVE is not set. Now that XSAVE is completely + * functional, set the feature bit so depending code works. + */ + setup_force_cpu_cap(X86_FEATURE_OSXSAVE); + print_xstate_offset_size(); pr_info("x86/fpu: Enabled xstate features 0x%llx, context size is %d bytes, using '%s' format.\n", fpu_kernel_cfg.max_features, diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index c5b9289837dc..ea6995920b7a 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -51,7 +51,9 @@ SYM_CODE_START_NOALIGN(startup_64) * for us. These identity mapped page tables map all of the * kernel pages and possibly all of memory. * - * %rsi holds a physical pointer to real_mode_data. + * %RSI holds the physical address of the boot_params structure + * provided by the bootloader. Preserve it in %R15 so C function calls + * will not clobber it. * * We come here either directly from a 64bit bootloader, or from * arch/x86/boot/compressed/head_64.S. @@ -62,6 +64,7 @@ SYM_CODE_START_NOALIGN(startup_64) * compiled to run at we first fixup the physical addresses in our page * tables and then reload them. */ + mov %rsi, %r15 /* Set up the stack for verify_cpu() */ leaq (__end_init_task - PTREGS_SIZE)(%rip), %rsp @@ -75,9 +78,7 @@ SYM_CODE_START_NOALIGN(startup_64) shrq $32, %rdx wrmsr - pushq %rsi call startup_64_setup_env - popq %rsi /* Now switch to __KERNEL_CS so IRET works reliably */ pushq $__KERNEL_CS @@ -93,12 +94,10 @@ SYM_CODE_START_NOALIGN(startup_64) * Activate SEV/SME memory encryption if supported/enabled. This needs to * be done now, since this also includes setup of the SEV-SNP CPUID table, * which needs to be done before any CPUID instructions are executed in - * subsequent code. + * subsequent code. Pass the boot_params pointer as the first argument. */ - movq %rsi, %rdi - pushq %rsi + movq %r15, %rdi call sme_enable - popq %rsi #endif /* Sanitize CPU configuration */ @@ -111,9 +110,8 @@ SYM_CODE_START_NOALIGN(startup_64) * programmed into CR3. */ leaq _text(%rip), %rdi - pushq %rsi + movq %r15, %rsi call __startup_64 - popq %rsi /* Form the CR3 value being sure to include the CR3 modifier */ addq $(early_top_pgt - __START_KERNEL_map), %rax @@ -127,8 +125,6 @@ SYM_CODE_START(secondary_startup_64) * At this point the CPU runs in 64bit mode CS.L = 1 CS.D = 0, * and someone has loaded a mapped page table. * - * %rsi holds a physical pointer to real_mode_data. - * * We come here either from startup_64 (using physical addresses) * or from trampoline.S (using virtual addresses). * @@ -153,6 +149,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) UNWIND_HINT_END_OF_STACK ANNOTATE_NOENDBR + /* Clear %R15 which holds the boot_params pointer on the boot CPU */ + xorq %r15, %r15 + /* * Retrieve the modifier (SME encryption mask if SME is active) to be * added to the initial pgdir entry that will be programmed into CR3. @@ -199,13 +198,9 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) * hypervisor could lie about the C-bit position to perform a ROP * attack on the guest by writing to the unencrypted stack and wait for * the next RET instruction. - * %rsi carries pointer to realmode data and is callee-clobbered. Save - * and restore it. */ - pushq %rsi movq %rax, %rdi call sev_verify_cbit - popq %rsi /* * Switch to new page-table @@ -365,9 +360,7 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) wrmsr /* Setup and Load IDT */ - pushq %rsi call early_setup_idt - popq %rsi /* Check if nx is implemented */ movl $0x80000001, %eax @@ -403,9 +396,8 @@ SYM_INNER_LABEL(secondary_startup_64_no_verify, SYM_L_GLOBAL) pushq $0 popfq - /* rsi is pointer to real mode structure with interesting info. - pass it to C */ - movq %rsi, %rdi + /* Pass the boot_params pointer as first argument */ + movq %r15, %rdi .Ljump_to_C_code: /* diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index c8eb1ac5125a..1648aa0204d9 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -421,7 +421,7 @@ static void __init hpet_legacy_clockevent_register(struct hpet_channel *hc) * the IO_APIC has been initialized. */ hc->cpu = boot_cpu_data.cpu_index; - strncpy(hc->name, "hpet", sizeof(hc->name)); + strscpy(hc->name, "hpet", sizeof(hc->name)); hpet_init_clockevent(hc, 50); hc->evt.tick_resume = hpet_clkevt_legacy_resume; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 57b0037d0a99..517821b48391 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -226,7 +226,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src, u8 *real) } /* Check whether insn is indirect jump */ -static int __insn_is_indirect_jump(struct insn *insn) +static int insn_is_indirect_jump(struct insn *insn) { return ((insn->opcode.bytes[0] == 0xff && (X86_MODRM_REG(insn->modrm.value) & 6) == 4) || /* Jump */ @@ -260,26 +260,6 @@ static int insn_jump_into_range(struct insn *insn, unsigned long start, int len) return (start <= target && target <= start + len); } -static int insn_is_indirect_jump(struct insn *insn) -{ - int ret = __insn_is_indirect_jump(insn); - -#ifdef CONFIG_RETPOLINE - /* - * Jump to x86_indirect_thunk_* is treated as an indirect jump. - * Note that even with CONFIG_RETPOLINE=y, the kernel compiled with - * older gcc may use indirect jump. So we add this check instead of - * replace indirect-jump check. - */ - if (!ret) - ret = insn_jump_into_range(insn, - (unsigned long)__indirect_thunk_start, - (unsigned long)__indirect_thunk_end - - (unsigned long)__indirect_thunk_start); -#endif - return ret; -} - /* Decode whole function to ensure any instructions don't jump into target */ static int can_optimize(unsigned long paddr) { @@ -334,9 +314,21 @@ static int can_optimize(unsigned long paddr) /* Recover address */ insn.kaddr = (void *)addr; insn.next_byte = (void *)(addr + insn.length); - /* Check any instructions don't jump into target */ - if (insn_is_indirect_jump(&insn) || - insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE, + /* + * Check any instructions don't jump into target, indirectly or + * directly. + * + * The indirect case is present to handle a code with jump + * tables. When the kernel uses retpolines, the check should in + * theory additionally look for jumps to indirect thunks. + * However, the kernel built with retpolines or IBT has jump + * tables disabled so the check can be skipped altogether. + */ + if (!IS_ENABLED(CONFIG_RETPOLINE) && + !IS_ENABLED(CONFIG_X86_KERNEL_IBT) && + insn_is_indirect_jump(&insn)) + return 0; + if (insn_jump_into_range(&insn, paddr + INT3_INSN_SIZE, DISP32_SIZE)) return 0; addr += insn.length; diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 1cceac5984da..526d4da3dcd4 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -966,10 +966,8 @@ static void __init kvm_init_platform(void) * Ensure that _bss_decrypted section is marked as decrypted in the * shared pages list. */ - nr_pages = DIV_ROUND_UP(__end_bss_decrypted - __start_bss_decrypted, - PAGE_SIZE); early_set_mem_enc_dec_hypercall((unsigned long)__start_bss_decrypted, - nr_pages, 0); + __end_bss_decrypted - __start_bss_decrypted, 0); /* * If not booted using EFI, enable Live migration support. diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index ac10b46c5832..975f98d5eee5 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -75,10 +75,16 @@ DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key); void __init native_pv_lock_init(void) { - if (!boot_cpu_has(X86_FEATURE_HYPERVISOR)) + if (IS_ENABLED(CONFIG_PARAVIRT_SPINLOCKS) && + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) static_branch_disable(&virt_spin_lock_key); } +static void native_tlb_remove_table(struct mmu_gather *tlb, void *table) +{ + tlb_remove_page(tlb, table); +} + unsigned int paravirt_patch(u8 type, void *insn_buff, unsigned long addr, unsigned int len) { @@ -295,8 +301,7 @@ struct paravirt_patch_template pv_ops = { .mmu.flush_tlb_kernel = native_flush_tlb_global, .mmu.flush_tlb_one_user = native_flush_tlb_one_user, .mmu.flush_tlb_multi = native_flush_tlb_multi, - .mmu.tlb_remove_table = - (void (*)(struct mmu_gather *, void *))tlb_remove_page, + .mmu.tlb_remove_table = native_tlb_remove_table, .mmu.exit_mmap = paravirt_nop, .mmu.notify_page_enc_status_changed = paravirt_nop, diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 1ee7bed453de..d380c9399480 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1575,6 +1575,9 @@ static enum es_result vc_handle_dr7_write(struct ghcb *ghcb, long val, *reg = vc_insn_get_rm(ctxt); enum es_result ret; + if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) + return ES_VMM_ERROR; + if (!reg) return ES_DECODE_FAILED; @@ -1612,6 +1615,9 @@ static enum es_result vc_handle_dr7_read(struct ghcb *ghcb, struct sev_es_runtime_data *data = this_cpu_read(runtime_data); long *reg = vc_insn_get_rm(ctxt); + if (sev_status & MSR_AMD64_SNP_DEBUG_SWAP) + return ES_VMM_ERROR; + if (!reg) return ES_DECODE_FAILED; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e1aa2cd7734b..d40ed3a7dc23 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -327,14 +327,6 @@ static void notrace start_secondary(void *unused) } /** - * topology_smt_supported - Check whether SMT is supported by the CPUs - */ -bool topology_smt_supported(void) -{ - return smp_num_siblings > 1; -} - -/** * topology_phys_to_logical_pkg - Map a physical package id to a logical * @phys_pkg: The physical package id to map * @@ -632,14 +624,9 @@ static void __init build_sched_topology(void) }; #endif #ifdef CONFIG_SCHED_CLUSTER - /* - * For now, skip the cluster domain on Hybrid. - */ - if (!cpu_feature_enabled(X86_FEATURE_HYBRID_CPU)) { - x86_topology[i++] = (struct sched_domain_topology_level){ - cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) - }; - } + x86_topology[i++] = (struct sched_domain_topology_level){ + cpu_clustergroup_mask, x86_cluster_flags, SD_INIT_NAME(CLS) + }; #endif #ifdef CONFIG_SCHED_MC x86_topology[i++] = (struct sched_domain_topology_level){ diff --git a/arch/x86/kernel/static_call.c b/arch/x86/kernel/static_call.c index b70670a98597..77a9316da435 100644 --- a/arch/x86/kernel/static_call.c +++ b/arch/x86/kernel/static_call.c @@ -186,6 +186,19 @@ EXPORT_SYMBOL_GPL(arch_static_call_transform); */ bool __static_call_fixup(void *tramp, u8 op, void *dest) { + unsigned long addr = (unsigned long)tramp; + /* + * Not all .return_sites are a static_call trampoline (most are not). + * Check if the 3 bytes after the return are still kernel text, if not, + * then this definitely is not a trampoline and we need not worry + * further. + * + * This avoids the memcmp() below tripping over pagefaults etc.. + */ + if (((addr >> PAGE_SHIFT) != ((addr + 7) >> PAGE_SHIFT)) && + !kernel_text_address(addr + 7)) + return false; + if (memcmp(tramp+5, tramp_ud, 3)) { /* Not a trampoline site, not our problem. */ return false; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 3425c6a943e4..15f97c0abc9d 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1258,7 +1258,7 @@ static void __init check_system_tsc_reliable(void) if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC) && boot_cpu_has(X86_FEATURE_NONSTOP_TSC) && boot_cpu_has(X86_FEATURE_TSC_ADJUST) && - nr_online_nodes <= 2) + nr_online_nodes <= 4) tsc_disable_clocksource_watchdog(); } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 03c885d3640f..83d41c2601d7 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -133,14 +133,26 @@ SECTIONS KPROBES_TEXT SOFTIRQENTRY_TEXT #ifdef CONFIG_RETPOLINE - __indirect_thunk_start = .; - *(.text.__x86.*) - __indirect_thunk_end = .; + *(.text..__x86.indirect_thunk) + *(.text..__x86.return_thunk) #endif STATIC_CALL_TEXT ALIGN_ENTRY_TEXT_BEGIN +#ifdef CONFIG_CPU_SRSO + *(.text..__x86.rethunk_untrain) +#endif + ENTRY_TEXT + +#ifdef CONFIG_CPU_SRSO + /* + * See the comment above srso_alias_untrain_ret()'s + * definition. + */ + . = srso_alias_untrain_ret | (1 << 2) | (1 << 8) | (1 << 14) | (1 << 20); + *(.text..__x86.rethunk_safe) +#endif ALIGN_ENTRY_TEXT_END *(.gnu.warning) @@ -509,7 +521,24 @@ INIT_PER_CPU(irq_stack_backing_store); #endif #ifdef CONFIG_RETHUNK -. = ASSERT((__x86_return_thunk & 0x3f) == 0, "__x86_return_thunk not cacheline-aligned"); +. = ASSERT((retbleed_return_thunk & 0x3f) == 0, "retbleed_return_thunk not cacheline-aligned"); +. = ASSERT((srso_safe_ret & 0x3f) == 0, "srso_safe_ret not cacheline-aligned"); +#endif + +#ifdef CONFIG_CPU_SRSO +/* + * GNU ld cannot do XOR until 2.41. + * https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=f6f78318fca803c4907fb8d7f6ded8295f1947b1 + * + * LLVM lld cannot do XOR until lld-17. + * https://github.com/llvm/llvm-project/commit/fae96104d4378166cbe5c875ef8ed808a356f3fb + * + * Instead do: (A | B) - (A & B) in order to compute the XOR + * of the two function addresses: + */ +. = ASSERT(((ABSOLUTE(srso_alias_untrain_ret) | srso_alias_safe_ret) - + (ABSOLUTE(srso_alias_untrain_ret) & srso_alias_safe_ret)) == ((1 << 2) | (1 << 8) | (1 << 14) | (1 << 20)), + "SRSO function pair won't alias"); #endif #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 7f4d13383cf2..d3432687c9e6 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -729,6 +729,9 @@ void kvm_set_cpu_caps(void) F(NULL_SEL_CLR_BASE) | F(AUTOIBRS) | 0 /* PrefetchCtlMsr */ ); + if (cpu_feature_enabled(X86_FEATURE_SRSO_NO)) + kvm_cpu_cap_set(X86_FEATURE_SRSO_NO); + kvm_cpu_cap_init_kvm_defined(CPUID_8000_0022_EAX, F(PERFMON_V2) ); diff --git a/arch/x86/kvm/svm/sev.c b/arch/x86/kvm/svm/sev.c index 07756b7348ae..d3aec1f2cad2 100644 --- a/arch/x86/kvm/svm/sev.c +++ b/arch/x86/kvm/svm/sev.c @@ -2417,15 +2417,18 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) */ memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs)); - vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb); - vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb); - vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb); - vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb); - vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb); + BUILD_BUG_ON(sizeof(svm->sev_es.valid_bitmap) != sizeof(ghcb->save.valid_bitmap)); + memcpy(&svm->sev_es.valid_bitmap, &ghcb->save.valid_bitmap, sizeof(ghcb->save.valid_bitmap)); - svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb); + vcpu->arch.regs[VCPU_REGS_RAX] = kvm_ghcb_get_rax_if_valid(svm, ghcb); + vcpu->arch.regs[VCPU_REGS_RBX] = kvm_ghcb_get_rbx_if_valid(svm, ghcb); + vcpu->arch.regs[VCPU_REGS_RCX] = kvm_ghcb_get_rcx_if_valid(svm, ghcb); + vcpu->arch.regs[VCPU_REGS_RDX] = kvm_ghcb_get_rdx_if_valid(svm, ghcb); + vcpu->arch.regs[VCPU_REGS_RSI] = kvm_ghcb_get_rsi_if_valid(svm, ghcb); - if (ghcb_xcr0_is_valid(ghcb)) { + svm->vmcb->save.cpl = kvm_ghcb_get_cpl_if_valid(svm, ghcb); + + if (kvm_ghcb_xcr0_is_valid(svm)) { vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb); kvm_update_cpuid_runtime(vcpu); } @@ -2436,84 +2439,88 @@ static void sev_es_sync_from_ghcb(struct vcpu_svm *svm) control->exit_code_hi = upper_32_bits(exit_code); control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb); control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb); + svm->sev_es.sw_scratch = kvm_ghcb_get_sw_scratch_if_valid(svm, ghcb); /* Clear the valid entries fields */ memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); } +static u64 kvm_ghcb_get_sw_exit_code(struct vmcb_control_area *control) +{ + return (((u64)control->exit_code_hi) << 32) | control->exit_code; +} + static int sev_es_validate_vmgexit(struct vcpu_svm *svm) { - struct kvm_vcpu *vcpu; - struct ghcb *ghcb; + struct vmcb_control_area *control = &svm->vmcb->control; + struct kvm_vcpu *vcpu = &svm->vcpu; u64 exit_code; u64 reason; - ghcb = svm->sev_es.ghcb; - /* * Retrieve the exit code now even though it may not be marked valid * as it could help with debugging. */ - exit_code = ghcb_get_sw_exit_code(ghcb); + exit_code = kvm_ghcb_get_sw_exit_code(control); /* Only GHCB Usage code 0 is supported */ - if (ghcb->ghcb_usage) { + if (svm->sev_es.ghcb->ghcb_usage) { reason = GHCB_ERR_INVALID_USAGE; goto vmgexit_err; } reason = GHCB_ERR_MISSING_INPUT; - if (!ghcb_sw_exit_code_is_valid(ghcb) || - !ghcb_sw_exit_info_1_is_valid(ghcb) || - !ghcb_sw_exit_info_2_is_valid(ghcb)) + if (!kvm_ghcb_sw_exit_code_is_valid(svm) || + !kvm_ghcb_sw_exit_info_1_is_valid(svm) || + !kvm_ghcb_sw_exit_info_2_is_valid(svm)) goto vmgexit_err; - switch (ghcb_get_sw_exit_code(ghcb)) { + switch (exit_code) { case SVM_EXIT_READ_DR7: break; case SVM_EXIT_WRITE_DR7: - if (!ghcb_rax_is_valid(ghcb)) + if (!kvm_ghcb_rax_is_valid(svm)) goto vmgexit_err; break; case SVM_EXIT_RDTSC: break; case SVM_EXIT_RDPMC: - if (!ghcb_rcx_is_valid(ghcb)) + if (!kvm_ghcb_rcx_is_valid(svm)) goto vmgexit_err; break; case SVM_EXIT_CPUID: - if (!ghcb_rax_is_valid(ghcb) || - !ghcb_rcx_is_valid(ghcb)) + if (!kvm_ghcb_rax_is_valid(svm) || + !kvm_ghcb_rcx_is_valid(svm)) goto vmgexit_err; - if (ghcb_get_rax(ghcb) == 0xd) - if (!ghcb_xcr0_is_valid(ghcb)) + if (vcpu->arch.regs[VCPU_REGS_RAX] == 0xd) + if (!kvm_ghcb_xcr0_is_valid(svm)) goto vmgexit_err; break; case SVM_EXIT_INVD: break; case SVM_EXIT_IOIO: - if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) { - if (!ghcb_sw_scratch_is_valid(ghcb)) + if (control->exit_info_1 & SVM_IOIO_STR_MASK) { + if (!kvm_ghcb_sw_scratch_is_valid(svm)) goto vmgexit_err; } else { - if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK)) - if (!ghcb_rax_is_valid(ghcb)) + if (!(control->exit_info_1 & SVM_IOIO_TYPE_MASK)) + if (!kvm_ghcb_rax_is_valid(svm)) goto vmgexit_err; } break; case SVM_EXIT_MSR: - if (!ghcb_rcx_is_valid(ghcb)) + if (!kvm_ghcb_rcx_is_valid(svm)) goto vmgexit_err; - if (ghcb_get_sw_exit_info_1(ghcb)) { - if (!ghcb_rax_is_valid(ghcb) || - !ghcb_rdx_is_valid(ghcb)) + if (control->exit_info_1) { + if (!kvm_ghcb_rax_is_valid(svm) || + !kvm_ghcb_rdx_is_valid(svm)) goto vmgexit_err; } break; case SVM_EXIT_VMMCALL: - if (!ghcb_rax_is_valid(ghcb) || - !ghcb_cpl_is_valid(ghcb)) + if (!kvm_ghcb_rax_is_valid(svm) || + !kvm_ghcb_cpl_is_valid(svm)) goto vmgexit_err; break; case SVM_EXIT_RDTSCP: @@ -2521,19 +2528,19 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) case SVM_EXIT_WBINVD: break; case SVM_EXIT_MONITOR: - if (!ghcb_rax_is_valid(ghcb) || - !ghcb_rcx_is_valid(ghcb) || - !ghcb_rdx_is_valid(ghcb)) + if (!kvm_ghcb_rax_is_valid(svm) || + !kvm_ghcb_rcx_is_valid(svm) || + !kvm_ghcb_rdx_is_valid(svm)) goto vmgexit_err; break; case SVM_EXIT_MWAIT: - if (!ghcb_rax_is_valid(ghcb) || - !ghcb_rcx_is_valid(ghcb)) + if (!kvm_ghcb_rax_is_valid(svm) || + !kvm_ghcb_rcx_is_valid(svm)) goto vmgexit_err; break; case SVM_VMGEXIT_MMIO_READ: case SVM_VMGEXIT_MMIO_WRITE: - if (!ghcb_sw_scratch_is_valid(ghcb)) + if (!kvm_ghcb_sw_scratch_is_valid(svm)) goto vmgexit_err; break; case SVM_VMGEXIT_NMI_COMPLETE: @@ -2549,11 +2556,9 @@ static int sev_es_validate_vmgexit(struct vcpu_svm *svm) return 0; vmgexit_err: - vcpu = &svm->vcpu; - if (reason == GHCB_ERR_INVALID_USAGE) { vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n", - ghcb->ghcb_usage); + svm->sev_es.ghcb->ghcb_usage); } else if (reason == GHCB_ERR_INVALID_EVENT) { vcpu_unimpl(vcpu, "vmgexit: exit code %#llx is not valid\n", exit_code); @@ -2563,11 +2568,8 @@ vmgexit_err: dump_ghcb(svm); } - /* Clear the valid entries fields */ - memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap)); - - ghcb_set_sw_exit_info_1(ghcb, 2); - ghcb_set_sw_exit_info_2(ghcb, reason); + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, reason); /* Resume the guest to "return" the error code. */ return 1; @@ -2586,7 +2588,7 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm) */ if (svm->sev_es.ghcb_sa_sync) { kvm_write_guest(svm->vcpu.kvm, - ghcb_get_sw_scratch(svm->sev_es.ghcb), + svm->sev_es.sw_scratch, svm->sev_es.ghcb_sa, svm->sev_es.ghcb_sa_len); svm->sev_es.ghcb_sa_sync = false; @@ -2632,12 +2634,11 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu) static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) { struct vmcb_control_area *control = &svm->vmcb->control; - struct ghcb *ghcb = svm->sev_es.ghcb; u64 ghcb_scratch_beg, ghcb_scratch_end; u64 scratch_gpa_beg, scratch_gpa_end; void *scratch_va; - scratch_gpa_beg = ghcb_get_sw_scratch(ghcb); + scratch_gpa_beg = svm->sev_es.sw_scratch; if (!scratch_gpa_beg) { pr_err("vmgexit: scratch gpa not provided\n"); goto e_scratch; @@ -2708,8 +2709,8 @@ static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len) return 0; e_scratch: - ghcb_set_sw_exit_info_1(ghcb, 2); - ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_SCRATCH_AREA); return 1; } @@ -2822,7 +2823,6 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); struct vmcb_control_area *control = &svm->vmcb->control; u64 ghcb_gpa, exit_code; - struct ghcb *ghcb; int ret; /* Validate the GHCB */ @@ -2847,20 +2847,18 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) } svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva; - ghcb = svm->sev_es.ghcb_map.hva; - trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb); - - exit_code = ghcb_get_sw_exit_code(ghcb); + trace_kvm_vmgexit_enter(vcpu->vcpu_id, svm->sev_es.ghcb); + sev_es_sync_from_ghcb(svm); ret = sev_es_validate_vmgexit(svm); if (ret) return ret; - sev_es_sync_from_ghcb(svm); - ghcb_set_sw_exit_info_1(ghcb, 0); - ghcb_set_sw_exit_info_2(ghcb, 0); + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 0); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 0); + exit_code = kvm_ghcb_get_sw_exit_code(control); switch (exit_code) { case SVM_VMGEXIT_MMIO_READ: ret = setup_vmgexit_scratch(svm, true, control->exit_info_2); @@ -2898,13 +2896,13 @@ int sev_handle_vmgexit(struct kvm_vcpu *vcpu) break; case 1: /* Get AP jump table address */ - ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, sev->ap_jump_table); break; default: pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n", control->exit_info_1); - ghcb_set_sw_exit_info_1(ghcb, 2); - ghcb_set_sw_exit_info_2(ghcb, GHCB_ERR_INVALID_INPUT); + ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, 2); + ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, GHCB_ERR_INVALID_INPUT); } ret = 1; diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 956726d867aa..d4bfdc607fe7 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1498,7 +1498,9 @@ static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if (sd->current_vmcb != svm->vmcb) { sd->current_vmcb = svm->vmcb; - indirect_branch_prediction_barrier(); + + if (!cpu_feature_enabled(X86_FEATURE_IBPB_ON_VMEXIT)) + indirect_branch_prediction_barrier(); } if (kvm_vcpu_apicv_active(vcpu)) avic_vcpu_load(vcpu, cpu); @@ -4004,6 +4006,8 @@ static noinstr void svm_vcpu_enter_exit(struct kvm_vcpu *vcpu, bool spec_ctrl_in guest_state_enter_irqoff(); + amd_clear_divider(); + if (sev_es_guest(vcpu->kvm)) __svm_sev_es_vcpu_run(svm, spec_ctrl_intercepted); else diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h index 18af7e712a5a..8239c8de45ac 100644 --- a/arch/x86/kvm/svm/svm.h +++ b/arch/x86/kvm/svm/svm.h @@ -190,10 +190,12 @@ struct vcpu_sev_es_state { /* SEV-ES support */ struct sev_es_save_area *vmsa; struct ghcb *ghcb; + u8 valid_bitmap[16]; struct kvm_host_map ghcb_map; bool received_first_sipi; /* SEV-ES scratch area support */ + u64 sw_scratch; void *ghcb_sa; u32 ghcb_sa_len; bool ghcb_sa_sync; @@ -744,4 +746,28 @@ void sev_es_unmap_ghcb(struct vcpu_svm *svm); void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); void __svm_vcpu_run(struct vcpu_svm *svm, bool spec_ctrl_intercepted); +#define DEFINE_KVM_GHCB_ACCESSORS(field) \ + static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm) \ + { \ + return test_bit(GHCB_BITMAP_IDX(field), \ + (unsigned long *)&svm->sev_es.valid_bitmap); \ + } \ + \ + static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm, struct ghcb *ghcb) \ + { \ + return kvm_ghcb_##field##_is_valid(svm) ? ghcb->save.field : 0; \ + } \ + +DEFINE_KVM_GHCB_ACCESSORS(cpl) +DEFINE_KVM_GHCB_ACCESSORS(rax) +DEFINE_KVM_GHCB_ACCESSORS(rcx) +DEFINE_KVM_GHCB_ACCESSORS(rdx) +DEFINE_KVM_GHCB_ACCESSORS(rbx) +DEFINE_KVM_GHCB_ACCESSORS(rsi) +DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code) +DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1) +DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2) +DEFINE_KVM_GHCB_ACCESSORS(sw_scratch) +DEFINE_KVM_GHCB_ACCESSORS(xcr0) + #endif diff --git a/arch/x86/kvm/svm/vmenter.S b/arch/x86/kvm/svm/vmenter.S index 8e8295e774f0..ef2ebabb059c 100644 --- a/arch/x86/kvm/svm/vmenter.S +++ b/arch/x86/kvm/svm/vmenter.S @@ -222,7 +222,7 @@ SYM_FUNC_START(__svm_vcpu_run) * because interrupt handlers won't sanitize 'ret' if the return is * from the kernel. */ - UNTRAIN_RET + UNTRAIN_RET_VM /* * Clear all general purpose registers except RSP and RAX to prevent @@ -359,7 +359,7 @@ SYM_FUNC_START(__svm_sev_es_vcpu_run) * because interrupt handlers won't sanitize RET if the return is * from the kernel. */ - UNTRAIN_RET + UNTRAIN_RET_VM /* "Pop" @spec_ctrl_intercepted. */ pop %_ASM_BX diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 278dbd37dab2..c381770bcbf1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1616,7 +1616,7 @@ static bool kvm_is_immutable_feature_msr(u32 msr) ARCH_CAP_SKIP_VMENTRY_L1DFLUSH | ARCH_CAP_SSB_NO | ARCH_CAP_MDS_NO | \ ARCH_CAP_PSCHANGE_MC_NO | ARCH_CAP_TSX_CTRL_MSR | ARCH_CAP_TAA_NO | \ ARCH_CAP_SBDR_SSDP_NO | ARCH_CAP_FBSDP_NO | ARCH_CAP_PSDP_NO | \ - ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO) + ARCH_CAP_FB_CLEAR | ARCH_CAP_RRSBA | ARCH_CAP_PBRSB_NO | ARCH_CAP_GDS_NO) static u64 kvm_get_arch_capabilities(void) { @@ -1673,6 +1673,9 @@ static u64 kvm_get_arch_capabilities(void) */ } + if (!boot_cpu_has_bug(X86_BUG_GDS) || gds_ucode_mitigated()) + data |= ARCH_CAP_GDS_NO; + return data; } diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S index 3fd066d42ec0..cd86aeb5fdd3 100644 --- a/arch/x86/lib/retpoline.S +++ b/arch/x86/lib/retpoline.S @@ -11,8 +11,9 @@ #include <asm/unwind_hints.h> #include <asm/percpu.h> #include <asm/frame.h> +#include <asm/nops.h> - .section .text.__x86.indirect_thunk + .section .text..__x86.indirect_thunk .macro POLINE reg @@ -131,36 +132,107 @@ SYM_CODE_END(__x86_indirect_jump_thunk_array) */ #ifdef CONFIG_RETHUNK - .section .text.__x86.return_thunk +/* + * srso_alias_untrain_ret() and srso_alias_safe_ret() are placed at + * special addresses: + * + * - srso_alias_untrain_ret() is 2M aligned + * - srso_alias_safe_ret() is also in the same 2M page but bits 2, 8, 14 + * and 20 in its virtual address are set (while those bits in the + * srso_alias_untrain_ret() function are cleared). + * + * This guarantees that those two addresses will alias in the branch + * target buffer of Zen3/4 generations, leading to any potential + * poisoned entries at that BTB slot to get evicted. + * + * As a result, srso_alias_safe_ret() becomes a safe return. + */ +#ifdef CONFIG_CPU_SRSO + .section .text..__x86.rethunk_untrain + +SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ASM_NOP2 + lfence + jmp srso_alias_return_thunk +SYM_FUNC_END(srso_alias_untrain_ret) +__EXPORT_THUNK(srso_alias_untrain_ret) + + .section .text..__x86.rethunk_safe +#else +/* dummy definition for alternatives */ +SYM_START(srso_alias_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(srso_alias_untrain_ret) +#endif + +SYM_START(srso_alias_safe_ret, SYM_L_GLOBAL, SYM_A_NONE) + lea 8(%_ASM_SP), %_ASM_SP + UNWIND_HINT_FUNC + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_FUNC_END(srso_alias_safe_ret) + + .section .text..__x86.return_thunk + +SYM_CODE_START(srso_alias_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_alias_safe_ret + ud2 +SYM_CODE_END(srso_alias_return_thunk) + +/* + * Some generic notes on the untraining sequences: + * + * They are interchangeable when it comes to flushing potentially wrong + * RET predictions from the BTB. + * + * The SRSO Zen1/2 (MOVABS) untraining sequence is longer than the + * Retbleed sequence because the return sequence done there + * (srso_safe_ret()) is longer and the return sequence must fully nest + * (end before) the untraining sequence. Therefore, the untraining + * sequence must fully overlap the return sequence. + * + * Regarding alignment - the instructions which need to be untrained, + * must all start at a cacheline boundary for Zen1/2 generations. That + * is, instruction sequences starting at srso_safe_ret() and + * the respective instruction sequences at retbleed_return_thunk() + * must start at a cacheline boundary. + */ /* * Safety details here pertain to the AMD Zen{1,2} microarchitecture: - * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for + * 1) The RET at retbleed_return_thunk must be on a 64 byte boundary, for * alignment within the BTB. - * 2) The instruction at zen_untrain_ret must contain, and not + * 2) The instruction at retbleed_untrain_ret must contain, and not * end with, the 0xc3 byte of the RET. * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread * from re-poisioning the BTB prediction. */ .align 64 - .skip 64 - (__x86_return_thunk - zen_untrain_ret), 0xcc -SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + .skip 64 - (retbleed_return_thunk - retbleed_untrain_ret), 0xcc +SYM_START(retbleed_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) ANNOTATE_NOENDBR /* - * As executed from zen_untrain_ret, this is: + * As executed from retbleed_untrain_ret, this is: * * TEST $0xcc, %bl * LFENCE - * JMP __x86_return_thunk + * JMP retbleed_return_thunk * * Executing the TEST instruction has a side effect of evicting any BTB * prediction (potentially attacker controlled) attached to the RET, as - * __x86_return_thunk + 1 isn't an instruction boundary at the moment. + * retbleed_return_thunk + 1 isn't an instruction boundary at the moment. */ .byte 0xf6 /* - * As executed from __x86_return_thunk, this is a plain RET. + * As executed from retbleed_return_thunk, this is a plain RET. * * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8. * @@ -172,13 +244,13 @@ SYM_START(zen_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) * With SMT enabled and STIBP active, a sibling thread cannot poison * RET's prediction to a type of its choice, but can evict the * prediction due to competitive sharing. If the prediction is - * evicted, __x86_return_thunk will suffer Straight Line Speculation + * evicted, retbleed_return_thunk will suffer Straight Line Speculation * which will be contained safely by the INT3. */ -SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL) +SYM_INNER_LABEL(retbleed_return_thunk, SYM_L_GLOBAL) ret int3 -SYM_CODE_END(__x86_return_thunk) +SYM_CODE_END(retbleed_return_thunk) /* * Ensure the TEST decoding / BTB invalidation is complete. @@ -189,11 +261,67 @@ SYM_CODE_END(__x86_return_thunk) * Jump back and execute the RET in the middle of the TEST instruction. * INT3 is for SLS protection. */ - jmp __x86_return_thunk + jmp retbleed_return_thunk int3 -SYM_FUNC_END(zen_untrain_ret) -__EXPORT_THUNK(zen_untrain_ret) +SYM_FUNC_END(retbleed_untrain_ret) +__EXPORT_THUNK(retbleed_untrain_ret) +/* + * SRSO untraining sequence for Zen1/2, similar to retbleed_untrain_ret() + * above. On kernel entry, srso_untrain_ret() is executed which is a + * + * movabs $0xccccc30824648d48,%rax + * + * and when the return thunk executes the inner label srso_safe_ret() + * later, it is a stack manipulation and a RET which is mispredicted and + * thus a "safe" one to use. + */ + .align 64 + .skip 64 - (srso_safe_ret - srso_untrain_ret), 0xcc +SYM_START(srso_untrain_ret, SYM_L_GLOBAL, SYM_A_NONE) + ANNOTATE_NOENDBR + .byte 0x48, 0xb8 + +/* + * This forces the function return instruction to speculate into a trap + * (UD2 in srso_return_thunk() below). This RET will then mispredict + * and execution will continue at the return site read from the top of + * the stack. + */ +SYM_INNER_LABEL(srso_safe_ret, SYM_L_GLOBAL) + lea 8(%_ASM_SP), %_ASM_SP + ret + int3 + int3 + /* end of movabs */ + lfence + call srso_safe_ret + ud2 +SYM_CODE_END(srso_safe_ret) +SYM_FUNC_END(srso_untrain_ret) +__EXPORT_THUNK(srso_untrain_ret) + +SYM_CODE_START(srso_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + call srso_safe_ret + ud2 +SYM_CODE_END(srso_return_thunk) + +SYM_FUNC_START(entry_untrain_ret) + ALTERNATIVE_2 "jmp retbleed_untrain_ret", \ + "jmp srso_untrain_ret", X86_FEATURE_SRSO, \ + "jmp srso_alias_untrain_ret", X86_FEATURE_SRSO_ALIAS +SYM_FUNC_END(entry_untrain_ret) +__EXPORT_THUNK(entry_untrain_ret) + +SYM_CODE_START(__x86_return_thunk) + UNWIND_HINT_FUNC + ANNOTATE_NOENDBR + ANNOTATE_UNRET_SAFE + ret + int3 +SYM_CODE_END(__x86_return_thunk) EXPORT_SYMBOL(__x86_return_thunk) #endif /* CONFIG_RETHUNK */ diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 8192452d1d2d..ffa25e962343 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -20,7 +20,6 @@ #include <asm/tlb.h> #include <asm/proto.h> #include <asm/dma.h> /* for MAX_DMA_PFN */ -#include <asm/microcode.h> #include <asm/kaslr.h> #include <asm/hypervisor.h> #include <asm/cpufeature.h> @@ -273,7 +272,7 @@ static void __init probe_page_size_mask(void) static const struct x86_cpu_id invlpg_miss_ids[] = { INTEL_MATCH(INTEL_FAM6_ALDERLAKE ), INTEL_MATCH(INTEL_FAM6_ALDERLAKE_L ), - INTEL_MATCH(INTEL_FAM6_ALDERLAKE_N ), + INTEL_MATCH(INTEL_FAM6_ATOM_GRACEMONT ), INTEL_MATCH(INTEL_FAM6_RAPTORLAKE ), INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_P), INTEL_MATCH(INTEL_FAM6_RAPTORLAKE_S), diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 54bbd5163e8d..6faea41e99b6 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -288,11 +288,10 @@ static bool amd_enc_cache_flush_required(void) return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT); } -static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) +static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) { #ifdef CONFIG_PARAVIRT - unsigned long sz = npages << PAGE_SHIFT; - unsigned long vaddr_end = vaddr + sz; + unsigned long vaddr_end = vaddr + size; while (vaddr < vaddr_end) { int psize, pmask, level; @@ -342,7 +341,7 @@ static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool e snp_set_memory_private(vaddr, npages); if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) - enc_dec_hypercall(vaddr, npages, enc); + enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc); return true; } @@ -466,7 +465,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; - early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); + early_set_mem_enc_dec_hypercall(start, size, enc); out: __flush_tlb_all(); return ret; @@ -482,9 +481,9 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) return early_set_memory_enc_dec(vaddr, size, true); } -void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) +void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc) { - enc_dec_hypercall(vaddr, npages, enc); + enc_dec_hypercall(vaddr, size, enc); } void __init sme_early_init(void) diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 438adb695daa..a5930042139d 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -701,6 +701,38 @@ static void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) *pprog = prog; } +static void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg, + u32 src_reg) +{ + u8 *prog = *pprog; + + if (is64) { + /* movs[b,w,l]q dst, src */ + if (num_bits == 8) + EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe, + add_2reg(0xC0, src_reg, dst_reg)); + else if (num_bits == 16) + EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf, + add_2reg(0xC0, src_reg, dst_reg)); + else if (num_bits == 32) + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63, + add_2reg(0xC0, src_reg, dst_reg)); + } else { + /* movs[b,w]l dst, src */ + if (num_bits == 8) { + EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe, + add_2reg(0xC0, src_reg, dst_reg)); + } else if (num_bits == 16) { + if (is_ereg(dst_reg) || is_ereg(src_reg)) + EMIT1(add_2mod(0x40, src_reg, dst_reg)); + EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf, + add_2reg(0xC0, src_reg, dst_reg)); + } + } + + *pprog = prog; +} + /* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */ static void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off) { @@ -779,6 +811,29 @@ static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) *pprog = prog; } +/* LDSX: dst_reg = *(s8*)(src_reg + off) */ +static void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) +{ + u8 *prog = *pprog; + + switch (size) { + case BPF_B: + /* Emit 'movsx rax, byte ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBE); + break; + case BPF_H: + /* Emit 'movsx rax, word ptr [rax + off]' */ + EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBF); + break; + case BPF_W: + /* Emit 'movsx rax, dword ptr [rax+0x14]' */ + EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x63); + break; + } + emit_insn_suffix(&prog, src_reg, dst_reg, off); + *pprog = prog; +} + /* STX: *(u8*)(dst_reg + off) = src_reg */ static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) { @@ -1028,9 +1083,14 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image case BPF_ALU64 | BPF_MOV | BPF_X: case BPF_ALU | BPF_MOV | BPF_X: - emit_mov_reg(&prog, - BPF_CLASS(insn->code) == BPF_ALU64, - dst_reg, src_reg); + if (insn->off == 0) + emit_mov_reg(&prog, + BPF_CLASS(insn->code) == BPF_ALU64, + dst_reg, src_reg); + else + emit_movsx_reg(&prog, insn->off, + BPF_CLASS(insn->code) == BPF_ALU64, + dst_reg, src_reg); break; /* neg dst */ @@ -1134,15 +1194,26 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image /* mov rax, dst_reg */ emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg); - /* - * xor edx, edx - * equivalent to 'xor rdx, rdx', but one byte less - */ - EMIT2(0x31, 0xd2); + if (insn->off == 0) { + /* + * xor edx, edx + * equivalent to 'xor rdx, rdx', but one byte less + */ + EMIT2(0x31, 0xd2); - /* div src_reg */ - maybe_emit_1mod(&prog, src_reg, is64); - EMIT2(0xF7, add_1reg(0xF0, src_reg)); + /* div src_reg */ + maybe_emit_1mod(&prog, src_reg, is64); + EMIT2(0xF7, add_1reg(0xF0, src_reg)); + } else { + if (BPF_CLASS(insn->code) == BPF_ALU) + EMIT1(0x99); /* cdq */ + else + EMIT2(0x48, 0x99); /* cqo */ + + /* idiv src_reg */ + maybe_emit_1mod(&prog, src_reg, is64); + EMIT2(0xF7, add_1reg(0xF8, src_reg)); + } if (BPF_OP(insn->code) == BPF_MOD && dst_reg != BPF_REG_3) @@ -1262,6 +1333,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image break; case BPF_ALU | BPF_END | BPF_FROM_BE: + case BPF_ALU64 | BPF_END | BPF_FROM_LE: switch (imm32) { case 16: /* Emit 'ror %ax, 8' to swap lower 2 bytes */ @@ -1370,9 +1442,17 @@ st: if (is_imm8(insn->off)) case BPF_LDX | BPF_PROBE_MEM | BPF_W: case BPF_LDX | BPF_MEM | BPF_DW: case BPF_LDX | BPF_PROBE_MEM | BPF_DW: + /* LDXS: dst_reg = *(s8*)(src_reg + off) */ + case BPF_LDX | BPF_MEMSX | BPF_B: + case BPF_LDX | BPF_MEMSX | BPF_H: + case BPF_LDX | BPF_MEMSX | BPF_W: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: + case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: insn_off = insn->off; - if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + if (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { /* Conservatively check that src_reg + insn->off is a kernel address: * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE * src_reg is used as scratch for src_reg += insn->off and restored @@ -1415,8 +1495,13 @@ st: if (is_imm8(insn->off)) start_of_ldx = prog; end_of_jmp[-1] = start_of_ldx - end_of_jmp; } - emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); - if (BPF_MODE(insn->code) == BPF_PROBE_MEM) { + if (BPF_MODE(insn->code) == BPF_PROBE_MEMSX || + BPF_MODE(insn->code) == BPF_MEMSX) + emit_ldsx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); + else + emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); + if (BPF_MODE(insn->code) == BPF_PROBE_MEM || + BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { struct exception_table_entry *ex; u8 *_insn = image + proglen + (start_of_ldx - temp); s64 delta; @@ -1730,16 +1815,24 @@ emit_cond_jmp: /* Convert BPF opcode to x86 */ break; case BPF_JMP | BPF_JA: - if (insn->off == -1) - /* -1 jmp instructions will always jump - * backwards two bytes. Explicitly handling - * this case avoids wasting too many passes - * when there are long sequences of replaced - * dead code. - */ - jmp_offset = -2; - else - jmp_offset = addrs[i + insn->off] - addrs[i]; + case BPF_JMP32 | BPF_JA: + if (BPF_CLASS(insn->code) == BPF_JMP) { + if (insn->off == -1) + /* -1 jmp instructions will always jump + * backwards two bytes. Explicitly handling + * this case avoids wasting too many passes + * when there are long sequences of replaced + * dead code. + */ + jmp_offset = -2; + else + jmp_offset = addrs[i + insn->off] - addrs[i]; + } else { + if (insn->imm == -1) + jmp_offset = -2; + else + jmp_offset = addrs[i + insn->imm] - addrs[i]; + } if (!jmp_offset) { /* @@ -1857,59 +1950,177 @@ emit_jmp: return proglen; } -static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_regs, - int stack_size) +static void clean_stack_garbage(const struct btf_func_model *m, + u8 **pprog, int nr_stack_slots, + int stack_size) +{ + int arg_size, off; + u8 *prog; + + /* Generally speaking, the compiler will pass the arguments + * on-stack with "push" instruction, which will take 8-byte + * on the stack. In this case, there won't be garbage values + * while we copy the arguments from origin stack frame to current + * in BPF_DW. + * + * However, sometimes the compiler will only allocate 4-byte on + * the stack for the arguments. For now, this case will only + * happen if there is only one argument on-stack and its size + * not more than 4 byte. In this case, there will be garbage + * values on the upper 4-byte where we store the argument on + * current stack frame. + * + * arguments on origin stack: + * + * stack_arg_1(4-byte) xxx(4-byte) + * + * what we copy: + * + * stack_arg_1(8-byte): stack_arg_1(origin) xxx + * + * and the xxx is the garbage values which we should clean here. + */ + if (nr_stack_slots != 1) + return; + + /* the size of the last argument */ + arg_size = m->arg_size[m->nr_args - 1]; + if (arg_size <= 4) { + off = -(stack_size - 4); + prog = *pprog; + /* mov DWORD PTR [rbp + off], 0 */ + if (!is_imm8(off)) + EMIT2_off32(0xC7, 0x85, off); + else + EMIT3(0xC7, 0x45, off); + EMIT(0, 4); + *pprog = prog; + } +} + +/* get the count of the regs that are used to pass arguments */ +static int get_nr_used_regs(const struct btf_func_model *m) { - int i, j, arg_size; - bool next_same_struct = false; + int i, arg_regs, nr_used_regs = 0; + + for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { + arg_regs = (m->arg_size[i] + 7) / 8; + if (nr_used_regs + arg_regs <= 6) + nr_used_regs += arg_regs; + + if (nr_used_regs >= 6) + break; + } + + return nr_used_regs; +} + +static void save_args(const struct btf_func_model *m, u8 **prog, + int stack_size, bool for_call_origin) +{ + int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0; + int i, j; /* Store function arguments to stack. * For a function that accepts two pointers the sequence will be: * mov QWORD PTR [rbp-0x10],rdi * mov QWORD PTR [rbp-0x8],rsi */ - for (i = 0, j = 0; i < min(nr_regs, 6); i++) { - /* The arg_size is at most 16 bytes, enforced by the verifier. */ - arg_size = m->arg_size[j]; - if (arg_size > 8) { - arg_size = 8; - next_same_struct = !next_same_struct; - } + for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { + arg_regs = (m->arg_size[i] + 7) / 8; - emit_stx(prog, bytes_to_bpf_size(arg_size), - BPF_REG_FP, - i == 5 ? X86_REG_R9 : BPF_REG_1 + i, - -(stack_size - i * 8)); + /* According to the research of Yonghong, struct members + * should be all in register or all on the stack. + * Meanwhile, the compiler will pass the argument on regs + * if the remaining regs can hold the argument. + * + * Disorder of the args can happen. For example: + * + * struct foo_struct { + * long a; + * int b; + * }; + * int foo(char, char, char, char, char, struct foo_struct, + * char); + * + * the arg1-5,arg7 will be passed by regs, and arg6 will + * by stack. + */ + if (nr_regs + arg_regs > 6) { + /* copy function arguments from origin stack frame + * into current stack frame. + * + * The starting address of the arguments on-stack + * is: + * rbp + 8(push rbp) + + * 8(return addr of origin call) + + * 8(return addr of the caller) + * which means: rbp + 24 + */ + for (j = 0; j < arg_regs; j++) { + emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP, + nr_stack_slots * 8 + 0x18); + emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0, + -stack_size); + + if (!nr_stack_slots) + first_off = stack_size; + stack_size -= 8; + nr_stack_slots++; + } + } else { + /* Only copy the arguments on-stack to current + * 'stack_size' and ignore the regs, used to + * prepare the arguments on-stack for orign call. + */ + if (for_call_origin) { + nr_regs += arg_regs; + continue; + } - j = next_same_struct ? j : j + 1; + /* copy the arguments from regs into stack */ + for (j = 0; j < arg_regs; j++) { + emit_stx(prog, BPF_DW, BPF_REG_FP, + nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs, + -stack_size); + stack_size -= 8; + nr_regs++; + } + } } + + clean_stack_garbage(m, prog, nr_stack_slots, first_off); } -static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_regs, +static void restore_regs(const struct btf_func_model *m, u8 **prog, int stack_size) { - int i, j, arg_size; - bool next_same_struct = false; + int i, j, arg_regs, nr_regs = 0; /* Restore function arguments from stack. * For a function that accepts two pointers the sequence will be: * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] + * + * The logic here is similar to what we do in save_args() */ - for (i = 0, j = 0; i < min(nr_regs, 6); i++) { - /* The arg_size is at most 16 bytes, enforced by the verifier. */ - arg_size = m->arg_size[j]; - if (arg_size > 8) { - arg_size = 8; - next_same_struct = !next_same_struct; + for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { + arg_regs = (m->arg_size[i] + 7) / 8; + if (nr_regs + arg_regs <= 6) { + for (j = 0; j < arg_regs; j++) { + emit_ldx(prog, BPF_DW, + nr_regs == 5 ? X86_REG_R9 : BPF_REG_1 + nr_regs, + BPF_REG_FP, + -stack_size); + stack_size -= 8; + nr_regs++; + } + } else { + stack_size -= 8 * arg_regs; } - emit_ldx(prog, bytes_to_bpf_size(arg_size), - i == 5 ? X86_REG_R9 : BPF_REG_1 + i, - BPF_REG_FP, - -(stack_size - i * 8)); - - j = next_same_struct ? j : j + 1; + if (nr_regs >= 6) + break; } } @@ -1938,7 +2149,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, /* arg1: mov rdi, progs[i] */ emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); /* arg2: lea rsi, [rbp - ctx_cookie_off] */ - EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); + if (!is_imm8(-run_ctx_off)) + EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off); + else + EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog)) return -EINVAL; @@ -1954,7 +2168,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, emit_nops(&prog, 2); /* arg1: lea rdi, [rbp - stack_size] */ - EMIT4(0x48, 0x8D, 0x7D, -stack_size); + if (!is_imm8(-stack_size)) + EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size); + else + EMIT4(0x48, 0x8D, 0x7D, -stack_size); /* arg2: progs[i]->insnsi for interpreter */ if (!p->jited) emit_mov_imm64(&prog, BPF_REG_2, @@ -1984,7 +2201,10 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, /* arg2: mov rsi, rbx <- start time in nsec */ emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); /* arg3: lea rdx, [rbp - run_ctx_off] */ - EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); + if (!is_imm8(-run_ctx_off)) + EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off); + else + EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog)) return -EINVAL; @@ -2136,7 +2356,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i void *func_addr) { int i, ret, nr_regs = m->nr_args, stack_size = 0; - int regs_off, nregs_off, ip_off, run_ctx_off; + int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; @@ -2150,8 +2370,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) nr_regs += (m->arg_size[i] + 7) / 8 - 1; - /* x86-64 supports up to 6 arguments. 7+ can be added in the future */ - if (nr_regs > 6) + /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6 + * are passed through regs, the remains are through stack. + */ + if (nr_regs > MAX_BPF_FUNC_ARGS) return -ENOTSUPP; /* Generated trampoline stack layout: @@ -2170,7 +2392,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag * + * RBP - rbx_off [ rbx value ] always + * * RBP - run_ctx_off [ bpf_tramp_run_ctx ] + * + * [ stack_argN ] BPF_TRAMP_F_CALL_ORIG + * [ ... ] + * [ stack_arg2 ] + * RBP - arg_stack_off [ stack_arg1 ] */ /* room for return value of orig_call or fentry prog */ @@ -2190,9 +2419,26 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i ip_off = stack_size; + stack_size += 8; + rbx_off = stack_size; + stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7; run_ctx_off = stack_size; + if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) { + /* the space that used to pass arguments on-stack */ + stack_size += (nr_regs - get_nr_used_regs(m)) * 8; + /* make sure the stack pointer is 16-byte aligned if we + * need pass arguments on stack, which means + * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)] + * should be 16-byte aligned. Following code depend on + * that stack_size is already 8-byte aligned. + */ + stack_size += (stack_size % 16) ? 0 : 8; + } + + arg_stack_off = stack_size; + if (flags & BPF_TRAMP_F_SKIP_FRAME) { /* skip patched call instruction and point orig_call to actual * body of the kernel function. @@ -2212,8 +2458,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i x86_call_depth_emit_accounting(&prog, NULL); EMIT1(0x55); /* push rbp */ EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ - EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ - EMIT1(0x53); /* push rbx */ + if (!is_imm8(stack_size)) + /* sub rsp, stack_size */ + EMIT3_off32(0x48, 0x81, 0xEC, stack_size); + else + /* sub rsp, stack_size */ + EMIT4(0x48, 0x83, 0xEC, stack_size); + /* mov QWORD PTR [rbp - rbx_off], rbx */ + emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off); /* Store number of argument registers of the traced function: * mov rax, nr_regs @@ -2231,7 +2483,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off); } - save_regs(m, &prog, nr_regs, regs_off); + save_args(m, &prog, regs_off, false); if (flags & BPF_TRAMP_F_CALL_ORIG) { /* arg1: mov rdi, im */ @@ -2261,7 +2513,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (flags & BPF_TRAMP_F_CALL_ORIG) { - restore_regs(m, &prog, nr_regs, regs_off); + restore_regs(m, &prog, regs_off); + save_args(m, &prog, arg_stack_off, true); if (flags & BPF_TRAMP_F_ORIG_STACK) { emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); @@ -2302,7 +2555,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i } if (flags & BPF_TRAMP_F_RESTORE_REGS) - restore_regs(m, &prog, nr_regs, regs_off); + restore_regs(m, &prog, regs_off); /* This needs to be done regardless. If there were fmod_ret programs, * the return value is only updated on the stack and still needs to be @@ -2321,7 +2574,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (save_ret) emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); - EMIT1(0x5B); /* pop rbx */ + emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off); EMIT1(0xC9); /* leave */ if (flags & BPF_TRAMP_F_SKIP_FRAME) /* skip our return address and return to parent */ diff --git a/arch/x86/platform/efi/memmap.c b/arch/x86/platform/efi/memmap.c index c69f8471e6d0..4ef20b49eb5e 100644 --- a/arch/x86/platform/efi/memmap.c +++ b/arch/x86/platform/efi/memmap.c @@ -82,7 +82,7 @@ int __init efi_memmap_alloc(unsigned int num_entries, /** * efi_memmap_install - Install a new EFI memory map in efi.memmap - * @ctx: map allocation parameters (address, size, flags) + * @data: efi memmap installation parameters * * Unlike efi_memmap_init_*(), this function does not allow the caller * to switch from early to late mappings. It simply uses the existing diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index a60af0230e27..a6ab43f69b7d 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -202,21 +202,17 @@ static int param_set_action(const char *val, const struct kernel_param *kp) { int i; int n = ARRAY_SIZE(valid_acts); - char arg[ACTION_LEN], *p; + char arg[ACTION_LEN]; /* (remove possible '\n') */ - strncpy(arg, val, ACTION_LEN - 1); - arg[ACTION_LEN - 1] = '\0'; - p = strchr(arg, '\n'); - if (p) - *p = '\0'; + strscpy(arg, val, strnchrnul(val, sizeof(arg)-1, '\n') - val + 1); for (i = 0; i < n; i++) if (!strcmp(arg, valid_acts[i].action)) break; if (i < n) { - strcpy(uv_nmi_action, arg); + strscpy(uv_nmi_action, arg, sizeof(uv_nmi_action)); pr_info("UV: New NMI action:%s\n", uv_nmi_action); return 0; } @@ -959,7 +955,7 @@ static int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) /* Unexpected return, revert action to "dump" */ if (master) - strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action)); + strscpy(uv_nmi_action, "dump", sizeof(uv_nmi_action)); } /* Pause as all CPU's enter the NMI handler */ diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 7558139920f8..aea47e793963 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -14,6 +14,7 @@ #include <crypto/sha2.h> #include <asm/purgatory.h> +#include "../boot/compressed/error.h" #include "../boot/string.h" u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(".kexec-purgatory"); diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index 93b658248d01..27fc170838e9 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -79,7 +79,7 @@ #ifdef CONFIG_ACPI #include <linux/acpi.h> #include <asm/acpi.h> -#include <acpi/pdc_intel.h> +#include <acpi/proc_cap_intel.h> #include <acpi/processor.h> #include <xen/interface/platform.h> #endif @@ -288,17 +288,17 @@ static bool __init xen_check_mwait(void) native_cpuid(&ax, &bx, &cx, &dx); - /* Ask the Hypervisor whether to clear ACPI_PDC_C_C2C3_FFH. If so, + /* Ask the Hypervisor whether to clear ACPI_PROC_CAP_C_C2C3_FFH. If so, * don't expose MWAIT_LEAF and let ACPI pick the IOPORT version of C3. */ buf[0] = ACPI_PDC_REVISION_ID; buf[1] = 1; - buf[2] = (ACPI_PDC_C_CAPABILITY_SMP | ACPI_PDC_EST_CAPABILITY_SWSMP); + buf[2] = (ACPI_PROC_CAP_C_CAPABILITY_SMP | ACPI_PROC_CAP_EST_CAPABILITY_SWSMP); set_xen_guest_handle(op.u.set_pminfo.pdc, buf); if ((HYPERVISOR_platform_op(&op) == 0) && - (buf[2] & (ACPI_PDC_C_C1_FFH | ACPI_PDC_C_C2C3_FFH))) { + (buf[2] & (ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH))) { cpuid_leaf5_ecx_val = cx; cpuid_leaf5_edx_val = dx; } @@ -523,7 +523,7 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) BUG_ON(size > PAGE_SIZE); BUG_ON(va & ~PAGE_MASK); - pfn = virt_to_pfn(va); + pfn = virt_to_pfn((void *)va); mfn = pfn_to_mfn(pfn); pte = pfn_pte(pfn, PAGE_KERNEL_RO); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 8796ec310483..1b5cba70c236 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -2202,13 +2202,13 @@ static void xen_zap_pfn_range(unsigned long vaddr, unsigned int order, mcs = __xen_mc_entry(0); if (in_frames) - in_frames[i] = virt_to_mfn(vaddr); + in_frames[i] = virt_to_mfn((void *)vaddr); MULTI_update_va_mapping(mcs.mc, vaddr, VOID_PTE, 0); - __set_phys_to_machine(virt_to_pfn(vaddr), INVALID_P2M_ENTRY); + __set_phys_to_machine(virt_to_pfn((void *)vaddr), INVALID_P2M_ENTRY); if (out_frames) - out_frames[i] = virt_to_pfn(vaddr); + out_frames[i] = virt_to_pfn((void *)vaddr); } xen_mc_issue(0); } @@ -2250,7 +2250,7 @@ static void xen_remap_exchanged_ptes(unsigned long vaddr, int order, MULTI_update_va_mapping(mcs.mc, vaddr, mfn_pte(mfn, PAGE_KERNEL), flags); - set_phys_to_machine(virt_to_pfn(vaddr), mfn); + set_phys_to_machine(virt_to_pfn((void *)vaddr), mfn); } xen_mc_issue(0); @@ -2310,12 +2310,6 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, int success; unsigned long vstart = (unsigned long)phys_to_virt(pstart); - /* - * Currently an auto-translated guest will not perform I/O, nor will - * it require PAE page directories below 4GB. Therefore any calls to - * this function are redundant and can be ignored. - */ - if (unlikely(order > MAX_CONTIG_ORDER)) return -ENOMEM; @@ -2327,7 +2321,7 @@ int xen_create_contiguous_region(phys_addr_t pstart, unsigned int order, xen_zap_pfn_range(vstart, order, in_frames, NULL); /* 2. Get a new contiguous memory extent. */ - out_frame = virt_to_pfn(vstart); + out_frame = virt_to_pfn((void *)vstart); success = xen_exchange_memory(1UL << order, 0, in_frames, 1, order, &out_frame, address_bits); @@ -2360,7 +2354,7 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) spin_lock_irqsave(&xen_reservation_lock, flags); /* 1. Find start MFN of contiguous extent. */ - in_frame = virt_to_mfn(vstart); + in_frame = virt_to_mfn((void *)vstart); /* 2. Zap current PTEs. */ xen_zap_pfn_range(vstart, order, NULL, out_frames); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 8b5cf7bb1f47..50c998b844fb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -340,7 +340,7 @@ static void __init xen_do_set_identity_and_remap_chunk( WARN_ON(size == 0); - mfn_save = virt_to_mfn(buf); + mfn_save = virt_to_mfn((void *)buf); for (ident_pfn_iter = start_pfn, remap_pfn_iter = remap_pfn; ident_pfn_iter < ident_end_pfn; @@ -503,7 +503,7 @@ void __init xen_remap_memory(void) unsigned long pfn_s = ~0UL; unsigned long len = 0; - mfn_save = virt_to_mfn(buf); + mfn_save = virt_to_mfn((void *)buf); while (xen_remap_mfn != INVALID_P2M_ENTRY) { /* Map the remap information */ diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 2b69c3c035b6..fc1a4f3c81d9 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -422,3 +422,4 @@ 449 common futex_waitv sys_futex_waitv 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 |