diff options
Diffstat (limited to 'arch/riscv')
51 files changed, 1047 insertions, 272 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 3ee60ddef93e..410d1d6e1b6c 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -173,6 +173,8 @@ config RISCV select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI select RISCV_ALTERNATIVE if !XIP_KERNEL + select RISCV_APLIC + select RISCV_IMSIC select RISCV_INTC select RISCV_TIMER if RISCV_SBI select SIFIVE_PLIC diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 910ba8837add..2acc7d876e1f 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -82,14 +82,14 @@ config ERRATA_THEAD Otherwise, please say "N" here to avoid unnecessary overhead. -config ERRATA_THEAD_PBMT - bool "Apply T-Head memory type errata" +config ERRATA_THEAD_MAE + bool "Apply T-Head's memory attribute extension (XTheadMae) errata" depends on ERRATA_THEAD && 64BIT && MMU select RISCV_ALTERNATIVE_EARLY default y help - This will apply the memory type errata to handle the non-standard - memory type bits in page-table-entries on T-Head SoCs. + This will apply the memory attribute extension errata to handle the + non-standard PTE utilization on T-Head SoCs (XTheadMae). If you don't know what to do here, say "Y". diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 623de5f8a208..f51bb24bc84c 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -1,12 +1,12 @@ menu "SoC selection" config ARCH_MICROCHIP_POLARFIRE - def_bool SOC_MICROCHIP_POLARFIRE + def_bool ARCH_MICROCHIP -config SOC_MICROCHIP_POLARFIRE - bool "Microchip PolarFire SoCs" +config ARCH_MICROCHIP + bool "Microchip SoCs" help - This enables support for Microchip PolarFire SoC platforms. + This enables support for Microchip SoC platforms. config ARCH_RENESAS bool "Renesas RISC-V SoCs" @@ -14,9 +14,6 @@ config ARCH_RENESAS This enables support for the RISC-V based Renesas SoCs. config ARCH_SIFIVE - def_bool SOC_SIFIVE - -config SOC_SIFIVE bool "SiFive SoCs" select ERRATA_SIFIVE if !XIP_KERNEL help @@ -55,9 +52,6 @@ config ARCH_THEAD This enables support for the RISC-V based T-HEAD SoCs. config ARCH_VIRT - def_bool SOC_VIRT - -config SOC_VIRT bool "QEMU Virt Machine" select CLINT_TIMER if RISCV_M_MODE select POWER_RESET @@ -72,11 +66,13 @@ config SOC_VIRT This enables support for QEMU Virt Machine. config ARCH_CANAAN - def_bool SOC_CANAAN + bool "Canaan Kendryte SoC" + help + This enables support for Canaan Kendryte series SoC platform hardware. -config SOC_CANAAN +config SOC_CANAAN_K210 bool "Canaan Kendryte K210 SoC" - depends on !MMU + depends on !MMU && ARCH_CANAAN select CLINT_TIMER if RISCV_M_MODE select ARCH_HAS_RESET_CONTROLLER select PINCTRL diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 5b3115a19852..1e002d8003c5 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -154,7 +154,7 @@ vdso-install-y += arch/riscv/kernel/vdso/vdso.so.dbg vdso-install-$(CONFIG_COMPAT) += arch/riscv/kernel/compat_vdso/compat_vdso.so.dbg ifneq ($(CONFIG_XIP_KERNEL),y) -ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_ARCH_CANAAN),yy) +ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN_K210),yy) KBUILD_IMAGE := $(boot)/loader.bin else ifeq ($(CONFIG_EFI_ZBOOT),) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 8e7fc0edf21d..869c0345b908 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -14,8 +14,6 @@ # Based on the ia64 and arm64 boot/Makefile. # -KCOV_INSTRUMENT := n - OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S OBJCOPYFLAGS_loader.bin :=-O binary OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S diff --git a/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi b/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi index f35324b9173c..e0ddf8f602c7 100644 --- a/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi +++ b/arch/riscv/boot/dts/renesas/r9a07g043f.dtsi @@ -54,6 +54,81 @@ dma-noncoherent; interrupt-parent = <&plic>; + irqc: interrupt-controller@110a0000 { + compatible = "renesas,r9a07g043f-irqc"; + reg = <0 0x110a0000 0 0x20000>; + #interrupt-cells = <2>; + #address-cells = <0>; + interrupt-controller; + interrupts = <32 IRQ_TYPE_LEVEL_HIGH>, + <33 IRQ_TYPE_LEVEL_HIGH>, + <34 IRQ_TYPE_LEVEL_HIGH>, + <35 IRQ_TYPE_LEVEL_HIGH>, + <36 IRQ_TYPE_LEVEL_HIGH>, + <37 IRQ_TYPE_LEVEL_HIGH>, + <38 IRQ_TYPE_LEVEL_HIGH>, + <39 IRQ_TYPE_LEVEL_HIGH>, + <40 IRQ_TYPE_LEVEL_HIGH>, + <476 IRQ_TYPE_LEVEL_HIGH>, + <477 IRQ_TYPE_LEVEL_HIGH>, + <478 IRQ_TYPE_LEVEL_HIGH>, + <479 IRQ_TYPE_LEVEL_HIGH>, + <480 IRQ_TYPE_LEVEL_HIGH>, + <481 IRQ_TYPE_LEVEL_HIGH>, + <482 IRQ_TYPE_LEVEL_HIGH>, + <483 IRQ_TYPE_LEVEL_HIGH>, + <484 IRQ_TYPE_LEVEL_HIGH>, + <485 IRQ_TYPE_LEVEL_HIGH>, + <486 IRQ_TYPE_LEVEL_HIGH>, + <487 IRQ_TYPE_LEVEL_HIGH>, + <488 IRQ_TYPE_LEVEL_HIGH>, + <489 IRQ_TYPE_LEVEL_HIGH>, + <490 IRQ_TYPE_LEVEL_HIGH>, + <491 IRQ_TYPE_LEVEL_HIGH>, + <492 IRQ_TYPE_LEVEL_HIGH>, + <493 IRQ_TYPE_LEVEL_HIGH>, + <494 IRQ_TYPE_LEVEL_HIGH>, + <495 IRQ_TYPE_LEVEL_HIGH>, + <496 IRQ_TYPE_LEVEL_HIGH>, + <497 IRQ_TYPE_LEVEL_HIGH>, + <498 IRQ_TYPE_LEVEL_HIGH>, + <499 IRQ_TYPE_LEVEL_HIGH>, + <500 IRQ_TYPE_LEVEL_HIGH>, + <501 IRQ_TYPE_LEVEL_HIGH>, + <502 IRQ_TYPE_LEVEL_HIGH>, + <503 IRQ_TYPE_LEVEL_HIGH>, + <504 IRQ_TYPE_LEVEL_HIGH>, + <505 IRQ_TYPE_LEVEL_HIGH>, + <506 IRQ_TYPE_LEVEL_HIGH>, + <507 IRQ_TYPE_LEVEL_HIGH>, + <57 IRQ_TYPE_LEVEL_HIGH>, + <66 IRQ_TYPE_EDGE_RISING>, + <67 IRQ_TYPE_EDGE_RISING>, + <68 IRQ_TYPE_EDGE_RISING>, + <69 IRQ_TYPE_EDGE_RISING>, + <70 IRQ_TYPE_EDGE_RISING>, + <71 IRQ_TYPE_EDGE_RISING>; + interrupt-names = "nmi", + "irq0", "irq1", "irq2", "irq3", + "irq4", "irq5", "irq6", "irq7", + "tint0", "tint1", "tint2", "tint3", + "tint4", "tint5", "tint6", "tint7", + "tint8", "tint9", "tint10", "tint11", + "tint12", "tint13", "tint14", "tint15", + "tint16", "tint17", "tint18", "tint19", + "tint20", "tint21", "tint22", "tint23", + "tint24", "tint25", "tint26", "tint27", + "tint28", "tint29", "tint30", "tint31", + "bus-err", "ec7tie1-0", "ec7tie2-0", + "ec7tiovf-0", "ec7tie1-1", "ec7tie2-1", + "ec7tiovf-1"; + clocks = <&cpg CPG_MOD R9A07G043_IAX45_CLK>, + <&cpg CPG_MOD R9A07G043_IAX45_PCLK>; + clock-names = "clk", "pclk"; + power-domains = <&cpg>; + resets = <&cpg R9A07G043_IAX45_RESETN>; + }; + plic: interrupt-controller@12c00000 { compatible = "renesas,r9a07g043-plic", "andestech,nceplic100"; #interrupt-cells = <2>; diff --git a/arch/riscv/boot/dts/renesas/rzfive-smarc-som.dtsi b/arch/riscv/boot/dts/renesas/rzfive-smarc-som.dtsi index 433ab5c6a626..5e808242649e 100644 --- a/arch/riscv/boot/dts/renesas/rzfive-smarc-som.dtsi +++ b/arch/riscv/boot/dts/renesas/rzfive-smarc-som.dtsi @@ -6,19 +6,3 @@ */ #include <arm64/renesas/rzg2ul-smarc-som.dtsi> - -#if (!SW_ET0_EN_N) -ð0 { - phy0: ethernet-phy@7 { - /delete-property/ interrupt-parent; - /delete-property/ interrupts; - }; -}; -#endif - -ð1 { - phy1: ethernet-phy@7 { - /delete-property/ interrupt-parent; - /delete-property/ interrupts; - }; -}; diff --git a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts index 3af9e34b3bc7..cd013588adc0 100644 --- a/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts +++ b/arch/riscv/boot/dts/sophgo/cv1800b-milkv-duo.dts @@ -23,9 +23,15 @@ stdout-path = "serial0:115200n8"; }; - memory@80000000 { - device_type = "memory"; - reg = <0x80000000 0x3f40000>; + reserved-memory { + #address-cells = <1>; + #size-cells = <1>; + ranges; + + coprocessor_rtos: region@83f40000 { + reg = <0x83f40000 0xc0000>; + no-map; + }; }; }; @@ -33,6 +39,14 @@ clock-frequency = <25000000>; }; +&sdhci0 { + status = "okay"; + bus-width = <4>; + no-1-8-v; + no-mmc; + no-sdio; +}; + &uart0 { status = "okay"; }; diff --git a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi index 165e9e320a8c..ec9530972ae2 100644 --- a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi @@ -7,6 +7,11 @@ / { compatible = "sophgo,cv1800b"; + + memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x4000000>; + }; }; &plic { @@ -16,3 +21,7 @@ &clint { compatible = "sophgo,cv1800b-clint", "thead,c900-clint"; }; + +&clk { + compatible = "sophgo,cv1800-clk"; +}; diff --git a/arch/riscv/boot/dts/sophgo/cv1812h.dtsi b/arch/riscv/boot/dts/sophgo/cv1812h.dtsi index 3e7a942f5c1a..7fa4c1e2d1da 100644 --- a/arch/riscv/boot/dts/sophgo/cv1812h.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv1812h.dtsi @@ -22,3 +22,7 @@ &clint { compatible = "sophgo,cv1812h-clint", "thead,c900-clint"; }; + +&clk { + compatible = "sophgo,cv1810-clk"; +}; diff --git a/arch/riscv/boot/dts/sophgo/cv18xx.dtsi b/arch/riscv/boot/dts/sophgo/cv18xx.dtsi index 2d6f4a4b1e58..891932ae470f 100644 --- a/arch/riscv/boot/dts/sophgo/cv18xx.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv18xx.dtsi @@ -4,6 +4,8 @@ * Copyright (C) 2023 Inochi Amaoto <inochiama@outlook.com> */ +#include <dt-bindings/clock/sophgo,cv1800.h> +#include <dt-bindings/gpio/gpio.h> #include <dt-bindings/interrupt-controller/irq.h> / { @@ -53,6 +55,12 @@ dma-noncoherent; ranges; + clk: clock-controller@3002000 { + reg = <0x03002000 0x1000>; + clocks = <&osc>; + #clock-cells = <1>; + }; + gpio0: gpio@3020000 { compatible = "snps,dw-apb-gpio"; reg = <0x3020000 0x1000>; @@ -125,11 +133,67 @@ }; }; + i2c0: i2c@4000000 { + compatible = "snps,designware-i2c"; + reg = <0x04000000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_I2C>, <&clk CLK_APB_I2C0>; + clock-names = "ref", "pclk"; + interrupts = <49 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + i2c1: i2c@4010000 { + compatible = "snps,designware-i2c"; + reg = <0x04010000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_I2C>, <&clk CLK_APB_I2C1>; + clock-names = "ref", "pclk"; + interrupts = <50 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + i2c2: i2c@4020000 { + compatible = "snps,designware-i2c"; + reg = <0x04020000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_I2C>, <&clk CLK_APB_I2C2>; + clock-names = "ref", "pclk"; + interrupts = <51 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + i2c3: i2c@4030000 { + compatible = "snps,designware-i2c"; + reg = <0x04030000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_I2C>, <&clk CLK_APB_I2C3>; + clock-names = "ref", "pclk"; + interrupts = <52 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + i2c4: i2c@4040000 { + compatible = "snps,designware-i2c"; + reg = <0x04040000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_I2C>, <&clk CLK_APB_I2C4>; + clock-names = "ref", "pclk"; + interrupts = <53 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + uart0: serial@4140000 { compatible = "snps,dw-apb-uart"; reg = <0x04140000 0x100>; interrupts = <44 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&osc>; + clocks = <&clk CLK_UART0>, <&clk CLK_APB_UART0>; + clock-names = "baudclk", "apb_pclk"; reg-shift = <2>; reg-io-width = <4>; status = "disabled"; @@ -139,7 +203,8 @@ compatible = "snps,dw-apb-uart"; reg = <0x04150000 0x100>; interrupts = <45 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&osc>; + clocks = <&clk CLK_UART1>, <&clk CLK_APB_UART1>; + clock-names = "baudclk", "apb_pclk"; reg-shift = <2>; reg-io-width = <4>; status = "disabled"; @@ -149,7 +214,8 @@ compatible = "snps,dw-apb-uart"; reg = <0x04160000 0x100>; interrupts = <46 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&osc>; + clocks = <&clk CLK_UART2>, <&clk CLK_APB_UART2>; + clock-names = "baudclk", "apb_pclk"; reg-shift = <2>; reg-io-width = <4>; status = "disabled"; @@ -159,22 +225,78 @@ compatible = "snps,dw-apb-uart"; reg = <0x04170000 0x100>; interrupts = <47 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&osc>; + clocks = <&clk CLK_UART3>, <&clk CLK_APB_UART3>; + clock-names = "baudclk", "apb_pclk"; reg-shift = <2>; reg-io-width = <4>; status = "disabled"; }; + spi0: spi@4180000 { + compatible = "snps,dw-apb-ssi"; + reg = <0x04180000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_SPI>, <&clk CLK_APB_SPI0>; + clock-names = "ssi_clk", "pclk"; + interrupts = <54 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + spi1: spi@4190000 { + compatible = "snps,dw-apb-ssi"; + reg = <0x04190000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_SPI>, <&clk CLK_APB_SPI1>; + clock-names = "ssi_clk", "pclk"; + interrupts = <55 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + spi2: spi@41a0000 { + compatible = "snps,dw-apb-ssi"; + reg = <0x041a0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_SPI>, <&clk CLK_APB_SPI2>; + clock-names = "ssi_clk", "pclk"; + interrupts = <56 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + + spi3: spi@41b0000 { + compatible = "snps,dw-apb-ssi"; + reg = <0x041b0000 0x10000>; + #address-cells = <1>; + #size-cells = <0>; + clocks = <&clk CLK_SPI>, <&clk CLK_APB_SPI3>; + clock-names = "ssi_clk", "pclk"; + interrupts = <57 IRQ_TYPE_LEVEL_HIGH>; + status = "disabled"; + }; + uart4: serial@41c0000 { compatible = "snps,dw-apb-uart"; reg = <0x041c0000 0x100>; interrupts = <48 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&osc>; + clocks = <&clk CLK_UART4>, <&clk CLK_APB_UART4>; + clock-names = "baudclk", "apb_pclk"; reg-shift = <2>; reg-io-width = <4>; status = "disabled"; }; + sdhci0: mmc@4310000 { + compatible = "sophgo,cv1800b-dwcmshc"; + reg = <0x4310000 0x1000>; + interrupts = <36 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&clk CLK_AXI4_SD0>, + <&clk CLK_SD0>; + clock-names = "core", "bus"; + status = "disabled"; + }; + plic: interrupt-controller@70000000 { reg = <0x70000000 0x4000000>; interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 9>; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index fc0ec2ee13bc..3cae018f9315 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -25,14 +25,15 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set CONFIG_PROFILING=y -CONFIG_SOC_MICROCHIP_POLARFIRE=y +CONFIG_ARCH_MICROCHIP=y CONFIG_ARCH_RENESAS=y -CONFIG_SOC_SIFIVE=y +CONFIG_ARCH_SIFIVE=y CONFIG_ARCH_SOPHGO=y CONFIG_SOC_STARFIVE=y CONFIG_ARCH_SUNXI=y CONFIG_ARCH_THEAD=y -CONFIG_SOC_VIRT=y +CONFIG_ARCH_VIRT=y +CONFIG_ARCH_CANAAN=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y CONFIG_PM=y diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig index 7e75200543f4..af9601da4643 100644 --- a/arch/riscv/configs/nommu_k210_defconfig +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -11,7 +11,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -27,7 +27,8 @@ CONFIG_EXPERT=y CONFIG_SLUB=y CONFIG_SLUB_TINY=y # CONFIG_MMU is not set -CONFIG_SOC_CANAAN=y +CONFIG_ARCH_CANAAN=y +CONFIG_SOC_CANAAN_K210=y CONFIG_NONPORTABLE=y CONFIG_SMP=y CONFIG_NR_CPUS=2 diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig index 0ba353e9ca71..dd460c649152 100644 --- a/arch/riscv/configs/nommu_k210_sdcard_defconfig +++ b/arch/riscv/configs/nommu_k210_sdcard_defconfig @@ -3,7 +3,7 @@ CONFIG_LOG_BUF_SHIFT=13 CONFIG_CC_OPTIMIZE_FOR_SIZE=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_FUTEX is not set # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set @@ -19,7 +19,8 @@ CONFIG_EXPERT=y CONFIG_SLUB=y CONFIG_SLUB_TINY=y # CONFIG_MMU is not set -CONFIG_SOC_CANAAN=y +CONFIG_ARCH_CANAAN=y +CONFIG_SOC_CANAAN_K210=y CONFIG_NONPORTABLE=y CONFIG_SMP=y CONFIG_NR_CPUS=2 diff --git a/arch/riscv/configs/nommu_virt_defconfig b/arch/riscv/configs/nommu_virt_defconfig index b794e2f8144e..d4b03dc3c2c0 100644 --- a/arch/riscv/configs/nommu_virt_defconfig +++ b/arch/riscv/configs/nommu_virt_defconfig @@ -10,7 +10,7 @@ CONFIG_CC_OPTIMIZE_FOR_SIZE=y CONFIG_EXPERT=y # CONFIG_SYSFS_SYSCALL is not set # CONFIG_FHANDLE is not set -# CONFIG_BASE_FULL is not set +CONFIG_BASE_SMALL=y # CONFIG_EPOLL is not set # CONFIG_SIGNALFD is not set # CONFIG_TIMERFD is not set @@ -24,7 +24,7 @@ CONFIG_EXPERT=y CONFIG_SLUB=y CONFIG_SLUB_TINY=y # CONFIG_MMU is not set -CONFIG_SOC_VIRT=y +CONFIG_ARCH_VIRT=y CONFIG_NONPORTABLE=y CONFIG_SMP=y CONFIG_CMDLINE="root=/dev/vda rw earlycon=uart8250,mmio,0x10000000,115200n8 console=ttyS0" diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c index b1c410bbc1ae..bf6a0a6318ee 100644 --- a/arch/riscv/errata/thead/errata.c +++ b/arch/riscv/errata/thead/errata.c @@ -19,20 +19,26 @@ #include <asm/patch.h> #include <asm/vendorid_list.h> -static bool errata_probe_pbmt(unsigned int stage, - unsigned long arch_id, unsigned long impid) +#define CSR_TH_SXSTATUS 0x5c0 +#define SXSTATUS_MAEE _AC(0x200000, UL) + +static bool errata_probe_mae(unsigned int stage, + unsigned long arch_id, unsigned long impid) { - if (!IS_ENABLED(CONFIG_ERRATA_THEAD_PBMT)) + if (!IS_ENABLED(CONFIG_ERRATA_THEAD_MAE)) return false; if (arch_id != 0 || impid != 0) return false; - if (stage == RISCV_ALTERNATIVES_EARLY_BOOT || - stage == RISCV_ALTERNATIVES_MODULE) - return true; + if (stage != RISCV_ALTERNATIVES_EARLY_BOOT && + stage != RISCV_ALTERNATIVES_MODULE) + return false; + + if (!(csr_read(CSR_TH_SXSTATUS) & SXSTATUS_MAEE)) + return false; - return false; + return true; } /* @@ -140,8 +146,8 @@ static u32 thead_errata_probe(unsigned int stage, { u32 cpu_req_errata = 0; - if (errata_probe_pbmt(stage, archid, impid)) - cpu_req_errata |= BIT(ERRATA_THEAD_PBMT); + if (errata_probe_mae(stage, archid, impid)) + cpu_req_errata |= BIT(ERRATA_THEAD_MAE); errata_probe_cmo(stage, archid, impid); diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index 2468c55933cd..25966995da04 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -168,7 +168,8 @@ #define VSIP_TO_HVIP_SHIFT (IRQ_VS_SOFT - IRQ_S_SOFT) #define VSIP_VALID_MASK ((_AC(1, UL) << IRQ_S_SOFT) | \ (_AC(1, UL) << IRQ_S_TIMER) | \ - (_AC(1, UL) << IRQ_S_EXT)) + (_AC(1, UL) << IRQ_S_EXT) | \ + (_AC(1, UL) << IRQ_PMU_OVF)) /* AIA CSR bits */ #define TOPI_IID_SHIFT 16 @@ -281,7 +282,7 @@ #define CSR_HPMCOUNTER30H 0xc9e #define CSR_HPMCOUNTER31H 0xc9f -#define CSR_SSCOUNTOVF 0xda0 +#define CSR_SCOUNTOVF 0xda0 #define CSR_SSTATUS 0x100 #define CSR_SIE 0x104 diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 1f2dbfb8a8bf..efd851e1b483 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -23,7 +23,7 @@ #endif #ifdef CONFIG_ERRATA_THEAD -#define ERRATA_THEAD_PBMT 0 +#define ERRATA_THEAD_MAE 0 #define ERRATA_THEAD_PMU 1 #define ERRATA_THEAD_NUMBER 2 #endif @@ -53,20 +53,20 @@ asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ * in the default case. */ #define ALT_SVPBMT_SHIFT 61 -#define ALT_THEAD_PBMT_SHIFT 59 +#define ALT_THEAD_MAE_SHIFT 59 #define ALT_SVPBMT(_val, prot) \ asm(ALTERNATIVE_2("li %0, 0\t\nnop", \ "li %0, %1\t\nslli %0,%0,%3", 0, \ RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \ "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ - ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ + ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ : "=r"(_val) \ : "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \ - "I"(prot##_THEAD >> ALT_THEAD_PBMT_SHIFT), \ + "I"(prot##_THEAD >> ALT_THEAD_MAE_SHIFT), \ "I"(ALT_SVPBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT)) + "I"(ALT_THEAD_MAE_SHIFT)) -#ifdef CONFIG_ERRATA_THEAD_PBMT +#ifdef CONFIG_ERRATA_THEAD_MAE /* * IO/NOCACHE memory types are handled together with svpbmt, * so on T-Head chips, check if no other memory type is set, @@ -83,11 +83,11 @@ asm volatile(ALTERNATIVE( \ "slli t3, t3, %3\n\t" \ "or %0, %0, t3\n\t" \ "2:", THEAD_VENDOR_ID, \ - ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \ + ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ : "+r"(_val) \ - : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \ - "I"(ALT_THEAD_PBMT_SHIFT) \ + : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_MAE_SHIFT), \ + "I"(_PAGE_PMA_THEAD >> ALT_THEAD_MAE_SHIFT), \ + "I"(ALT_THEAD_MAE_SHIFT) \ : "t3") #else #define ALT_THEAD_PMA(_val) diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index 484d04a92fa6..d96281278586 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -43,6 +43,17 @@ KVM_ARCH_REQ_FLAGS(5, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(6) +#define KVM_HEDELEG_DEFAULT (BIT(EXC_INST_MISALIGNED) | \ + BIT(EXC_BREAKPOINT) | \ + BIT(EXC_SYSCALL) | \ + BIT(EXC_INST_PAGE_FAULT) | \ + BIT(EXC_LOAD_PAGE_FAULT) | \ + BIT(EXC_STORE_PAGE_FAULT)) + +#define KVM_HIDELEG_DEFAULT (BIT(IRQ_VS_SOFT) | \ + BIT(IRQ_VS_TIMER) | \ + BIT(IRQ_VS_EXT)) + enum kvm_riscv_hfence_type { KVM_RISCV_HFENCE_UNKNOWN = 0, KVM_RISCV_HFENCE_GVMA_VMID_GPA, @@ -169,6 +180,7 @@ struct kvm_vcpu_csr { struct kvm_vcpu_config { u64 henvcfg; u64 hstateen0; + unsigned long hedeleg; }; struct kvm_vcpu_smstateen_csr { @@ -211,6 +223,7 @@ struct kvm_vcpu_arch { /* CPU context upon Guest VCPU reset */ struct kvm_cpu_context guest_reset_context; + spinlock_t reset_cntx_lock; /* CPU CSR context upon Guest VCPU reset */ struct kvm_vcpu_csr guest_reset_csr; @@ -252,8 +265,9 @@ struct kvm_vcpu_arch { /* Cache pages needed to program page tables with spinlock held */ struct kvm_mmu_memory_cache mmu_page_cache; - /* VCPU power-off state */ - bool power_off; + /* VCPU power state */ + struct kvm_mp_state mp_state; + spinlock_t mp_state_lock; /* Don't run the VCPU (blocked) */ bool pause; @@ -374,8 +388,11 @@ int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask); +void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); +void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); +bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_sbi_sta_reset(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_record_steal_time(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/kvm_vcpu_pmu.h b/arch/riscv/include/asm/kvm_vcpu_pmu.h index 395518a1664e..fa0f535bbbf0 100644 --- a/arch/riscv/include/asm/kvm_vcpu_pmu.h +++ b/arch/riscv/include/asm/kvm_vcpu_pmu.h @@ -20,7 +20,7 @@ static_assert(RISCV_KVM_MAX_COUNTERS <= 64); struct kvm_fw_event { /* Current value of the event */ - unsigned long value; + u64 value; /* Event monitoring status */ bool started; @@ -36,6 +36,7 @@ struct kvm_pmc { bool started; /* Monitoring event ID */ unsigned long event_idx; + struct kvm_vcpu *vcpu; }; /* PMU data structure per vcpu */ @@ -50,6 +51,12 @@ struct kvm_pmu { bool init_done; /* Bit map of all the virtual counter used */ DECLARE_BITMAP(pmc_in_use, RISCV_KVM_MAX_COUNTERS); + /* Bit map of all the virtual counter overflown */ + DECLARE_BITMAP(pmc_overflown, RISCV_KVM_MAX_COUNTERS); + /* The address of the counter snapshot area (guest physical address) */ + gpa_t snapshot_addr; + /* The actual data of the snapshot */ + struct riscv_pmu_snapshot_data *sdata; }; #define vcpu_to_pmu(vcpu) (&(vcpu)->arch.pmu_context) @@ -82,9 +89,14 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba unsigned long ctr_mask, unsigned long flags, unsigned long eidx, u64 evtdata, struct kvm_vcpu_sbi_return *retdata); -int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, +int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, struct kvm_vcpu_sbi_return *retdata); +int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + struct kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu); +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long flags, + struct kvm_vcpu_sbi_return *retdata); void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu); void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu); diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 2947423b5082..115ac98b8d72 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -89,7 +89,7 @@ typedef struct page *pgtable_t; #define PTE_FMT "%08lx" #endif -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) /* * We override this value as its generic definition uses __pa too early in * the boot process (before kernel_map.va_pa_offset is set). diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 661b2b4fe758..55a7c3ec246b 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -55,6 +55,9 @@ #define MODULES_LOWEST_VADDR (KERNEL_LINK_ADDR - SZ_2G) #define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) #define MODULES_END (PFN_ALIGN((unsigned long)&_start)) +#else +#define MODULES_VADDR VMALLOC_START +#define MODULES_END VMALLOC_END #endif /* @@ -897,7 +900,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define PAGE_SHARED __pgprot(0) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL -#define TASK_SIZE 0xffffffffUL +#define TASK_SIZE _AC(-1, UL) #define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 6e68f8dff76b..112a0a0d9f46 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -131,6 +131,8 @@ enum sbi_ext_pmu_fid { SBI_EXT_PMU_COUNTER_START, SBI_EXT_PMU_COUNTER_STOP, SBI_EXT_PMU_COUNTER_FW_READ, + SBI_EXT_PMU_COUNTER_FW_READ_HI, + SBI_EXT_PMU_SNAPSHOT_SET_SHMEM, }; union sbi_pmu_ctr_info { @@ -147,6 +149,13 @@ union sbi_pmu_ctr_info { }; }; +/* Data structure to contain the pmu snapshot data */ +struct riscv_pmu_snapshot_data { + u64 ctr_overflow_mask; + u64 ctr_values[64]; + u64 reserved[447]; +}; + #define RISCV_PMU_RAW_EVENT_MASK GENMASK_ULL(47, 0) #define RISCV_PMU_RAW_EVENT_IDX 0x20000 @@ -232,20 +241,22 @@ enum sbi_pmu_ctr_type { #define SBI_PMU_EVENT_IDX_INVALID 0xFFFFFFFF /* Flags defined for config matching function */ -#define SBI_PMU_CFG_FLAG_SKIP_MATCH (1 << 0) -#define SBI_PMU_CFG_FLAG_CLEAR_VALUE (1 << 1) -#define SBI_PMU_CFG_FLAG_AUTO_START (1 << 2) -#define SBI_PMU_CFG_FLAG_SET_VUINH (1 << 3) -#define SBI_PMU_CFG_FLAG_SET_VSINH (1 << 4) -#define SBI_PMU_CFG_FLAG_SET_UINH (1 << 5) -#define SBI_PMU_CFG_FLAG_SET_SINH (1 << 6) -#define SBI_PMU_CFG_FLAG_SET_MINH (1 << 7) +#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0) +#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1) +#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2) +#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3) +#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4) +#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5) +#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6) +#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7) /* Flags defined for counter start function */ -#define SBI_PMU_START_FLAG_SET_INIT_VALUE (1 << 0) +#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0) +#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1) /* Flags defined for counter stop function */ -#define SBI_PMU_STOP_FLAG_RESET (1 << 0) +#define SBI_PMU_STOP_FLAG_RESET BIT(0) +#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1) enum sbi_ext_dbcn_fid { SBI_EXT_DBCN_CONSOLE_WRITE = 0, @@ -266,7 +277,7 @@ struct sbi_sta_struct { u8 pad[47]; } __packed; -#define SBI_STA_SHMEM_DISABLE -1 +#define SBI_SHMEM_DISABLE -1 /* SBI spec version fields */ #define SBI_SPEC_VERSION_DEFAULT 0x1 @@ -284,6 +295,7 @@ struct sbi_sta_struct { #define SBI_ERR_ALREADY_AVAILABLE -6 #define SBI_ERR_ALREADY_STARTED -7 #define SBI_ERR_ALREADY_STOPPED -8 +#define SBI_ERR_NO_SHMEM -9 extern unsigned long sbi_spec_version; struct sbiret { @@ -355,8 +367,8 @@ static inline unsigned long sbi_minor_version(void) static inline unsigned long sbi_mk_version(unsigned long major, unsigned long minor) { - return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << - SBI_SPEC_VERSION_MAJOR_SHIFT) | minor; + return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT) + | (minor & SBI_SPEC_VERSION_MINOR_MASK); } int sbi_err_map_linux_errno(int err); diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h index 9f2a8e3ff204..2902f68dc913 100644 --- a/arch/riscv/include/uapi/asm/hwprobe.h +++ b/arch/riscv/include/uapi/asm/hwprobe.h @@ -54,7 +54,7 @@ struct riscv_hwprobe { #define RISCV_HWPROBE_EXT_ZFHMIN (1 << 28) #define RISCV_HWPROBE_EXT_ZIHINTNTL (1 << 29) #define RISCV_HWPROBE_EXT_ZVFH (1 << 30) -#define RISCV_HWPROBE_EXT_ZVFHMIN (1 << 31) +#define RISCV_HWPROBE_EXT_ZVFHMIN (1ULL << 31) #define RISCV_HWPROBE_EXT_ZFA (1ULL << 32) #define RISCV_HWPROBE_EXT_ZTSO (1ULL << 33) #define RISCV_HWPROBE_EXT_ZACAS (1ULL << 34) diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index b1c503c2959c..e878e7cc3978 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -167,6 +167,7 @@ enum KVM_RISCV_ISA_EXT_ID { KVM_RISCV_ISA_EXT_ZFA, KVM_RISCV_ISA_EXT_ZTSO, KVM_RISCV_ISA_EXT_ZACAS, + KVM_RISCV_ISA_EXT_SSCOFPMF, KVM_RISCV_ISA_EXT_MAX, }; diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 3df4cb788c1f..24e37d1ef7ec 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -34,12 +34,6 @@ obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso)) obj-y += compat_vdso.o CPPFLAGS_compat_vdso.lds += -P -C -DCOMPAT_VDSO -U$(ARCH) -# Disable profiling and instrumentation for VDSO code -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n - # Force dependency $(obj)/compat_vdso.o: $(obj)/compat_vdso.so @@ -58,7 +52,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-compat_vdsosym := $(srctree)/$(src)/gen_compat_vdso_offsets.sh +gen-compat_vdsosym := $(src)/gen_compat_vdso_offsets.sh quiet_cmd_compat_vdsosym = VDSOSYM $@ cmd_compat_vdsosym = $(NM) $< | $(gen-compat_vdsosym) | LC_ALL=C sort > $@ diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 5e5a82644451..906f9a3a5d65 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -11,7 +11,6 @@ #include <linux/kernel.h> #include <linux/log2.h> #include <linux/moduleloader.h> -#include <linux/vmalloc.h> #include <linux/sizes.h> #include <linux/pgtable.h> #include <asm/alternative.h> @@ -905,17 +904,6 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, return 0; } -#if defined(CONFIG_MMU) && defined(CONFIG_64BIT) -void *module_alloc(unsigned long size) -{ - return __vmalloc_node_range(size, 1, MODULES_VADDR, - MODULES_END, GFP_KERNEL, - PAGE_KERNEL, VM_FLUSH_RESET_PERMS, - NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) diff --git a/arch/riscv/kernel/paravirt.c b/arch/riscv/kernel/paravirt.c index 0d6225fd3194..fa6b0339a65d 100644 --- a/arch/riscv/kernel/paravirt.c +++ b/arch/riscv/kernel/paravirt.c @@ -62,7 +62,7 @@ static int sbi_sta_steal_time_set_shmem(unsigned long lo, unsigned long hi, ret = sbi_ecall(SBI_EXT_STA, SBI_EXT_STA_STEAL_TIME_SET_SHMEM, lo, hi, flags, 0, 0, 0); if (ret.error) { - if (lo == SBI_STA_SHMEM_DISABLE && hi == SBI_STA_SHMEM_DISABLE) + if (lo == SBI_SHMEM_DISABLE && hi == SBI_SHMEM_DISABLE) pr_warn("Failed to disable steal-time shmem"); else pr_warn("Failed to set steal-time shmem"); @@ -84,8 +84,8 @@ static int pv_time_cpu_online(unsigned int cpu) static int pv_time_cpu_down_prepare(unsigned int cpu) { - return sbi_sta_steal_time_set_shmem(SBI_STA_SHMEM_DISABLE, - SBI_STA_SHMEM_DISABLE, 0); + return sbi_sta_steal_time_set_shmem(SBI_SHMEM_DISABLE, + SBI_SHMEM_DISABLE, 0); } static u64 pv_time_steal_clock(int cpu) diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile index b75f150b923d..50bc5ef7dd2f 100644 --- a/arch/riscv/kernel/pi/Makefile +++ b/arch/riscv/kernel/pi/Makefile @@ -17,12 +17,6 @@ KBUILD_CFLAGS += -mcmodel=medany CFLAGS_cmdline_early.o += -D__NO_FORTIFY CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY -GCOV_PROFILE := n -KASAN_SANITIZE := n -KCSAN_SANITIZE := n -UBSAN_SANITIZE := n -KCOV_INSTRUMENT := n - $(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ --remove-section=.note.gnu.property \ --prefix-alloc-sections=.init.pi diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c index 7142ec42e889..a69dfa610aa8 100644 --- a/arch/riscv/kernel/probes/ftrace.c +++ b/arch/riscv/kernel/probes/ftrace.c @@ -11,6 +11,9 @@ void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, struct kprobe_ctlblk *kcb; int bit; + if (unlikely(kprobe_ftrace_disabled)) + return; + bit = ftrace_test_recursion_trylock(ip, parent_ip); if (bit < 0) return; diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 71a8b8945b26..dfb28e57d900 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -105,16 +105,6 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } -#ifdef CONFIG_MMU -void *alloc_insn_page(void) -{ - return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, - GFP_KERNEL, PAGE_KERNEL_READ_EXEC, - VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, - __builtin_return_address(0)); -} -#endif - /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 272c431ac5b9..f7ef8ad9b550 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -39,12 +39,6 @@ endif CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) CFLAGS_REMOVE_hwprobe.o = $(CC_FLAGS_FTRACE) $(CC_FLAGS_SCS) -# Disable profiling and instrumentation for VDSO code -GCOV_PROFILE := n -KCOV_INSTRUMENT := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n - # Force dependency $(obj)/vdso.o: $(obj)/vdso.so @@ -60,7 +54,7 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) # Generate VDSO offsets using helper script -gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +gen-vdsosym := $(src)/gen_vdso_offsets.sh quiet_cmd_vdsosym = VDSOSYM $@ cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ diff --git a/arch/riscv/kvm/Makefile b/arch/riscv/kvm/Makefile index c9646521f113..c2cacfbc06a0 100644 --- a/arch/riscv/kvm/Makefile +++ b/arch/riscv/kvm/Makefile @@ -3,7 +3,7 @@ # Makefile for RISC-V KVM support # -ccflags-y += -I $(srctree)/$(src) +ccflags-y += -I $(src) include $(srctree)/virt/kvm/Makefile.kvm diff --git a/arch/riscv/kvm/aia.c b/arch/riscv/kvm/aia.c index a944294f6f23..0f0a9d11bb5f 100644 --- a/arch/riscv/kvm/aia.c +++ b/arch/riscv/kvm/aia.c @@ -545,6 +545,9 @@ void kvm_riscv_aia_enable(void) enable_percpu_irq(hgei_parent_irq, irq_get_trigger_type(hgei_parent_irq)); csr_set(CSR_HIE, BIT(IRQ_S_GEXT)); + /* Enable IRQ filtering for overflow interrupt only if sscofpmf is present */ + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_write(CSR_HVIEN, BIT(IRQ_PMU_OVF)); } void kvm_riscv_aia_disable(void) @@ -558,6 +561,8 @@ void kvm_riscv_aia_disable(void) return; hgctrl = get_cpu_ptr(&aia_hgei); + if (__riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSCOFPMF)) + csr_clear(CSR_HVIEN, BIT(IRQ_PMU_OVF)); /* Disable per-CPU SGEI interrupt */ csr_clear(CSR_HIE, BIT(IRQ_S_GEXT)); disable_percpu_irq(hgei_parent_irq); diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index 225a435d9c9a..bab2ec34cd87 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -22,22 +22,8 @@ long kvm_arch_dev_ioctl(struct file *filp, int kvm_arch_hardware_enable(void) { - unsigned long hideleg, hedeleg; - - hedeleg = 0; - hedeleg |= (1UL << EXC_INST_MISALIGNED); - hedeleg |= (1UL << EXC_BREAKPOINT); - hedeleg |= (1UL << EXC_SYSCALL); - hedeleg |= (1UL << EXC_INST_PAGE_FAULT); - hedeleg |= (1UL << EXC_LOAD_PAGE_FAULT); - hedeleg |= (1UL << EXC_STORE_PAGE_FAULT); - csr_write(CSR_HEDELEG, hedeleg); - - hideleg = 0; - hideleg |= (1UL << IRQ_VS_SOFT); - hideleg |= (1UL << IRQ_VS_TIMER); - hideleg |= (1UL << IRQ_VS_EXT); - csr_write(CSR_HIDELEG, hideleg); + csr_write(CSR_HEDELEG, KVM_HEDELEG_DEFAULT); + csr_write(CSR_HIDELEG, KVM_HIDELEG_DEFAULT); /* VS should access only the time counter directly. Everything else should trap */ csr_write(CSR_HCOUNTEREN, 0x02); diff --git a/arch/riscv/kvm/mmu.c b/arch/riscv/kvm/mmu.c index a9e2fd7245e1..b63650f9b966 100644 --- a/arch/riscv/kvm/mmu.c +++ b/arch/riscv/kvm/mmu.c @@ -550,26 +550,6 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) return false; } -bool kvm_set_spte_gfn(struct kvm *kvm, struct kvm_gfn_range *range) -{ - int ret; - kvm_pfn_t pfn = pte_pfn(range->arg.pte); - - if (!kvm->arch.pgd) - return false; - - WARN_ON(range->end - range->start != 1); - - ret = gstage_map_page(kvm, NULL, range->start << PAGE_SHIFT, - __pfn_to_phys(pfn), PAGE_SIZE, true, true); - if (ret) { - kvm_debug("Failed to map G-stage page (error %d)\n", ret); - return true; - } - - return false; -} - bool kvm_age_gfn(struct kvm *kvm, struct kvm_gfn_range *range) { pte_t *ptep; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index b5ca9f2e98ac..17e21df36cc1 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -64,7 +64,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(csr, reset_csr, sizeof(*csr)); + spin_lock(&vcpu->arch.reset_cntx_lock); memcpy(cntx, reset_cntx, sizeof(*cntx)); + spin_unlock(&vcpu->arch.reset_cntx_lock); kvm_riscv_vcpu_fp_reset(vcpu); @@ -102,6 +104,8 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cntx; struct kvm_vcpu_csr *reset_csr = &vcpu->arch.guest_reset_csr; + spin_lock_init(&vcpu->arch.mp_state_lock); + /* Mark this VCPU never ran */ vcpu->arch.ran_atleast_once = false; vcpu->arch.mmu_page_cache.gfp_zero = __GFP_ZERO; @@ -119,12 +123,16 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) spin_lock_init(&vcpu->arch.hfence_lock); /* Setup reset state of shadow SSTATUS and HSTATUS CSRs */ + spin_lock_init(&vcpu->arch.reset_cntx_lock); + + spin_lock(&vcpu->arch.reset_cntx_lock); cntx = &vcpu->arch.guest_reset_context; cntx->sstatus = SR_SPP | SR_SPIE; cntx->hstatus = 0; cntx->hstatus |= HSTATUS_VTW; cntx->hstatus |= HSTATUS_SPVP; cntx->hstatus |= HSTATUS_SPV; + spin_unlock(&vcpu->arch.reset_cntx_lock); if (kvm_riscv_vcpu_alloc_vector_context(vcpu, cntx)) return -ENOMEM; @@ -201,7 +209,7 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { return (kvm_riscv_vcpu_has_interrupts(vcpu, -1UL) && - !vcpu->arch.power_off && !vcpu->arch.pause); + !kvm_riscv_vcpu_stopped(vcpu) && !vcpu->arch.pause); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -365,6 +373,13 @@ void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) } } + /* Sync up the HVIP.LCOFIP bit changes (only clear) by the guest */ + if ((csr->hvip ^ hvip) & (1UL << IRQ_PMU_OVF)) { + if (!(hvip & (1UL << IRQ_PMU_OVF)) && + !test_and_set_bit(IRQ_PMU_OVF, v->irqs_pending_mask)) + clear_bit(IRQ_PMU_OVF, v->irqs_pending); + } + /* Sync-up AIA high interrupts */ kvm_riscv_vcpu_aia_sync_interrupts(vcpu); @@ -382,7 +397,8 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; set_bit(irq, vcpu->arch.irqs_pending); @@ -397,14 +413,15 @@ int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) { /* - * We only allow VS-mode software, timer, and external + * We only allow VS-mode software, timer, counter overflow and external * interrupts when irq is one of the local interrupts * defined by RISC-V privilege specification. */ if (irq < IRQ_LOCAL_MAX && irq != IRQ_VS_SOFT && irq != IRQ_VS_TIMER && - irq != IRQ_VS_EXT) + irq != IRQ_VS_EXT && + irq != IRQ_PMU_OVF) return -EINVAL; clear_bit(irq, vcpu->arch.irqs_pending); @@ -429,26 +446,42 @@ bool kvm_riscv_vcpu_has_interrupts(struct kvm_vcpu *vcpu, u64 mask) return kvm_riscv_vcpu_aia_has_interrupts(vcpu, mask); } -void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +void __kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = true; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } -void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_off(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +void __kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) { - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state.mp_state, KVM_MP_STATE_RUNNABLE); kvm_vcpu_wake_up(vcpu); } +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + spin_lock(&vcpu->arch.mp_state_lock); + __kvm_riscv_vcpu_power_on(vcpu); + spin_unlock(&vcpu->arch.mp_state_lock); +} + +bool kvm_riscv_vcpu_stopped(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->arch.mp_state.mp_state) == KVM_MP_STATE_STOPPED; +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - if (vcpu->arch.power_off) - mp_state->mp_state = KVM_MP_STATE_STOPPED; - else - mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + *mp_state = READ_ONCE(vcpu->arch.mp_state); return 0; } @@ -458,25 +491,36 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, { int ret = 0; + spin_lock(&vcpu->arch.mp_state_lock); + switch (mp_state->mp_state) { case KVM_MP_STATE_RUNNABLE: - vcpu->arch.power_off = false; + WRITE_ONCE(vcpu->arch.mp_state, *mp_state); break; case KVM_MP_STATE_STOPPED: - kvm_riscv_vcpu_power_off(vcpu); + __kvm_riscv_vcpu_power_off(vcpu); break; default: ret = -EINVAL; } + spin_unlock(&vcpu->arch.mp_state_lock); + return ret; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, struct kvm_guest_debug *dbg) { - /* TODO; To be implemented later. */ - return -EINVAL; + if (dbg->control & KVM_GUESTDBG_ENABLE) { + vcpu->guest_debug = dbg->control; + vcpu->arch.cfg.hedeleg &= ~BIT(EXC_BREAKPOINT); + } else { + vcpu->guest_debug = 0; + vcpu->arch.cfg.hedeleg |= BIT(EXC_BREAKPOINT); + } + + return 0; } static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) @@ -505,6 +549,10 @@ static void kvm_riscv_vcpu_setup_config(struct kvm_vcpu *vcpu) if (riscv_isa_extension_available(isa, SMSTATEEN)) cfg->hstateen0 |= SMSTATEEN0_SSTATEEN0; } + + cfg->hedeleg = KVM_HEDELEG_DEFAULT; + if (vcpu->guest_debug) + cfg->hedeleg &= ~BIT(EXC_BREAKPOINT); } void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) @@ -519,6 +567,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) csr_write(CSR_VSEPC, csr->vsepc); csr_write(CSR_VSCAUSE, csr->vscause); csr_write(CSR_VSTVAL, csr->vstval); + csr_write(CSR_HEDELEG, cfg->hedeleg); csr_write(CSR_HVIP, csr->hvip); csr_write(CSR_VSATP, csr->vsatp); csr_write(CSR_HENVCFG, cfg->henvcfg); @@ -584,11 +633,11 @@ static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { kvm_vcpu_srcu_read_unlock(vcpu); rcuwait_wait_event(wait, - (!vcpu->arch.power_off) && (!vcpu->arch.pause), + (!kvm_riscv_vcpu_stopped(vcpu)) && (!vcpu->arch.pause), TASK_INTERRUPTIBLE); kvm_vcpu_srcu_read_lock(vcpu); - if (vcpu->arch.power_off || vcpu->arch.pause) { + if (kvm_riscv_vcpu_stopped(vcpu) || vcpu->arch.pause) { /* * Awaken to handle a signal, request to * sleep again later. diff --git a/arch/riscv/kvm/vcpu_exit.c b/arch/riscv/kvm/vcpu_exit.c index 2415722c01b8..5761f95abb60 100644 --- a/arch/riscv/kvm/vcpu_exit.c +++ b/arch/riscv/kvm/vcpu_exit.c @@ -204,6 +204,10 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, if (vcpu->arch.guest_context.hstatus & HSTATUS_SPV) ret = kvm_riscv_vcpu_sbi_ecall(vcpu, run); break; + case EXC_BREAKPOINT: + run->exit_reason = KVM_EXIT_DEBUG; + ret = 0; + break; default: break; } diff --git a/arch/riscv/kvm/vcpu_onereg.c b/arch/riscv/kvm/vcpu_onereg.c index 994adc26db4b..c676275ea0a0 100644 --- a/arch/riscv/kvm/vcpu_onereg.c +++ b/arch/riscv/kvm/vcpu_onereg.c @@ -36,6 +36,7 @@ static const unsigned long kvm_isa_ext_arr[] = { /* Multi letter extensions (alphabetically sorted) */ KVM_ISA_EXT_ARR(SMSTATEEN), KVM_ISA_EXT_ARR(SSAIA), + KVM_ISA_EXT_ARR(SSCOFPMF), KVM_ISA_EXT_ARR(SSTC), KVM_ISA_EXT_ARR(SVINVAL), KVM_ISA_EXT_ARR(SVNAPOT), @@ -99,6 +100,9 @@ static bool kvm_riscv_vcpu_isa_enable_allowed(unsigned long ext) switch (ext) { case KVM_RISCV_ISA_EXT_H: return false; + case KVM_RISCV_ISA_EXT_SSCOFPMF: + /* Sscofpmf depends on interrupt filtering defined in ssaia */ + return __riscv_isa_extension_available(NULL, RISCV_ISA_EXT_SSAIA); case KVM_RISCV_ISA_EXT_V: return riscv_v_vstate_ctrl_user_allowed(); default: @@ -116,6 +120,8 @@ static bool kvm_riscv_vcpu_isa_disable_allowed(unsigned long ext) case KVM_RISCV_ISA_EXT_C: case KVM_RISCV_ISA_EXT_I: case KVM_RISCV_ISA_EXT_M: + /* There is not architectural config bit to disable sscofpmf completely */ + case KVM_RISCV_ISA_EXT_SSCOFPMF: case KVM_RISCV_ISA_EXT_SSTC: case KVM_RISCV_ISA_EXT_SVINVAL: case KVM_RISCV_ISA_EXT_SVNAPOT: diff --git a/arch/riscv/kvm/vcpu_pmu.c b/arch/riscv/kvm/vcpu_pmu.c index 86391a5061dd..04db1f993c47 100644 --- a/arch/riscv/kvm/vcpu_pmu.c +++ b/arch/riscv/kvm/vcpu_pmu.c @@ -14,6 +14,7 @@ #include <asm/csr.h> #include <asm/kvm_vcpu_sbi.h> #include <asm/kvm_vcpu_pmu.h> +#include <asm/sbi.h> #include <linux/bitops.h> #define kvm_pmu_num_counters(pmu) ((pmu)->num_hw_ctrs + (pmu)->num_fw_ctrs) @@ -39,7 +40,7 @@ static u64 kvm_pmu_get_sample_period(struct kvm_pmc *pmc) u64 sample_period; if (!pmc->counter_val) - sample_period = counter_val_mask + 1; + sample_period = counter_val_mask; else sample_period = (-pmc->counter_val) & counter_val_mask; @@ -196,6 +197,36 @@ static int pmu_get_pmc_index(struct kvm_pmu *pmu, unsigned long eidx, return kvm_pmu_get_programmable_pmc_index(pmu, eidx, cbase, cmask); } +static int pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + unsigned long *out_val) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct kvm_pmc *pmc; + int fevent_code; + + if (!IS_ENABLED(CONFIG_32BIT)) { + pr_warn("%s: should be invoked for only RV32\n", __func__); + return -EINVAL; + } + + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld]during read\n", cidx); + return -EINVAL; + } + + pmc = &kvpmu->pmc[cidx]; + + if (pmc->cinfo.type != SBI_PMU_CTR_TYPE_FW) + return -EINVAL; + + fevent_code = get_event_code(pmc->event_idx); + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + + *out_val = pmc->counter_val >> 32; + + return 0; +} + static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, unsigned long *out_val) { @@ -204,6 +235,11 @@ static int pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, u64 enabled, running; int fevent_code; + if (cidx >= kvm_pmu_num_counters(kvpmu) || cidx == 1) { + pr_warn("Invalid counter id [%ld] during read\n", cidx); + return -EINVAL; + } + pmc = &kvpmu->pmc[cidx]; if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { @@ -229,8 +265,50 @@ static int kvm_pmu_validate_counter_mask(struct kvm_pmu *kvpmu, unsigned long ct return 0; } -static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, - unsigned long flags, unsigned long eidx, unsigned long evtdata) +static void kvm_riscv_pmu_overflow(struct perf_event *perf_event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + struct kvm_pmc *pmc = perf_event->overflow_handler_context; + struct kvm_vcpu *vcpu = pmc->vcpu; + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + struct riscv_pmu *rpmu = to_riscv_pmu(perf_event->pmu); + u64 period; + + /* + * Stop the event counting by directly accessing the perf_event. + * Otherwise, this needs to deferred via a workqueue. + * That will introduce skew in the counter value because the actual + * physical counter would start after returning from this function. + * It will be stopped again once the workqueue is scheduled + */ + rpmu->pmu.stop(perf_event, PERF_EF_UPDATE); + + /* + * The hw counter would start automatically when this function returns. + * Thus, the host may continue to interrupt and inject it to the guest + * even without the guest configuring the next event. Depending on the hardware + * the host may have some sluggishness only if privilege mode filtering is not + * available. In an ideal world, where qemu is not the only capable hardware, + * this can be removed. + * FYI: ARM64 does this way while x86 doesn't do anything as such. + * TODO: Should we keep it for RISC-V ? + */ + period = -(local64_read(&perf_event->count)); + + local64_set(&perf_event->hw.period_left, 0); + perf_event->attr.sample_period = period; + perf_event->hw.sample_period = period; + + set_bit(pmc->idx, kvpmu->pmc_overflown); + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_PMU_OVF); + + rpmu->pmu.start(perf_event, PERF_EF_RELOAD); +} + +static long kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr *attr, + unsigned long flags, unsigned long eidx, + unsigned long evtdata) { struct perf_event *event; @@ -247,7 +325,7 @@ static int kvm_pmu_create_perf_event(struct kvm_pmc *pmc, struct perf_event_attr */ attr->sample_period = kvm_pmu_get_sample_period(pmc); - event = perf_event_create_kernel_counter(attr, -1, current, NULL, pmc); + event = perf_event_create_kernel_counter(attr, -1, current, kvm_riscv_pmu_overflow, pmc); if (IS_ERR(event)) { pr_err("kvm pmu event creation failed for eidx %lx: %ld\n", eidx, PTR_ERR(event)); return PTR_ERR(event); @@ -310,6 +388,80 @@ int kvm_riscv_vcpu_pmu_read_hpm(struct kvm_vcpu *vcpu, unsigned int csr_num, return ret; } +static void kvm_pmu_clear_snapshot_area(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + + if (kvpmu->sdata) { + if (kvpmu->snapshot_addr != INVALID_GPA) { + memset(kvpmu->sdata, 0, snapshot_area_size); + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, + kvpmu->sdata, snapshot_area_size); + } else { + pr_warn("snapshot address invalid\n"); + } + kfree(kvpmu->sdata); + kvpmu->sdata = NULL; + } + kvpmu->snapshot_addr = INVALID_GPA; +} + +int kvm_riscv_vcpu_pmu_snapshot_set_shmem(struct kvm_vcpu *vcpu, unsigned long saddr_low, + unsigned long saddr_high, unsigned long flags, + struct kvm_vcpu_sbi_return *retdata) +{ + struct kvm_pmu *kvpmu = vcpu_to_pmu(vcpu); + int snapshot_area_size = sizeof(struct riscv_pmu_snapshot_data); + int sbiret = 0; + gpa_t saddr; + unsigned long hva; + bool writable; + + if (!kvpmu || flags) { + sbiret = SBI_ERR_INVALID_PARAM; + goto out; + } + + if (saddr_low == SBI_SHMEM_DISABLE && saddr_high == SBI_SHMEM_DISABLE) { + kvm_pmu_clear_snapshot_area(vcpu); + return 0; + } + + saddr = saddr_low; + + if (saddr_high != 0) { + if (IS_ENABLED(CONFIG_32BIT)) + saddr |= ((gpa_t)saddr_high << 32); + else + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + hva = kvm_vcpu_gfn_to_hva_prot(vcpu, saddr >> PAGE_SHIFT, &writable); + if (kvm_is_error_hva(hva) || !writable) { + sbiret = SBI_ERR_INVALID_ADDRESS; + goto out; + } + + kvpmu->sdata = kzalloc(snapshot_area_size, GFP_ATOMIC); + if (!kvpmu->sdata) + return -ENOMEM; + + if (kvm_vcpu_write_guest(vcpu, saddr, kvpmu->sdata, snapshot_area_size)) { + kfree(kvpmu->sdata); + sbiret = SBI_ERR_FAILURE; + goto out; + } + + kvpmu->snapshot_addr = saddr; + +out: + retdata->err_val = sbiret; + + return 0; +} + int kvm_riscv_vcpu_pmu_num_ctrs(struct kvm_vcpu *vcpu, struct kvm_vcpu_sbi_return *retdata) { @@ -343,20 +495,40 @@ int kvm_riscv_vcpu_pmu_ctr_start(struct kvm_vcpu *vcpu, unsigned long ctr_base, int i, pmc_index, sbiret = 0; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_START_FLAG_INIT_SNAPSHOT; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set) { + if (kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + if (kvm_vcpu_read_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data))) { + pr_warn("Unable to read snapshot shared memory while starting counters\n"); + sbiret = SBI_ERR_FAILURE; + goto out; + } + } /* Start the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; if (!test_bit(pmc_index, kvpmu->pmc_in_use)) continue; + /* The guest started the counter again. Reset the overflow status */ + clear_bit(pmc_index, kvpmu->pmc_overflown); pmc = &kvpmu->pmc[pmc_index]; - if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) + if (flags & SBI_PMU_START_FLAG_SET_INIT_VALUE) { pmc->counter_val = ival; + } else if (snap_flag_set) { + /* The counter index in the snapshot are relative to the counter base */ + pmc->counter_val = kvpmu->sdata->ctr_values[i]; + } + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) { fevent_code = get_event_code(pmc->event_idx); if (fevent_code >= SBI_PMU_FW_MAX) { @@ -400,12 +572,19 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, u64 enabled, running; struct kvm_pmc *pmc; int fevent_code; + bool snap_flag_set = flags & SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT; + bool shmem_needs_update = false; if (kvm_pmu_validate_counter_mask(kvpmu, ctr_base, ctr_mask) < 0) { sbiret = SBI_ERR_INVALID_PARAM; goto out; } + if (snap_flag_set && kvpmu->snapshot_addr == INVALID_GPA) { + sbiret = SBI_ERR_NO_SHMEM; + goto out; + } + /* Stop the counters that have been configured and requested by the guest */ for_each_set_bit(i, &ctr_mask, RISCV_MAX_COUNTERS) { pmc_index = i + ctr_base; @@ -432,21 +611,49 @@ int kvm_riscv_vcpu_pmu_ctr_stop(struct kvm_vcpu *vcpu, unsigned long ctr_base, sbiret = SBI_ERR_ALREADY_STOPPED; } - if (flags & SBI_PMU_STOP_FLAG_RESET) { - /* Relase the counter if this is a reset request */ - pmc->counter_val += perf_event_read_value(pmc->perf_event, - &enabled, &running); + if (flags & SBI_PMU_STOP_FLAG_RESET) + /* Release the counter if this is a reset request */ kvm_pmu_release_perf_event(pmc); - } } else { sbiret = SBI_ERR_INVALID_PARAM; } + + if (snap_flag_set && !sbiret) { + if (pmc->cinfo.type == SBI_PMU_CTR_TYPE_FW) + pmc->counter_val = kvpmu->fw_event[fevent_code].value; + else if (pmc->perf_event) + pmc->counter_val += perf_event_read_value(pmc->perf_event, + &enabled, &running); + /* + * The counter and overflow indicies in the snapshot region are w.r.to + * cbase. Modify the set bit in the counter mask instead of the pmc_index + * which indicates the absolute counter index. + */ + if (test_bit(pmc_index, kvpmu->pmc_overflown)) + kvpmu->sdata->ctr_overflow_mask |= BIT(i); + kvpmu->sdata->ctr_values[i] = pmc->counter_val; + shmem_needs_update = true; + } + if (flags & SBI_PMU_STOP_FLAG_RESET) { pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; clear_bit(pmc_index, kvpmu->pmc_in_use); + clear_bit(pmc_index, kvpmu->pmc_overflown); + if (snap_flag_set) { + /* + * Only clear the given counter as the caller is responsible to + * validate both the overflow mask and configured counters. + */ + kvpmu->sdata->ctr_overflow_mask &= ~BIT(i); + shmem_needs_update = true; + } } } + if (shmem_needs_update) + kvm_vcpu_write_guest(vcpu, kvpmu->snapshot_addr, kvpmu->sdata, + sizeof(struct riscv_pmu_snapshot_data)); + out: retdata->err_val = sbiret; @@ -458,7 +665,8 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba unsigned long eidx, u64 evtdata, struct kvm_vcpu_sbi_return *retdata) { - int ctr_idx, ret, sbiret = 0; + int ctr_idx, sbiret = 0; + long ret; bool is_fevent; unsigned long event_code; u32 etype = kvm_pmu_get_perf_event_type(eidx); @@ -517,8 +725,10 @@ int kvm_riscv_vcpu_pmu_ctr_cfg_match(struct kvm_vcpu *vcpu, unsigned long ctr_ba kvpmu->fw_event[event_code].started = true; } else { ret = kvm_pmu_create_perf_event(pmc, &attr, flags, eidx, evtdata); - if (ret) - return ret; + if (ret) { + sbiret = SBI_ERR_NOT_SUPPORTED; + goto out; + } } set_bit(ctr_idx, kvpmu->pmc_in_use); @@ -530,7 +740,19 @@ out: return 0; } -int kvm_riscv_vcpu_pmu_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, +int kvm_riscv_vcpu_pmu_fw_ctr_read_hi(struct kvm_vcpu *vcpu, unsigned long cidx, + struct kvm_vcpu_sbi_return *retdata) +{ + int ret; + + ret = pmu_fw_ctr_read_hi(vcpu, cidx, &retdata->out_val); + if (ret == -EINVAL) + retdata->err_val = SBI_ERR_INVALID_PARAM; + + return 0; +} + +int kvm_riscv_vcpu_pmu_fw_ctr_read(struct kvm_vcpu *vcpu, unsigned long cidx, struct kvm_vcpu_sbi_return *retdata) { int ret; @@ -566,6 +788,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) kvpmu->num_hw_ctrs = num_hw_ctrs + 1; kvpmu->num_fw_ctrs = SBI_PMU_FW_MAX; memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvpmu->snapshot_addr = INVALID_GPA; if (kvpmu->num_hw_ctrs > RISCV_KVM_MAX_HW_CTRS) { pr_warn_once("Limiting the hardware counters to 32 as specified by the ISA"); @@ -585,6 +808,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc = &kvpmu->pmc[i]; pmc->idx = i; pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; + pmc->vcpu = vcpu; if (i < kvpmu->num_hw_ctrs) { pmc->cinfo.type = SBI_PMU_CTR_TYPE_HW; if (i < 3) @@ -601,7 +825,7 @@ void kvm_riscv_vcpu_pmu_init(struct kvm_vcpu *vcpu) pmc->cinfo.csr = CSR_CYCLE + i; } else { pmc->cinfo.type = SBI_PMU_CTR_TYPE_FW; - pmc->cinfo.width = BITS_PER_LONG - 1; + pmc->cinfo.width = 63; } } @@ -617,14 +841,16 @@ void kvm_riscv_vcpu_pmu_deinit(struct kvm_vcpu *vcpu) if (!kvpmu) return; - for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_MAX_COUNTERS) { + for_each_set_bit(i, kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS) { pmc = &kvpmu->pmc[i]; pmc->counter_val = 0; kvm_pmu_release_perf_event(pmc); pmc->event_idx = SBI_PMU_EVENT_IDX_INVALID; } - bitmap_zero(kvpmu->pmc_in_use, RISCV_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_in_use, RISCV_KVM_MAX_COUNTERS); + bitmap_zero(kvpmu->pmc_overflown, RISCV_KVM_MAX_COUNTERS); memset(&kvpmu->fw_event, 0, SBI_PMU_FW_MAX * sizeof(struct kvm_fw_event)); + kvm_pmu_clear_snapshot_area(vcpu); } void kvm_riscv_vcpu_pmu_reset(struct kvm_vcpu *vcpu) diff --git a/arch/riscv/kvm/vcpu_sbi.c b/arch/riscv/kvm/vcpu_sbi.c index 72a2ffb8dcd1..62f409d4176e 100644 --- a/arch/riscv/kvm/vcpu_sbi.c +++ b/arch/riscv/kvm/vcpu_sbi.c @@ -138,8 +138,11 @@ void kvm_riscv_vcpu_sbi_system_reset(struct kvm_vcpu *vcpu, unsigned long i; struct kvm_vcpu *tmp; - kvm_for_each_vcpu(i, tmp, vcpu->kvm) - tmp->arch.power_off = true; + kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + spin_lock(&vcpu->arch.mp_state_lock); + WRITE_ONCE(tmp->arch.mp_state.mp_state, KVM_MP_STATE_STOPPED); + spin_unlock(&vcpu->arch.mp_state_lock); + } kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&run->system_event, 0, sizeof(run->system_event)); diff --git a/arch/riscv/kvm/vcpu_sbi_hsm.c b/arch/riscv/kvm/vcpu_sbi_hsm.c index 7dca0e9381d9..dce667f4b6ab 100644 --- a/arch/riscv/kvm/vcpu_sbi_hsm.c +++ b/arch/riscv/kvm/vcpu_sbi_hsm.c @@ -18,13 +18,20 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) struct kvm_cpu_context *cp = &vcpu->arch.guest_context; struct kvm_vcpu *target_vcpu; unsigned long target_vcpuid = cp->a0; + int ret = 0; target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) - return SBI_ERR_ALREADY_AVAILABLE; + spin_lock(&target_vcpu->arch.mp_state_lock); + + if (!kvm_riscv_vcpu_stopped(target_vcpu)) { + ret = SBI_ERR_ALREADY_AVAILABLE; + goto out; + } + + spin_lock(&target_vcpu->arch.reset_cntx_lock); reset_cntx = &target_vcpu->arch.guest_reset_context; /* start address */ reset_cntx->sepc = cp->a1; @@ -32,21 +39,35 @@ static int kvm_sbi_hsm_vcpu_start(struct kvm_vcpu *vcpu) reset_cntx->a0 = target_vcpuid; /* private data passed from kernel */ reset_cntx->a1 = cp->a2; + spin_unlock(&target_vcpu->arch.reset_cntx_lock); + kvm_make_request(KVM_REQ_VCPU_RESET, target_vcpu); - kvm_riscv_vcpu_power_on(target_vcpu); + __kvm_riscv_vcpu_power_on(target_vcpu); - return 0; +out: + spin_unlock(&target_vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_stop(struct kvm_vcpu *vcpu) { - if (vcpu->arch.power_off) - return SBI_ERR_FAILURE; + int ret = 0; - kvm_riscv_vcpu_power_off(vcpu); + spin_lock(&vcpu->arch.mp_state_lock); - return 0; + if (kvm_riscv_vcpu_stopped(vcpu)) { + ret = SBI_ERR_FAILURE; + goto out; + } + + __kvm_riscv_vcpu_power_off(vcpu); + +out: + spin_unlock(&vcpu->arch.mp_state_lock); + + return ret; } static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) @@ -58,7 +79,7 @@ static int kvm_sbi_hsm_vcpu_get_status(struct kvm_vcpu *vcpu) target_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, target_vcpuid); if (!target_vcpu) return SBI_ERR_INVALID_PARAM; - if (!target_vcpu->arch.power_off) + if (!kvm_riscv_vcpu_stopped(target_vcpu)) return SBI_HSM_STATE_STARTED; else if (vcpu->stat.generic.blocking) return SBI_HSM_STATE_SUSPENDED; @@ -71,14 +92,11 @@ static int kvm_sbi_ext_hsm_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, { int ret = 0; struct kvm_cpu_context *cp = &vcpu->arch.guest_context; - struct kvm *kvm = vcpu->kvm; unsigned long funcid = cp->a6; switch (funcid) { case SBI_EXT_HSM_HART_START: - mutex_lock(&kvm->lock); ret = kvm_sbi_hsm_vcpu_start(vcpu); - mutex_unlock(&kvm->lock); break; case SBI_EXT_HSM_HART_STOP: ret = kvm_sbi_hsm_vcpu_stop(vcpu); diff --git a/arch/riscv/kvm/vcpu_sbi_pmu.c b/arch/riscv/kvm/vcpu_sbi_pmu.c index 7eca72df2cbd..e4be34e03e83 100644 --- a/arch/riscv/kvm/vcpu_sbi_pmu.c +++ b/arch/riscv/kvm/vcpu_sbi_pmu.c @@ -42,9 +42,9 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, #endif /* * This can fail if perf core framework fails to create an event. - * Forward the error to userspace because it's an error which - * happened within the host kernel. The other option would be - * to convert to an SBI error and forward to the guest. + * No need to forward the error to userspace and exit the guest. + * The operation can continue without profiling. Forward the + * appropriate SBI error to the guest. */ ret = kvm_riscv_vcpu_pmu_ctr_cfg_match(vcpu, cp->a0, cp->a1, cp->a2, cp->a3, temp, retdata); @@ -62,7 +62,16 @@ static int kvm_sbi_ext_pmu_handler(struct kvm_vcpu *vcpu, struct kvm_run *run, ret = kvm_riscv_vcpu_pmu_ctr_stop(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; case SBI_EXT_PMU_COUNTER_FW_READ: - ret = kvm_riscv_vcpu_pmu_ctr_read(vcpu, cp->a0, retdata); + ret = kvm_riscv_vcpu_pmu_fw_ctr_read(vcpu, cp->a0, retdata); + break; + case SBI_EXT_PMU_COUNTER_FW_READ_HI: + if (IS_ENABLED(CONFIG_32BIT)) + ret = kvm_riscv_vcpu_pmu_fw_ctr_read_hi(vcpu, cp->a0, retdata); + else + retdata->out_val = 0; + break; + case SBI_EXT_PMU_SNAPSHOT_SET_SHMEM: + ret = kvm_riscv_vcpu_pmu_snapshot_set_shmem(vcpu, cp->a0, cp->a1, cp->a2, retdata); break; default: retdata->err_val = SBI_ERR_NOT_SUPPORTED; diff --git a/arch/riscv/kvm/vcpu_sbi_sta.c b/arch/riscv/kvm/vcpu_sbi_sta.c index d8cf9ca28c61..5f35427114c1 100644 --- a/arch/riscv/kvm/vcpu_sbi_sta.c +++ b/arch/riscv/kvm/vcpu_sbi_sta.c @@ -93,8 +93,8 @@ static int kvm_sbi_sta_steal_time_set_shmem(struct kvm_vcpu *vcpu) if (flags != 0) return SBI_ERR_INVALID_PARAM; - if (shmem_phys_lo == SBI_STA_SHMEM_DISABLE && - shmem_phys_hi == SBI_STA_SHMEM_DISABLE) { + if (shmem_phys_lo == SBI_SHMEM_DISABLE && + shmem_phys_hi == SBI_SHMEM_DISABLE) { vcpu->arch.sta.shmem = INVALID_GPA; return 0; } diff --git a/arch/riscv/kvm/vm.c b/arch/riscv/kvm/vm.c index ce58bc48e5b8..7396b8654f45 100644 --- a/arch/riscv/kvm/vm.c +++ b/arch/riscv/kvm/vm.c @@ -186,6 +186,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: case KVM_CAP_IMMEDIATE_EXIT: + case KVM_CAP_SET_GUEST_DEBUG: r = 1; break; case KVM_CAP_NR_VCPUS: diff --git a/arch/riscv/mm/dma-noncoherent.c b/arch/riscv/mm/dma-noncoherent.c index 843107f834b2..cb89d7e0ba88 100644 --- a/arch/riscv/mm/dma-noncoherent.c +++ b/arch/riscv/mm/dma-noncoherent.c @@ -128,8 +128,7 @@ void arch_dma_prep_coherent(struct page *page, size_t size) ALT_CMO_OP(FLUSH, flush_addr, size, riscv_cbom_block_size); } -void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - bool coherent) +void arch_setup_dma_ops(struct device *dev, bool coherent) { WARN_TAINT(!coherent && riscv_cbom_block_size > ARCH_DMA_MINALIGN, TAINT_CPU_OUT_OF_SPEC, diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fe8e159394d8..9940171c79f0 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -24,6 +24,7 @@ #include <linux/elf.h> #endif #include <linux/kfence.h> +#include <linux/execmem.h> #include <asm/fixmap.h> #include <asm/io.h> @@ -231,7 +232,7 @@ static void __init setup_bootmem(void) * In 64-bit, any use of __va/__pa before this point is wrong as we * did not know the start of DRAM before. */ - if (IS_ENABLED(CONFIG_64BIT)) + if (IS_ENABLED(CONFIG_64BIT) && IS_ENABLED(CONFIG_MMU)) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; /* @@ -1481,3 +1482,37 @@ void __init pgtable_cache_init(void) preallocate_pgd_pages_range(MODULES_VADDR, MODULES_END, "bpf/modules"); } #endif + +#ifdef CONFIG_EXECMEM +#ifdef CONFIG_MMU +static struct execmem_info execmem_info __ro_after_init; + +struct execmem_info __init *execmem_arch_setup(void) +{ + execmem_info = (struct execmem_info){ + .ranges = { + [EXECMEM_DEFAULT] = { + .start = MODULES_VADDR, + .end = MODULES_END, + .pgprot = PAGE_KERNEL, + .alignment = 1, + }, + [EXECMEM_KPROBES] = { + .start = VMALLOC_START, + .end = VMALLOC_END, + .pgprot = PAGE_KERNEL_READ_EXEC, + .alignment = 1, + }, + [EXECMEM_BPF] = { + .start = BPF_JIT_REGION_START, + .end = BPF_JIT_REGION_END, + .pgprot = PAGE_KERNEL, + .alignment = PAGE_SIZE, + }, + }, + }; + + return &execmem_info; +} +#endif /* CONFIG_MMU */ +#endif /* CONFIG_EXECMEM */ diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h index f4b6b3b9edda..fdbf88ca8b70 100644 --- a/arch/riscv/net/bpf_jit.h +++ b/arch/riscv/net/bpf_jit.h @@ -81,6 +81,8 @@ struct rv_jit_context { int nexentries; unsigned long flags; int stack_size; + u64 arena_vm_start; + u64 user_vm_start; }; /* Convert from ninsns to bytes. */ @@ -606,7 +608,7 @@ static inline u32 rv_nop(void) return rv_i_insn(0, 0, 0, 0, 0x13); } -/* RVC instrutions. */ +/* RVC instructions. */ static inline u16 rvc_addi4spn(u8 rd, u32 imm10) { @@ -735,7 +737,7 @@ static inline u16 rvc_swsp(u32 imm8, u8 rs2) return rv_css_insn(0x6, imm, rs2, 0x2); } -/* RVZBB instrutions. */ +/* RVZBB instructions. */ static inline u32 rvzbb_sextb(u8 rd, u8 rs1) { return rv_i_insn(0x604, rs1, 1, rd, 0x13); diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index 1adf2f39ce59..79a001d5533e 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -12,12 +12,14 @@ #include <linux/stop_machine.h> #include <asm/patch.h> #include <asm/cfi.h> +#include <asm/percpu.h> #include "bpf_jit.h" #define RV_FENTRY_NINSNS 2 #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ +#define RV_REG_ARENA RV_REG_S7 /* For storing arena_vm_start */ static const int regmap[] = { [BPF_REG_0] = RV_REG_A5, @@ -255,6 +257,10 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) emit_ld(RV_REG_S6, store_offset, RV_REG_SP, ctx); store_offset -= 8; } + if (ctx->arena_vm_start) { + emit_ld(RV_REG_ARENA, store_offset, RV_REG_SP, ctx); + store_offset -= 8; + } emit_addi(RV_REG_SP, RV_REG_SP, stack_adjust, ctx); /* Set return value. */ @@ -498,33 +504,33 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, break; /* src_reg = atomic_fetch_<op>(dst_reg + off16, src_reg) */ case BPF_ADD | BPF_FETCH: - emit(is64 ? rv_amoadd_d(rs, rs, rd, 0, 0) : - rv_amoadd_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoadd_d(rs, rs, rd, 1, 1) : + rv_amoadd_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; case BPF_AND | BPF_FETCH: - emit(is64 ? rv_amoand_d(rs, rs, rd, 0, 0) : - rv_amoand_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoand_d(rs, rs, rd, 1, 1) : + rv_amoand_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; case BPF_OR | BPF_FETCH: - emit(is64 ? rv_amoor_d(rs, rs, rd, 0, 0) : - rv_amoor_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoor_d(rs, rs, rd, 1, 1) : + rv_amoor_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; case BPF_XOR | BPF_FETCH: - emit(is64 ? rv_amoxor_d(rs, rs, rd, 0, 0) : - rv_amoxor_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoxor_d(rs, rs, rd, 1, 1) : + rv_amoxor_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; /* src_reg = atomic_xchg(dst_reg + off16, src_reg); */ case BPF_XCHG: - emit(is64 ? rv_amoswap_d(rs, rs, rd, 0, 0) : - rv_amoswap_w(rs, rs, rd, 0, 0), ctx); + emit(is64 ? rv_amoswap_d(rs, rs, rd, 1, 1) : + rv_amoswap_w(rs, rs, rd, 1, 1), ctx); if (!is64) emit_zextw(rs, rs, ctx); break; @@ -548,6 +554,7 @@ static void emit_atomic(u8 rd, u8 rs, s16 off, s32 imm, bool is64, #define BPF_FIXUP_OFFSET_MASK GENMASK(26, 0) #define BPF_FIXUP_REG_MASK GENMASK(31, 27) +#define REG_DONT_CLEAR_MARKER 0 /* RV_REG_ZERO unused in pt_regmap */ bool ex_handler_bpf(const struct exception_table_entry *ex, struct pt_regs *regs) @@ -555,7 +562,8 @@ bool ex_handler_bpf(const struct exception_table_entry *ex, off_t offset = FIELD_GET(BPF_FIXUP_OFFSET_MASK, ex->fixup); int regs_offset = FIELD_GET(BPF_FIXUP_REG_MASK, ex->fixup); - *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0; + if (regs_offset != REG_DONT_CLEAR_MARKER) + *(unsigned long *)((void *)regs + pt_regmap[regs_offset]) = 0; regs->epc = (unsigned long)&ex->fixup - offset; return true; @@ -572,7 +580,8 @@ static int add_exception_handler(const struct bpf_insn *insn, off_t fixup_offset; if (!ctx->insns || !ctx->ro_insns || !ctx->prog->aux->extable || - (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX)) + (BPF_MODE(insn->code) != BPF_PROBE_MEM && BPF_MODE(insn->code) != BPF_PROBE_MEMSX && + BPF_MODE(insn->code) != BPF_PROBE_MEM32)) return 0; if (WARN_ON_ONCE(ctx->nexentries >= ctx->prog->aux->num_exentries)) @@ -722,6 +731,9 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of if (ret) return ret; + /* store prog start time */ + emit_mv(RV_REG_S1, RV_REG_A0, ctx); + /* if (__bpf_prog_enter(prog) == 0) * goto skip_exec_of_prog; */ @@ -729,9 +741,6 @@ static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_of /* nop reserved for conditional jump */ emit(rv_nop(), ctx); - /* store prog start time */ - emit_mv(RV_REG_S1, RV_REG_A0, ctx); - /* arg1: &args_off */ emit_addi(RV_REG_A0, RV_REG_FP, -args_off, ctx); if (!p->jited) @@ -1073,6 +1082,33 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, /* dst = src */ case BPF_ALU | BPF_MOV | BPF_X: case BPF_ALU64 | BPF_MOV | BPF_X: + if (insn_is_cast_user(insn)) { + emit_mv(RV_REG_T1, rs, ctx); + emit_zextw(RV_REG_T1, RV_REG_T1, ctx); + emit_imm(rd, (ctx->user_vm_start >> 32) << 32, ctx); + emit(rv_beq(RV_REG_T1, RV_REG_ZERO, 4), ctx); + emit_or(RV_REG_T1, rd, RV_REG_T1, ctx); + emit_mv(rd, RV_REG_T1, ctx); + break; + } else if (insn_is_mov_percpu_addr(insn)) { + if (rd != rs) + emit_mv(rd, rs, ctx); +#ifdef CONFIG_SMP + /* Load current CPU number in T1 */ + emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu), + RV_REG_TP, ctx); + /* << 3 because offsets are 8 bytes */ + emit_slli(RV_REG_T1, RV_REG_T1, 3, ctx); + /* Load address of __per_cpu_offset array in T2 */ + emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx); + /* Add offset of current CPU to __per_cpu_offset */ + emit_add(RV_REG_T1, RV_REG_T2, RV_REG_T1, ctx); + /* Load __per_cpu_offset[cpu] in T1 */ + emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx); + /* Add the offset to Rd */ + emit_add(rd, rd, RV_REG_T1, ctx); +#endif + } if (imm == 1) { /* Special mov32 for zext */ emit_zextw(rd, rd, ctx); @@ -1457,6 +1493,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, bool fixed_addr; u64 addr; + /* Inline calls to bpf_get_smp_processor_id() + * + * RV_REG_TP holds the address of the current CPU's task_struct and thread_info is + * at offset 0 in task_struct. + * Load cpu from thread_info: + * Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu + * + * This replicates the implementation of raw_smp_processor_id() on RISCV + */ + if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) { + /* Load current CPU number in R0 */ + emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu), + RV_REG_TP, ctx); + break; + } + mark_call(ctx); ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, &fixed_addr); @@ -1539,6 +1591,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: + /* LDX | PROBE_MEM32: dst = *(unsigned size *)(src + RV_REG_ARENA + off) */ + case BPF_LDX | BPF_PROBE_MEM32 | BPF_B: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_H: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_W: + case BPF_LDX | BPF_PROBE_MEM32 | BPF_DW: { int insn_len, insns_start; bool sign_ext; @@ -1546,6 +1603,11 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, sign_ext = BPF_MODE(insn->code) == BPF_MEMSX || BPF_MODE(insn->code) == BPF_PROBE_MEMSX; + if (BPF_MODE(insn->code) == BPF_PROBE_MEM32) { + emit_add(RV_REG_T2, rs, RV_REG_ARENA, ctx); + rs = RV_REG_T2; + } + switch (BPF_SIZE(code)) { case BPF_B: if (is_12b_int(off)) { @@ -1682,6 +1744,86 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); break; + case BPF_ST | BPF_PROBE_MEM32 | BPF_B: + case BPF_ST | BPF_PROBE_MEM32 | BPF_H: + case BPF_ST | BPF_PROBE_MEM32 | BPF_W: + case BPF_ST | BPF_PROBE_MEM32 | BPF_DW: + { + int insn_len, insns_start; + + emit_add(RV_REG_T3, rd, RV_REG_ARENA, ctx); + rd = RV_REG_T3; + + /* Load imm to a register then store it */ + emit_imm(RV_REG_T1, imm, ctx); + + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sb(rd, off, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sb(RV_REG_T2, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_H: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sh(rd, off, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sh(RV_REG_T2, 0, RV_REG_T1), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_W: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sw(rd, off, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit_sw(RV_REG_T2, 0, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_DW: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sd(rd, off, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T2, off, ctx); + emit_add(RV_REG_T2, RV_REG_T2, rd, ctx); + insns_start = ctx->ninsns; + emit_sd(RV_REG_T2, 0, RV_REG_T1, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER, + insn_len); + if (ret) + return ret; + + break; + } + /* STX: *(size *)(dst + off) = src */ case BPF_STX | BPF_MEM | BPF_B: if (is_12b_int(off)) { @@ -1728,6 +1870,84 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, emit_atomic(rd, rs, off, imm, BPF_SIZE(code) == BPF_DW, ctx); break; + + case BPF_STX | BPF_PROBE_MEM32 | BPF_B: + case BPF_STX | BPF_PROBE_MEM32 | BPF_H: + case BPF_STX | BPF_PROBE_MEM32 | BPF_W: + case BPF_STX | BPF_PROBE_MEM32 | BPF_DW: + { + int insn_len, insns_start; + + emit_add(RV_REG_T2, rd, RV_REG_ARENA, ctx); + rd = RV_REG_T2; + + switch (BPF_SIZE(code)) { + case BPF_B: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sb(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sb(RV_REG_T1, 0, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_H: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit(rv_sh(rd, off, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit(rv_sh(RV_REG_T1, 0, rs), ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_W: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sw(rd, off, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit_sw(RV_REG_T1, 0, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + case BPF_DW: + if (is_12b_int(off)) { + insns_start = ctx->ninsns; + emit_sd(rd, off, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + emit_imm(RV_REG_T1, off, ctx); + emit_add(RV_REG_T1, RV_REG_T1, rd, ctx); + insns_start = ctx->ninsns; + emit_sd(RV_REG_T1, 0, rs, ctx); + insn_len = ctx->ninsns - insns_start; + break; + } + + ret = add_exception_handler(insn, ctx, REG_DONT_CLEAR_MARKER, + insn_len); + if (ret) + return ret; + + break; + } + default: pr_err("bpf-jit: unknown opcode %02x\n", code); return -EINVAL; @@ -1759,6 +1979,8 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) stack_adjust += 8; if (seen_reg(RV_REG_S6, ctx)) stack_adjust += 8; + if (ctx->arena_vm_start) + stack_adjust += 8; stack_adjust = round_up(stack_adjust, 16); stack_adjust += bpf_stack_adjust; @@ -1810,6 +2032,10 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) emit_sd(RV_REG_SP, store_offset, RV_REG_S6, ctx); store_offset -= 8; } + if (ctx->arena_vm_start) { + emit_sd(RV_REG_SP, store_offset, RV_REG_ARENA, ctx); + store_offset -= 8; + } emit_addi(RV_REG_FP, RV_REG_SP, stack_adjust, ctx); @@ -1823,6 +2049,9 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx, bool is_subprog) emit_mv(RV_REG_TCC_SAVED, RV_REG_TCC, ctx); ctx->stack_size = stack_adjust; + + if (ctx->arena_vm_start) + emit_imm(RV_REG_ARENA, ctx->arena_vm_start, ctx); } void bpf_jit_build_epilogue(struct rv_jit_context *ctx) @@ -1839,3 +2068,23 @@ bool bpf_jit_supports_ptr_xchg(void) { return true; } + +bool bpf_jit_supports_arena(void) +{ + return true; +} + +bool bpf_jit_supports_percpu_insn(void) +{ + return true; +} + +bool bpf_jit_inlines_helper_call(s32 imm) +{ + switch (imm) { + case BPF_FUNC_get_smp_processor_id: + return true; + default: + return false; + } +} diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 6b3acac30c06..0a96abdaca65 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -80,6 +80,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto skip_init_ctx; } + ctx->arena_vm_start = bpf_arena_get_kern_vm_start(prog->aux->arena); + ctx->user_vm_start = bpf_arena_get_user_vm_start(prog->aux->arena); ctx->prog = prog; ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); if (!ctx->offset) { @@ -219,19 +221,6 @@ u64 bpf_jit_alloc_exec_limit(void) return BPF_JIT_REGION_SIZE; } -void *bpf_jit_alloc_exec(unsigned long size) -{ - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} - -void bpf_jit_free_exec(void *addr) -{ - return vfree(addr); -} - void *bpf_arch_text_copy(void *dst, void *src, size_t len) { int ret; diff --git a/arch/riscv/purgatory/Makefile b/arch/riscv/purgatory/Makefile index 280b0eb352b8..f11945ee2490 100644 --- a/arch/riscv/purgatory/Makefile +++ b/arch/riscv/purgatory/Makefile @@ -1,5 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -OBJECT_FILES_NON_STANDARD := y purgatory-y := purgatory.o sha256.o entry.o string.o ctype.o memcpy.o memset.o purgatory-y += strcmp.o strlen.o strncmp.o @@ -47,13 +46,6 @@ LDFLAGS_purgatory.ro := -r $(PURGATORY_LDFLAGS) LDFLAGS_purgatory.chk := $(PURGATORY_LDFLAGS) targets += purgatory.ro purgatory.chk -# Sanitizer, etc. runtimes are unavailable and cannot be linked here. -GCOV_PROFILE := n -KASAN_SANITIZE := n -UBSAN_SANITIZE := n -KCSAN_SANITIZE := n -KCOV_INSTRUMENT := n - # These are adjustments to the compiler flags used for objects that # make up the standalone purgatory.ro |