diff options
Diffstat (limited to 'arch/riscv')
64 files changed, 4910 insertions, 1011 deletions
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 1a3b5a5276be..a197258595ef 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -20,7 +20,6 @@ config RISCV select CLONE_BACKWARDS select COMMON_CLK select GENERIC_CLOCKEVENTS - select GENERIC_CPU_DEVICES select GENERIC_IRQ_SHOW select GENERIC_PCI_IOMAP select GENERIC_SCHED_CLOCK @@ -29,6 +28,7 @@ config RISCV select GENERIC_SMP_IDLE_THREAD select GENERIC_ATOMIC64 if !64BIT select GENERIC_IOREMAP + select GENERIC_PTDUMP if MMU select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select HAVE_ASM_MODVERSIONS @@ -50,15 +50,17 @@ config RISCV select PCI_DOMAINS_GENERIC if PCI select PCI_MSI if PCI select RISCV_TIMER - select UACCESS_MEMCPY if !MMU select GENERIC_IRQ_MULTI_HANDLER select GENERIC_ARCH_TOPOLOGY if SMP select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MMIOWB select ARCH_HAS_DEBUG_VIRTUAL - select HAVE_EBPF_JIT if 64BIT + select HAVE_EBPF_JIT select EDAC_SUPPORT select ARCH_HAS_GIGANTIC_PAGE + select ARCH_HAS_SET_DIRECT_MAP + select ARCH_HAS_SET_MEMORY + select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_WANT_HUGE_PMD_SHARE if 64BIT select SPARSEMEM_STATIC if 32BIT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU @@ -130,6 +132,9 @@ config ARCH_SELECT_MEMORY_MODEL config ARCH_WANT_GENERAL_HUGETLB def_bool y +config ARCH_SUPPORTS_DEBUG_PAGEALLOC + def_bool y + config SYS_SUPPORTS_HUGETLBFS def_bool y @@ -248,6 +253,17 @@ config NR_CPUS depends on SMP default "8" +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + select GENERIC_IRQ_MIGRATION + help + + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu. + + Say N if you want to disable CPU hotplug. + choice prompt "CPU Tuning" default TUNE_GENERIC @@ -308,6 +324,13 @@ config SECCOMP and the task is only allowed to execute a few safe syscalls defined by each seccomp mode. 
+config RISCV_SBI_V01 + bool "SBI v0.1 support" + default y + depends on RISCV_SBI + help + This config allows the kernel to use SBI v0.1 APIs. This will be + deprecated in the future once legacy M-mode software is no longer in use. endmenu menu "Boot options" diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 3078b2de0b2d..216286db81c9 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -12,20 +12,6 @@ config SOC_SIFIVE config SOC_VIRT bool "QEMU Virt Machine" - select VIRTIO_PCI - select VIRTIO_BALLOON - select VIRTIO_MMIO - select VIRTIO_CONSOLE - select VIRTIO_NET - select NET_9P_VIRTIO - select VIRTIO_BLK - select SCSI_VIRTIO - select DRM_VIRTIO_GPU - select HW_RANDOM_VIRTIO - select RPMSG_CHAR - select RPMSG_VIRTIO - select CRYPTO_DEV_VIRTIO - select VIRTIO_INPUT select POWER_RESET_SYSCON select POWER_RESET_SYSCON_POWEROFF select GOLDFISH @@ -34,4 +20,14 @@ config SOC_VIRT help This enables support for QEMU Virt Machine. +config SOC_KENDRYTE + bool "Kendryte K210 SoC" + depends on !MMU + select BUILTIN_DTB + select SERIAL_SIFIVE if TTY + select SERIAL_SIFIVE_CONSOLE if TTY + select SIFIVE_PLIC + help + This enables support for Kendryte K210 SoC platform hardware. 
+ endmenu diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 259cb53d7f20..fb6e37db836d 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -85,12 +85,12 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ -ifeq ($(CONFIG_RISCV_M_MODE),y) -KBUILD_IMAGE := $(boot)/loader +ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_KENDRYTE),yy) +KBUILD_IMAGE := $(boot)/loader.bin else KBUILD_IMAGE := $(boot)/Image.gz endif -BOOT_TARGETS := Image Image.gz loader +BOOT_TARGETS := Image Image.gz loader loader.bin all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore index 8a45a37d2af4..574c10f8ff68 100644 --- a/arch/riscv/boot/.gitignore +++ b/arch/riscv/boot/.gitignore @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0-only Image Image.gz loader diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 36db8145f9f4..3530c59b3ea7 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -41,6 +41,9 @@ $(obj)/Image.lzma: $(obj)/Image FORCE $(obj)/Image.lzo: $(obj)/Image FORCE $(call if_changed,lzo) +$(obj)/loader.bin: $(obj)/loader FORCE + $(call if_changed,objcopy) + install: $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ $(obj)/Image System.map "$(INSTALL_PATH)" diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile index dcc3ada78455..557f0b519c8e 100644 --- a/arch/riscv/boot/dts/Makefile +++ b/arch/riscv/boot/dts/Makefile @@ -1,2 +1,3 @@ # SPDX-License-Identifier: GPL-2.0 subdir-y += sifive +subdir-y += kendryte diff --git a/arch/riscv/boot/dts/kendryte/Makefile b/arch/riscv/boot/dts/kendryte/Makefile new file mode 100644 index 000000000000..815444e69e89 --- /dev/null +++ b/arch/riscv/boot/dts/kendryte/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +dtb-$(CONFIG_SOC_KENDRYTE) += k210.dtb diff --git a/arch/riscv/boot/dts/kendryte/k210.dts b/arch/riscv/boot/dts/kendryte/k210.dts new file mode 100644 index 
000000000000..0d1f28fce6b2 --- /dev/null +++ b/arch/riscv/boot/dts/kendryte/k210.dts @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ + +/dts-v1/; + +#include "k210.dtsi" + +/ { + model = "Kendryte K210 generic"; + compatible = "kendryte,k210"; + + chosen { + bootargs = "earlycon console=ttySIF0"; + stdout-path = "serial0"; + }; +}; + +&uarths0 { + status = "okay"; +}; + diff --git a/arch/riscv/boot/dts/kendryte/k210.dtsi b/arch/riscv/boot/dts/kendryte/k210.dtsi new file mode 100644 index 000000000000..c1df56ccb8d5 --- /dev/null +++ b/arch/riscv/boot/dts/kendryte/k210.dtsi @@ -0,0 +1,123 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 Sean Anderson <seanga2@gmail.com> + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#include <dt-bindings/clock/k210-clk.h> + +/ { + /* + * Although the K210 is a 64-bit CPU, the address bus is only 32 bits + * wide, and the upper half of all addresses is ignored. + */ + #address-cells = <1>; + #size-cells = <1>; + compatible = "kendryte,k210"; + + aliases { + serial0 = &uarths0; + }; + + /* + * The K210 has an sv39 MMU following the privileged specification v1.9. + * Since this is a non-ratified draft specification, the kernel does not + * support it and K210 support is enabled only for the !MMU case. + * Be consistent with this by setting the CPUs' MMU type to "none". 
+ */ + cpus { + #address-cells = <1>; + #size-cells = <0>; + timebase-frequency = <7800000>; + cpu0: cpu@0 { + device_type = "cpu"; + reg = <0>; + compatible = "kendryte,k210", "sifive,rocket0", "riscv"; + riscv,isa = "rv64imafdc"; + mmu-type = "none"; + i-cache-size = <0x8000>; + i-cache-block-size = <64>; + d-cache-size = <0x8000>; + d-cache-block-size = <64>; + clocks = <&sysctl K210_CLK_CPU>; + clock-frequency = <390000000>; + cpu0_intc: interrupt-controller { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "riscv,cpu-intc"; + }; + }; + cpu1: cpu@1 { + device_type = "cpu"; + reg = <1>; + compatible = "kendryte,k210", "sifive,rocket0", "riscv"; + riscv,isa = "rv64imafdc"; + mmu-type = "none"; + i-cache-size = <0x8000>; + i-cache-block-size = <64>; + d-cache-size = <0x8000>; + d-cache-block-size = <64>; + clocks = <&sysctl K210_CLK_CPU>; + clock-frequency = <390000000>; + cpu1_intc: interrupt-controller { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "riscv,cpu-intc"; + }; + }; + }; + + sram: memory@80000000 { + device_type = "memory"; + reg = <0x80000000 0x400000>, + <0x80400000 0x200000>, + <0x80600000 0x200000>; + reg-names = "sram0", "sram1", "aisram"; + }; + + clocks { + in0: oscillator { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <26000000>; + }; + }; + + soc { + #address-cells = <1>; + #size-cells = <1>; + compatible = "kendryte,k210-soc", "simple-bus"; + ranges; + interrupt-parent = <&plic0>; + + sysctl: sysctl@50440000 { + compatible = "kendryte,k210-sysctl", "simple-mfd"; + reg = <0x50440000 0x1000>; + #clock-cells = <1>; + }; + + clint0: interrupt-controller@2000000 { + compatible = "riscv,clint0"; + reg = <0x2000000 0xC000>; + interrupts-extended = <&cpu0_intc 3>, <&cpu1_intc 3>; + clocks = <&sysctl K210_CLK_ACLK>; + }; + + plic0: interrupt-controller@c000000 { + #interrupt-cells = <1>; + interrupt-controller; + compatible = "kendryte,k210-plic0", "riscv,plic0"; + reg = <0xC000000 
0x4000000>; + interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 0xffffffff>, + <&cpu1_intc 11>, <&cpu1_intc 0xffffffff>; + riscv,ndev = <65>; + riscv,max-priority = <7>; + }; + + uarths0: serial@38000000 { + compatible = "kendryte,k210-uarths", "sifive,uart0"; + reg = <0x38000000 0x1000>; + interrupts = <33>; + clocks = <&sysctl K210_CLK_CPU>; + }; + }; +}; diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index c8f084203067..4da4886246a4 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -31,6 +31,7 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -38,12 +39,15 @@ CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y +CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y CONFIG_MACB=y CONFIG_E1000E=y CONFIG_R8169=y @@ -54,13 +58,16 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=y CONFIG_SPI=y CONFIG_SPI_SIFIVE=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y CONFIG_DRM=y CONFIG_DRM_RADEON=y +CONFIG_DRM_VIRTIO_GPU=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -74,6 +81,12 @@ CONFIG_USB_UAS=y CONFIG_MMC=y CONFIG_MMC_SPI=y CONFIG_RTC_CLASS=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO=y +CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -88,16 +101,17 @@ CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_9P_FS=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_PAGEALLOC=y +CONFIG_SCHED_STACK_END_CHECK=y 
CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y -CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_TIMEKEEPING=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y @@ -114,3 +128,4 @@ CONFIG_DEBUG_BLOCK_EXT_DEVT=y # CONFIG_FTRACE is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_MEMTEST=y +# CONFIG_SYSFS_SYSCALL is not set diff --git a/arch/riscv/configs/nommu_k210_defconfig b/arch/riscv/configs/nommu_k210_defconfig new file mode 100644 index 000000000000..632aa2f95e57 --- /dev/null +++ b/arch/riscv/configs/nommu_k210_defconfig @@ -0,0 +1,68 @@ +# CONFIG_CPU_ISOLATION is not set +CONFIG_LOG_BUF_SHIFT=15 +CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12 +CONFIG_BLK_DEV_INITRD=y +CONFIG_INITRAMFS_SOURCE="" +CONFIG_INITRAMFS_FORCE=y +# CONFIG_RD_BZIP2 is not set +# CONFIG_RD_LZMA is not set +# CONFIG_RD_XZ is not set +# CONFIG_RD_LZO is not set +# CONFIG_RD_LZ4 is not set +# CONFIG_BOOT_CONFIG is not set +CONFIG_CC_OPTIMIZE_FOR_SIZE=y +# CONFIG_SYSFS_SYSCALL is not set +# CONFIG_FHANDLE is not set +# CONFIG_BASE_FULL is not set +# CONFIG_EPOLL is not set +# CONFIG_SIGNALFD is not set +# CONFIG_TIMERFD is not set +# CONFIG_EVENTFD is not set +# CONFIG_AIO is not set +# CONFIG_IO_URING is not set +# CONFIG_ADVISE_SYSCALLS is not set +# CONFIG_MEMBARRIER is not set +# CONFIG_KALLSYMS is not set +CONFIG_EMBEDDED=y +# CONFIG_VM_EVENT_COUNTERS is not set +# CONFIG_COMPAT_BRK is not set +CONFIG_SLOB=y +# CONFIG_SLAB_MERGE_DEFAULT is not set +# CONFIG_MMU is not set +CONFIG_SOC_KENDRYTE=y +CONFIG_MAXPHYSMEM_2GB=y +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +CONFIG_CMDLINE="earlycon console=ttySIF0" +CONFIG_CMDLINE_FORCE=y +CONFIG_USE_BUILTIN_DTB=y +CONFIG_BUILTIN_DTB_SOURCE="kendryte/k210" +# CONFIG_BLOCK is not set +CONFIG_BINFMT_FLAT=y +# CONFIG_COREDUMP is not set +CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y +# CONFIG_FW_LOADER is not set +# CONFIG_ALLOW_DEV_COREDUMP is not set +# 
CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO is not set +# CONFIG_LEGACY_PTYS is not set +# CONFIG_LDISC_AUTOLOAD is not set +# CONFIG_DEVMEM is not set +# CONFIG_HW_RANDOM is not set +# CONFIG_HWMON is not set +# CONFIG_VGA_CONSOLE is not set +# CONFIG_HID is not set +# CONFIG_USB_SUPPORT is not set +# CONFIG_VIRTIO_MENU is not set +# CONFIG_DNOTIFY is not set +# CONFIG_INOTIFY_USER is not set +# CONFIG_MISC_FILESYSTEMS is not set +CONFIG_LSM="[]" +CONFIG_PRINTK_TIME=y +# CONFIG_DEBUG_MISC is not set +# CONFIG_SCHED_DEBUG is not set +# CONFIG_RCU_TRACE is not set +# CONFIG_FTRACE is not set +# CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/riscv/configs/rv32_defconfig b/arch/riscv/configs/rv32_defconfig index a844920a261f..05bbf5240569 100644 --- a/arch/riscv/configs/rv32_defconfig +++ b/arch/riscv/configs/rv32_defconfig @@ -31,6 +31,7 @@ CONFIG_IP_PNP_BOOTP=y CONFIG_IP_PNP_RARP=y CONFIG_NETLINK_DIAG=y CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_PCI=y CONFIG_PCIEPORTBUS=y CONFIG_PCI_HOST_GENERIC=y @@ -38,12 +39,15 @@ CONFIG_PCIE_XILINX=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_BLK_DEV_LOOP=y +CONFIG_VIRTIO_BLK=y CONFIG_BLK_DEV_SD=y CONFIG_BLK_DEV_SR=y +CONFIG_SCSI_VIRTIO=y CONFIG_ATA=y CONFIG_SATA_AHCI=y CONFIG_SATA_AHCI_PLATFORM=y CONFIG_NETDEVICES=y +CONFIG_VIRTIO_NET=y CONFIG_MACB=y CONFIG_E1000E=y CONFIG_R8169=y @@ -54,11 +58,14 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y CONFIG_SERIAL_EARLYCON_RISCV_SBI=y CONFIG_HVC_RISCV_SBI=y +CONFIG_VIRTIO_CONSOLE=y CONFIG_HW_RANDOM=y +CONFIG_HW_RANDOM_VIRTIO=y # CONFIG_PTP_1588_CLOCK is not set CONFIG_POWER_RESET=y CONFIG_DRM=y CONFIG_DRM_RADEON=y +CONFIG_DRM_VIRTIO_GPU=y CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_USB=y CONFIG_USB_XHCI_HCD=y @@ -70,6 +77,12 @@ CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y CONFIG_RTC_CLASS=y +CONFIG_VIRTIO_PCI=y +CONFIG_VIRTIO_BALLOON=y +CONFIG_VIRTIO_INPUT=y +CONFIG_VIRTIO_MMIO=y 
+CONFIG_RPMSG_CHAR=y +CONFIG_RPMSG_VIRTIO=y CONFIG_EXT4_FS=y CONFIG_EXT4_FS_POSIX_ACL=y CONFIG_AUTOFS4_FS=y @@ -84,16 +97,17 @@ CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_9P_FS=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_CRYPTO_DEV_VIRTIO=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_FS=y CONFIG_DEBUG_PAGEALLOC=y +CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_VM=y CONFIG_DEBUG_VM_PGFLAGS=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y -CONFIG_SCHED_STACK_END_CHECK=y CONFIG_DEBUG_TIMEKEEPING=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y @@ -110,3 +124,4 @@ CONFIG_DEBUG_BLOCK_EXT_DEVT=y # CONFIG_FTRACE is not set # CONFIG_RUNTIME_TESTING_MENU is not set CONFIG_MEMTEST=y +# CONFIG_SYSFS_SYSCALL is not set diff --git a/arch/riscv/include/asm/Kbuild b/arch/riscv/include/asm/Kbuild index ec0ca8c6ab64..3d9410bb4de0 100644 --- a/arch/riscv/include/asm/Kbuild +++ b/arch/riscv/include/asm/Kbuild @@ -1,35 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -generic-y += bugs.h -generic-y += checksum.h -generic-y += compat.h -generic-y += device.h -generic-y += div64.h generic-y += extable.h generic-y += flat.h -generic-y += dma.h -generic-y += dma-mapping.h -generic-y += emergency-restart.h -generic-y += exec.h -generic-y += fb.h -generic-y += hardirq.h -generic-y += hw_irq.h -generic-y += irq_regs.h -generic-y += irq_work.h -generic-y += kdebug.h -generic-y += kmap_types.h generic-y += kvm_para.h -generic-y += local.h generic-y += local64.h -generic-y += mm-arch-hooks.h -generic-y += percpu.h -generic-y += preempt.h -generic-y += sections.h -generic-y += serial.h -generic-y += shmparam.h -generic-y += topology.h -generic-y += trace_clock.h -generic-y += unaligned.h generic-y += user.h -generic-y += vga.h generic-y += vmlinux.lds.h -generic-y += xor.h diff --git a/arch/riscv/include/asm/bug.h b/arch/riscv/include/asm/bug.h index 75604fec1b1b..d6f1ec08d97b 100644 --- a/arch/riscv/include/asm/bug.h +++ b/arch/riscv/include/asm/bug.h @@ 
-19,6 +19,14 @@ #define __BUG_INSN_32 _UL(0x00100073) /* ebreak */ #define __BUG_INSN_16 _UL(0x9002) /* c.ebreak */ +#define GET_INSN_LENGTH(insn) \ +({ \ + unsigned long __len; \ + __len = ((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? \ + 4UL : 2UL; \ + __len; \ +}) + typedef u32 bug_insn_t; #ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h index 555b20b11dc3..c8677c75f82c 100644 --- a/arch/riscv/include/asm/cacheflush.h +++ b/arch/riscv/include/asm/cacheflush.h @@ -85,7 +85,7 @@ static inline void flush_dcache_page(struct page *page) * so instead we just flush the whole thing. */ #define flush_icache_range(start, end) flush_icache_all() -#define flush_icache_user_range(vma, pg, addr, len) flush_icache_all() +#define flush_icache_user_range(vma, pg, addr, len) flush_icache_mm(vma->vm_mm, 0) #ifndef CONFIG_SMP diff --git a/arch/riscv/include/asm/clint.h b/arch/riscv/include/asm/clint.h index 6eaa2eedd694..a279b17a6aad 100644 --- a/arch/riscv/include/asm/clint.h +++ b/arch/riscv/include/asm/clint.h @@ -15,12 +15,12 @@ static inline void clint_send_ipi_single(unsigned long hartid) writel(1, clint_ipi_base + hartid); } -static inline void clint_send_ipi_mask(const struct cpumask *hartid_mask) +static inline void clint_send_ipi_mask(const struct cpumask *mask) { - int hartid; + int cpu; - for_each_cpu(hartid, hartid_mask) - clint_send_ipi_single(hartid); + for_each_cpu(cpu, mask) + clint_send_ipi_single(cpuid_to_hartid_map(cpu)); } static inline void clint_clear_ipi(unsigned long hartid) diff --git a/arch/riscv/include/asm/cpu_ops.h b/arch/riscv/include/asm/cpu_ops.h new file mode 100644 index 000000000000..a8ec3c5c1bd2 --- /dev/null +++ b/arch/riscv/include/asm/cpu_ops.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020 Western Digital Corporation or its affiliates. 
+ * Based on arch/arm64/include/asm/cpu_ops.h + */ +#ifndef __ASM_CPU_OPS_H +#define __ASM_CPU_OPS_H + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/threads.h> + +/** + * struct cpu_operations - Callback operations for hotplugging CPUs. + * + * @name: Name of the boot protocol. + * @cpu_prepare: Early one-time preparation step for a cpu. If there + * is a mechanism for doing so, tests whether it is + * possible to boot the given HART. + * @cpu_start: Boots a cpu into the kernel. + * @cpu_disable: Prepares a cpu to die. May fail for some + * mechanism-specific reason, which will cause the hot + * unplug to be aborted. Called from the cpu to be killed. + * @cpu_stop: Makes a cpu leave the kernel. Must not fail. Called from + * the cpu being stopped. + * @cpu_is_stopped: Ensures a cpu has left the kernel. Called from another + * cpu. + */ +struct cpu_operations { + const char *name; + int (*cpu_prepare)(unsigned int cpu); + int (*cpu_start)(unsigned int cpu, + struct task_struct *tidle); +#ifdef CONFIG_HOTPLUG_CPU + int (*cpu_disable)(unsigned int cpu); + void (*cpu_stop)(void); + int (*cpu_is_stopped)(unsigned int cpu); +#endif +}; + +extern const struct cpu_operations *cpu_ops[NR_CPUS]; +void __init cpu_set_ops(int cpu); +void cpu_update_secondary_bootdata(unsigned int cpuid, + struct task_struct *tidle); + +#endif /* ifndef __ASM_CPU_OPS_H */ diff --git a/arch/riscv/include/asm/current.h b/arch/riscv/include/asm/current.h index dd973efe5d7c..1de233d8e8de 100644 --- a/arch/riscv/include/asm/current.h +++ b/arch/riscv/include/asm/current.h @@ -17,6 +17,8 @@ struct task_struct; +register struct task_struct *riscv_current_is_tp __asm__("tp"); + /* * This only works because "struct thread_info" is at offset 0 from "struct * task_struct". 
This constraint seems to be necessary on other architectures @@ -26,8 +28,7 @@ struct task_struct; */ static __always_inline struct task_struct *get_current(void) { - register struct task_struct *tp __asm__("tp"); - return tp; + return riscv_current_is_tp; } #define current get_current() diff --git a/arch/riscv/include/asm/fixmap.h b/arch/riscv/include/asm/fixmap.h index 42d2c42f3cc9..2368d49eb4ef 100644 --- a/arch/riscv/include/asm/fixmap.h +++ b/arch/riscv/include/asm/fixmap.h @@ -27,6 +27,8 @@ enum fixed_addresses { FIX_FDT = FIX_FDT_END + FIX_FDT_SIZE / PAGE_SIZE - 1, FIX_PTE, FIX_PMD, + FIX_TEXT_POKE1, + FIX_TEXT_POKE0, FIX_EARLYCON_MEM_BASE, __end_of_fixed_addresses }; diff --git a/arch/riscv/include/asm/futex.h b/arch/riscv/include/asm/futex.h index fdfaf7f3df7c..1b00badb9f87 100644 --- a/arch/riscv/include/asm/futex.h +++ b/arch/riscv/include/asm/futex.h @@ -46,7 +46,8 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) { int oldval = 0, ret = 0; - pagefault_disable(); + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; switch (op) { case FUTEX_OP_SET: @@ -73,8 +74,6 @@ arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) ret = -ENOSYS; } - pagefault_enable(); - if (!ret) *oval = oldval; diff --git a/arch/riscv/include/asm/kasan.h b/arch/riscv/include/asm/kasan.h index eee6e6588b12..b47045cb85ce 100644 --- a/arch/riscv/include/asm/kasan.h +++ b/arch/riscv/include/asm/kasan.h @@ -13,7 +13,7 @@ #define KASAN_SHADOW_SCALE_SHIFT 3 #define KASAN_SHADOW_SIZE (UL(1) << (38 - KASAN_SHADOW_SCALE_SHIFT)) -#define KASAN_SHADOW_START 0xffffffc000000000 /* 2^64 - 2^38 */ +#define KASAN_SHADOW_START KERN_VIRT_START /* 2^64 - 2^38 */ #define KASAN_SHADOW_END (KASAN_SHADOW_START + KASAN_SHADOW_SIZE) #define KASAN_SHADOW_OFFSET (KASAN_SHADOW_END - (1ULL << \ diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 8ca1930caa44..2d50f76efe48 100644 --- a/arch/riscv/include/asm/page.h +++ 
b/arch/riscv/include/asm/page.h @@ -137,8 +137,7 @@ extern phys_addr_t __phys_addr_symbol(unsigned long x); #define virt_addr_valid(vaddr) (pfn_valid(virt_to_pfn(vaddr))) -#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ - VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_NON_EXEC #include <asm-generic/memory_model.h> #include <asm-generic/getorder.h> diff --git a/arch/riscv/include/asm/patch.h b/arch/riscv/include/asm/patch.h new file mode 100644 index 000000000000..b5918a6e0615 --- /dev/null +++ b/arch/riscv/include/asm/patch.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2020 SiFive + */ + +#ifndef _ASM_RISCV_PATCH_H +#define _ASM_RISCV_PATCH_H + +int riscv_patch_text_nosync(void *addr, const void *insns, size_t len); +int riscv_patch_text(void *addr, u32 insn); + +#endif /* _ASM_RISCV_PATCH_H */ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index e43041519edd..9c188ad2e52d 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -19,6 +19,47 @@ #include <asm/tlbflush.h> #include <linux/mm_types.h> +#ifdef CONFIG_MMU + +#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) +#define VMALLOC_END (PAGE_OFFSET - 1) +#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) + +#define BPF_JIT_REGION_SIZE (SZ_128M) +#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) +#define BPF_JIT_REGION_END (VMALLOC_END) + +/* + * Roughly size the vmemmap space to be large enough to fit enough + * struct pages to map half the virtual address space. Then + * position vmemmap directly below the VMALLOC region. + */ +#define VMEMMAP_SHIFT \ + (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) +#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) +#define VMEMMAP_END (VMALLOC_START - 1) +#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) + +/* + * Define vmemmap for pfn_to_page & page_to_pfn calls. 
Needed if kernel + * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. + */ +#define vmemmap ((struct page *)VMEMMAP_START) + +#define PCI_IO_SIZE SZ_16M +#define PCI_IO_END VMEMMAP_START +#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) + +#define FIXADDR_TOP PCI_IO_START +#ifdef CONFIG_64BIT +#define FIXADDR_SIZE PMD_SIZE +#else +#define FIXADDR_SIZE PGDIR_SIZE +#endif +#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#endif + #ifdef CONFIG_64BIT #include <asm/pgtable-64.h> #else @@ -90,31 +131,6 @@ extern pgd_t swapper_pg_dir[]; #define __S110 PAGE_SHARED_EXEC #define __S111 PAGE_SHARED_EXEC -#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) -#define VMALLOC_END (PAGE_OFFSET - 1) -#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) - -#define BPF_JIT_REGION_SIZE (SZ_128M) -#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) -#define BPF_JIT_REGION_END (VMALLOC_END) - -/* - * Roughly size the vmemmap space to be large enough to fit enough - * struct pages to map half the virtual address space. Then - * position vmemmap directly below the VMALLOC region. - */ -#define VMEMMAP_SHIFT \ - (CONFIG_VA_BITS - PAGE_SHIFT - 1 + STRUCT_PAGE_MAX_SHIFT) -#define VMEMMAP_SIZE BIT(VMEMMAP_SHIFT) -#define VMEMMAP_END (VMALLOC_START - 1) -#define VMEMMAP_START (VMALLOC_START - VMEMMAP_SIZE) - -/* - * Define vmemmap for pfn_to_page & page_to_pfn calls. Needed if kernel - * is configured with CONFIG_SPARSEMEM_VMEMMAP enabled. 
- */ -#define vmemmap ((struct page *)VMEMMAP_START) - static inline int pmd_present(pmd_t pmd) { return (pmd_val(pmd) & (_PAGE_PRESENT | _PAGE_PROT_NONE)); @@ -432,17 +448,15 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) -#define PCI_IO_SIZE SZ_16M -#define PCI_IO_END VMEMMAP_START -#define PCI_IO_START (PCI_IO_END - PCI_IO_SIZE) - -#define FIXADDR_TOP PCI_IO_START +/* + * In the RV64 Linux scheme, we give the user half of the virtual-address space + * and give the kernel the other (upper) half. + */ #ifdef CONFIG_64BIT -#define FIXADDR_SIZE PMD_SIZE +#define KERN_VIRT_START (-(BIT(CONFIG_VA_BITS)) + TASK_SIZE) #else -#define FIXADDR_SIZE PGDIR_SIZE +#define KERN_VIRT_START FIXADDR_START #endif -#define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) /* * Task size is 0x4000000000 for RV64 or 0x9fc00000 for RV32. diff --git a/arch/riscv/include/asm/ptdump.h b/arch/riscv/include/asm/ptdump.h new file mode 100644 index 000000000000..e29af7191909 --- /dev/null +++ b/arch/riscv/include/asm/ptdump.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 SiFive + */ + +#ifndef _ASM_RISCV_PTDUMP_H +#define _ASM_RISCV_PTDUMP_H + +void ptdump_check_wx(void); + +#endif /* _ASM_RISCV_PTDUMP_H */ diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 2570c1e683d3..653edb25d495 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2015 Regents of the University of California + * Copyright (c) 2020 Western Digital Corporation or its affiliates. 
*/ #ifndef _ASM_RISCV_SBI_H @@ -9,96 +10,148 @@ #include <linux/types.h> #ifdef CONFIG_RISCV_SBI -#define SBI_SET_TIMER 0 -#define SBI_CONSOLE_PUTCHAR 1 -#define SBI_CONSOLE_GETCHAR 2 -#define SBI_CLEAR_IPI 3 -#define SBI_SEND_IPI 4 -#define SBI_REMOTE_FENCE_I 5 -#define SBI_REMOTE_SFENCE_VMA 6 -#define SBI_REMOTE_SFENCE_VMA_ASID 7 -#define SBI_SHUTDOWN 8 - -#define SBI_CALL(which, arg0, arg1, arg2, arg3) ({ \ - register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); \ - register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); \ - register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); \ - register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); \ - register uintptr_t a7 asm ("a7") = (uintptr_t)(which); \ - asm volatile ("ecall" \ - : "+r" (a0) \ - : "r" (a1), "r" (a2), "r" (a3), "r" (a7) \ - : "memory"); \ - a0; \ -}) - -/* Lazy implementations until SBI is finalized */ -#define SBI_CALL_0(which) SBI_CALL(which, 0, 0, 0, 0) -#define SBI_CALL_1(which, arg0) SBI_CALL(which, arg0, 0, 0, 0) -#define SBI_CALL_2(which, arg0, arg1) SBI_CALL(which, arg0, arg1, 0, 0) -#define SBI_CALL_3(which, arg0, arg1, arg2) \ - SBI_CALL(which, arg0, arg1, arg2, 0) -#define SBI_CALL_4(which, arg0, arg1, arg2, arg3) \ - SBI_CALL(which, arg0, arg1, arg2, arg3) - -static inline void sbi_console_putchar(int ch) -{ - SBI_CALL_1(SBI_CONSOLE_PUTCHAR, ch); -} +enum sbi_ext_id { +#ifdef CONFIG_RISCV_SBI_V01 + SBI_EXT_0_1_SET_TIMER = 0x0, + SBI_EXT_0_1_CONSOLE_PUTCHAR = 0x1, + SBI_EXT_0_1_CONSOLE_GETCHAR = 0x2, + SBI_EXT_0_1_CLEAR_IPI = 0x3, + SBI_EXT_0_1_SEND_IPI = 0x4, + SBI_EXT_0_1_REMOTE_FENCE_I = 0x5, + SBI_EXT_0_1_REMOTE_SFENCE_VMA = 0x6, + SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID = 0x7, + SBI_EXT_0_1_SHUTDOWN = 0x8, +#endif + SBI_EXT_BASE = 0x10, + SBI_EXT_TIME = 0x54494D45, + SBI_EXT_IPI = 0x735049, + SBI_EXT_RFENCE = 0x52464E43, + SBI_EXT_HSM = 0x48534D, +}; -static inline int sbi_console_getchar(void) -{ - return SBI_CALL_0(SBI_CONSOLE_GETCHAR); -} +enum sbi_ext_base_fid { + 
SBI_EXT_BASE_GET_SPEC_VERSION = 0, + SBI_EXT_BASE_GET_IMP_ID, + SBI_EXT_BASE_GET_IMP_VERSION, + SBI_EXT_BASE_PROBE_EXT, + SBI_EXT_BASE_GET_MVENDORID, + SBI_EXT_BASE_GET_MARCHID, + SBI_EXT_BASE_GET_MIMPID, +}; -static inline void sbi_set_timer(uint64_t stime_value) -{ -#if __riscv_xlen == 32 - SBI_CALL_2(SBI_SET_TIMER, stime_value, stime_value >> 32); -#else - SBI_CALL_1(SBI_SET_TIMER, stime_value); -#endif -} +enum sbi_ext_time_fid { + SBI_EXT_TIME_SET_TIMER = 0, +}; -static inline void sbi_shutdown(void) -{ - SBI_CALL_0(SBI_SHUTDOWN); -} +enum sbi_ext_ipi_fid { + SBI_EXT_IPI_SEND_IPI = 0, +}; -static inline void sbi_clear_ipi(void) -{ - SBI_CALL_0(SBI_CLEAR_IPI); -} +enum sbi_ext_rfence_fid { + SBI_EXT_RFENCE_REMOTE_FENCE_I = 0, + SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, + SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, + SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, + SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, + SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, +}; -static inline void sbi_send_ipi(const unsigned long *hart_mask) -{ - SBI_CALL_1(SBI_SEND_IPI, hart_mask); -} +enum sbi_ext_hsm_fid { + SBI_EXT_HSM_HART_START = 0, + SBI_EXT_HSM_HART_STOP, + SBI_EXT_HSM_HART_STATUS, +}; + +enum sbi_hsm_hart_status { + SBI_HSM_HART_STATUS_STARTED = 0, + SBI_HSM_HART_STATUS_STOPPED, + SBI_HSM_HART_STATUS_START_PENDING, + SBI_HSM_HART_STATUS_STOP_PENDING, +}; + +#define SBI_SPEC_VERSION_DEFAULT 0x1 +#define SBI_SPEC_VERSION_MAJOR_SHIFT 24 +#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f +#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff + +/* SBI return error codes */ +#define SBI_SUCCESS 0 +#define SBI_ERR_FAILURE -1 +#define SBI_ERR_NOT_SUPPORTED -2 +#define SBI_ERR_INVALID_PARAM -3 +#define SBI_ERR_DENIED -4 +#define SBI_ERR_INVALID_ADDRESS -5 -static inline void sbi_remote_fence_i(const unsigned long *hart_mask) +extern unsigned long sbi_spec_version; +struct sbiret { + long error; + long value; +}; + +int sbi_init(void); +struct sbiret sbi_ecall(int ext, int fid, unsigned long 
arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5); + +void sbi_console_putchar(int ch); +int sbi_console_getchar(void); +void sbi_set_timer(uint64_t stime_value); +void sbi_shutdown(void); +void sbi_clear_ipi(void); +void sbi_send_ipi(const unsigned long *hart_mask); +void sbi_remote_fence_i(const unsigned long *hart_mask); +void sbi_remote_sfence_vma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size); + +void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, + unsigned long start, + unsigned long size, + unsigned long asid); +int sbi_remote_hfence_gvma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size); +int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, + unsigned long start, + unsigned long size, + unsigned long vmid); +int sbi_remote_hfence_vvma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size); +int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, + unsigned long start, + unsigned long size, + unsigned long asid); +int sbi_probe_extension(int ext); + +/* Check if current SBI specification version is 0.1 or not */ +static inline int sbi_spec_is_0_1(void) { - SBI_CALL_1(SBI_REMOTE_FENCE_I, hart_mask); + return (sbi_spec_version == SBI_SPEC_VERSION_DEFAULT) ? 
1 : 0; } -static inline void sbi_remote_sfence_vma(const unsigned long *hart_mask, - unsigned long start, - unsigned long size) +/* Get the major version of SBI */ +static inline unsigned long sbi_major_version(void) { - SBI_CALL_3(SBI_REMOTE_SFENCE_VMA, hart_mask, start, size); + return (sbi_spec_version >> SBI_SPEC_VERSION_MAJOR_SHIFT) & + SBI_SPEC_VERSION_MAJOR_MASK; } -static inline void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, - unsigned long start, - unsigned long size, - unsigned long asid) +/* Get the minor version of SBI */ +static inline unsigned long sbi_minor_version(void) { - SBI_CALL_4(SBI_REMOTE_SFENCE_VMA_ASID, hart_mask, start, size, asid); + return sbi_spec_version & SBI_SPEC_VERSION_MINOR_MASK; } + +int sbi_err_map_linux_errno(int err); #else /* CONFIG_RISCV_SBI */ /* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */ void sbi_set_timer(uint64_t stime_value); void sbi_clear_ipi(void); void sbi_send_ipi(const unsigned long *hart_mask); void sbi_remote_fence_i(const unsigned long *hart_mask); +void sbi_init(void); #endif /* CONFIG_RISCV_SBI */ #endif /* _ASM_RISCV_SBI_H */ diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h new file mode 100644 index 000000000000..c38df4771c09 --- /dev/null +++ b/arch/riscv/include/asm/set_memory.h @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2019 SiFive + */ + +#ifndef _ASM_RISCV_SET_MEMORY_H +#define _ASM_RISCV_SET_MEMORY_H + +#ifndef __ASSEMBLY__ +/* + * Functions to change memory attributes. 
+ */ +#ifdef CONFIG_MMU +int set_memory_ro(unsigned long addr, int numpages); +int set_memory_rw(unsigned long addr, int numpages); +int set_memory_x(unsigned long addr, int numpages); +int set_memory_nx(unsigned long addr, int numpages); +#else +static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_x(unsigned long addr, int numpages) { return 0; } +static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; } +#endif + +#ifdef CONFIG_STRICT_KERNEL_RWX +void set_kernel_text_ro(void); +void set_kernel_text_rw(void); +#else +static inline void set_kernel_text_ro(void) { } +static inline void set_kernel_text_rw(void) { } +#endif + +int set_direct_map_invalid_noflush(struct page *page); +int set_direct_map_default_noflush(struct page *page); + +#endif /* __ASSEMBLY__ */ + +#ifdef CONFIG_ARCH_HAS_STRICT_KERNEL_RWX +#ifdef CONFIG_64BIT +#define SECTION_ALIGN (1 << 21) +#else +#define SECTION_ALIGN (1 << 22) +#endif +#else /* !CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */ +#define SECTION_ALIGN L1_CACHE_BYTES +#endif /* CONFIG_ARCH_HAS_STRICT_KERNEL_RWX */ + +#endif /* _ASM_RISCV_SET_MEMORY_H */ diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index a83451d73a4e..f4c7cfda6b7f 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -43,6 +43,13 @@ void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); */ #define raw_smp_processor_id() (current_thread_info()->cpu) +#if defined CONFIG_HOTPLUG_CPU +int __cpu_disable(void); +void __cpu_die(unsigned int cpu); +void cpu_stop(void); +#else +#endif /* CONFIG_HOTPLUG_CPU */ + #else static inline void show_ipi_stats(struct seq_file *p, int prec) @@ -61,5 +68,22 @@ static inline unsigned long cpuid_to_hartid_map(int cpu) return boot_cpu_hartid; } +static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in, 
+ struct cpumask *out) +{ + cpumask_clear(out); + cpumask_set_cpu(boot_cpu_hartid, out); +} + #endif /* CONFIG_SMP */ + +#if defined(CONFIG_HOTPLUG_CPU) && (CONFIG_SMP) +bool cpu_has_hotplug(unsigned int cpu); +#else +static inline bool cpu_has_hotplug(unsigned int cpu) +{ + return false; +} +#endif + #endif /* _ASM_RISCV_SMP_H */ diff --git a/arch/riscv/include/asm/soc.h b/arch/riscv/include/asm/soc.h new file mode 100644 index 000000000000..7cec1968c8b4 --- /dev/null +++ b/arch/riscv/include/asm/soc.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ + +#ifndef _ASM_RISCV_SOC_H +#define _ASM_RISCV_SOC_H + +#include <linux/of.h> +#include <linux/linkage.h> +#include <linux/types.h> + +#define SOC_EARLY_INIT_DECLARE(name, compat, fn) \ + static const struct of_device_id __soc_early_init__##name \ + __used __section(__soc_early_init_table) \ + = { .compatible = compat, .data = fn } + +void soc_early_init(void); + +extern unsigned long __soc_early_init_table_start; +extern unsigned long __soc_early_init_table_end; + +#endif diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index f462a183a9c2..8ce9d607b53d 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -11,6 +11,24 @@ /* * User space memory access functions */ + +extern unsigned long __must_check __asm_copy_to_user(void __user *to, + const void *from, unsigned long n); +extern unsigned long __must_check __asm_copy_from_user(void *to, + const void __user *from, unsigned long n); + +static inline unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + return __asm_copy_from_user(to, from, n); +} + +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + return __asm_copy_to_user(to, from, n); +} + #ifdef CONFIG_MMU #include <linux/errno.h> #include <linux/compiler.h> @@ 
-367,24 +385,6 @@ do { \ -EFAULT; \ }) - -extern unsigned long __must_check __asm_copy_to_user(void __user *to, - const void *from, unsigned long n); -extern unsigned long __must_check __asm_copy_from_user(void *to, - const void __user *from, unsigned long n); - -static inline unsigned long -raw_copy_from_user(void *to, const void __user *from, unsigned long n) -{ - return __asm_copy_from_user(to, from, n); -} - -static inline unsigned long -raw_copy_to_user(void __user *to, const void *from, unsigned long n) -{ - return __asm_copy_to_user(to, from, n); -} - extern long strncpy_from_user(char *dest, const char __user *src, long count); extern long __must_check strlen_user(const char __user *str); diff --git a/arch/riscv/kernel/.gitignore b/arch/riscv/kernel/.gitignore index b51634f6a7cd..e052ed331cc1 100644 --- a/arch/riscv/kernel/.gitignore +++ b/arch/riscv/kernel/.gitignore @@ -1 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only /vmlinux.lds diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index f40205cb9a22..86c83081044f 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -4,12 +4,14 @@ # ifdef CONFIG_FTRACE -CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_patch.o = -pg endif extra-y += head.o extra-y += vmlinux.lds +obj-y += soc.o obj-y += cpu.o obj-y += cpufeature.o obj-y += entry.o @@ -26,12 +28,15 @@ obj-y += traps.o obj-y += riscv_ksyms.o obj-y += stacktrace.o obj-y += cacheinfo.o +obj-y += patch.o obj-$(CONFIG_MMU) += vdso.o vdso/ -obj-$(CONFIG_RISCV_M_MODE) += clint.o +obj-$(CONFIG_RISCV_M_MODE) += clint.o traps_misaligned.o obj-$(CONFIG_FPU) += fpu.o obj-$(CONFIG_SMP) += smpboot.o obj-$(CONFIG_SMP) += smp.o +obj-$(CONFIG_SMP) += cpu_ops.o +obj-$(CONFIG_SMP) += cpu_ops_spinwait.o obj-$(CONFIG_MODULES) += module.o obj-$(CONFIG_MODULE_SECTIONS) += module-sections.o @@ -42,5 +47,9 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o 
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o +ifeq ($(CONFIG_RISCV_SBI), y) +obj-$(CONFIG_SMP) += cpu_ops_sbi.o +endif +obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o clean: diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c new file mode 100644 index 000000000000..df84e0c13db1 --- /dev/null +++ b/arch/riscv/kernel/cpu-hotplug.c @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/sched.h> +#include <linux/err.h> +#include <linux/irq.h> +#include <linux/cpu.h> +#include <linux/sched/hotplug.h> +#include <asm/irq.h> +#include <asm/cpu_ops.h> +#include <asm/sbi.h> + +void cpu_stop(void); +void arch_cpu_idle_dead(void) +{ + cpu_stop(); +} + +bool cpu_has_hotplug(unsigned int cpu) +{ + if (cpu_ops[cpu]->cpu_stop) + return true; + + return false; +} + +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) +{ + int ret = 0; + unsigned int cpu = smp_processor_id(); + + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_stop) + return -EOPNOTSUPP; + + if (cpu_ops[cpu]->cpu_disable) + ret = cpu_ops[cpu]->cpu_disable(cpu); + + if (ret) + return ret; + + remove_cpu_topology(cpu); + set_cpu_online(cpu, false); + irq_migrate_all_off_this_cpu(); + + return ret; +} + +/* + * Called on the thread which is asking for a CPU to be shutdown. + */ +void __cpu_die(unsigned int cpu) +{ + int ret = 0; + + if (!cpu_wait_death(cpu, 5)) { + pr_err("CPU %u: didn't die\n", cpu); + return; + } + pr_notice("CPU%u: off\n", cpu); + + /* Verify from the firmware if the cpu is really stopped*/ + if (cpu_ops[cpu]->cpu_is_stopped) + ret = cpu_ops[cpu]->cpu_is_stopped(cpu); + if (ret) + pr_warn("CPU%d may not have stopped: %d\n", cpu, ret); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. 
+ */ +void cpu_stop(void) +{ + idle_task_exit(); + + (void)cpu_report_death(); + + cpu_ops[smp_processor_id()]->cpu_stop(); + /* It should never reach here */ + BUG(); +} diff --git a/arch/riscv/kernel/cpu_ops.c b/arch/riscv/kernel/cpu_ops.c new file mode 100644 index 000000000000..c4c33bf02369 --- /dev/null +++ b/arch/riscv/kernel/cpu_ops.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ + +#include <linux/errno.h> +#include <linux/mm.h> +#include <linux/of.h> +#include <linux/string.h> +#include <linux/sched.h> +#include <linux/sched/task_stack.h> +#include <asm/cpu_ops.h> +#include <asm/sbi.h> +#include <asm/smp.h> + +const struct cpu_operations *cpu_ops[NR_CPUS] __ro_after_init; + +void *__cpu_up_stack_pointer[NR_CPUS]; +void *__cpu_up_task_pointer[NR_CPUS]; + +extern const struct cpu_operations cpu_ops_sbi; +extern const struct cpu_operations cpu_ops_spinwait; + +void cpu_update_secondary_bootdata(unsigned int cpuid, + struct task_struct *tidle) +{ + int hartid = cpuid_to_hartid_map(cpuid); + + /* Make sure tidle is updated */ + smp_mb(); + WRITE_ONCE(__cpu_up_stack_pointer[hartid], + task_stack_page(tidle) + THREAD_SIZE); + WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); +} + +void __init cpu_set_ops(int cpuid) +{ +#if IS_ENABLED(CONFIG_RISCV_SBI) + if (sbi_probe_extension(SBI_EXT_HSM) > 0) { + if (!cpuid) + pr_info("SBI v0.2 HSM extension detected\n"); + cpu_ops[cpuid] = &cpu_ops_sbi; + } else +#endif + cpu_ops[cpuid] = &cpu_ops_spinwait; +} diff --git a/arch/riscv/kernel/cpu_ops_sbi.c b/arch/riscv/kernel/cpu_ops_sbi.c new file mode 100644 index 000000000000..685fae72b7f5 --- /dev/null +++ b/arch/riscv/kernel/cpu_ops_sbi.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HSM extension and cpu_ops implementation. + * + * Copyright (c) 2020 Western Digital Corporation or its affiliates. 
+ */ + +#include <linux/init.h> +#include <linux/mm.h> +#include <asm/cpu_ops.h> +#include <asm/sbi.h> +#include <asm/smp.h> + +extern char secondary_start_sbi[]; +const struct cpu_operations cpu_ops_sbi; + +static int sbi_hsm_hart_start(unsigned long hartid, unsigned long saddr, + unsigned long priv) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_START, + hartid, saddr, priv, 0, 0, 0); + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + else + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int sbi_hsm_hart_stop(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_STOP, 0, 0, 0, 0, 0, 0); + + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + else + return 0; +} + +static int sbi_hsm_hart_get_status(unsigned long hartid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_HSM, SBI_EXT_HSM_HART_STATUS, + hartid, 0, 0, 0, 0, 0); + if (ret.error) + return sbi_err_map_linux_errno(ret.error); + else + return ret.value; +} +#endif + +static int sbi_cpu_start(unsigned int cpuid, struct task_struct *tidle) +{ + int rc; + unsigned long boot_addr = __pa_symbol(secondary_start_sbi); + int hartid = cpuid_to_hartid_map(cpuid); + + cpu_update_secondary_bootdata(cpuid, tidle); + rc = sbi_hsm_hart_start(hartid, boot_addr, 0); + + return rc; +} + +static int sbi_cpu_prepare(unsigned int cpuid) +{ + if (!cpu_ops_sbi.cpu_start) { + pr_err("cpu start method not defined for CPU [%d]\n", cpuid); + return -ENODEV; + } + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int sbi_cpu_disable(unsigned int cpuid) +{ + if (!cpu_ops_sbi.cpu_stop) + return -EOPNOTSUPP; + return 0; +} + +static void sbi_cpu_stop(void) +{ + int ret; + + ret = sbi_hsm_hart_stop(); + pr_crit("Unable to stop the cpu %u (%d)\n", smp_processor_id(), ret); +} + +static int sbi_cpu_is_stopped(unsigned int cpuid) +{ + int rc; + int hartid = cpuid_to_hartid_map(cpuid); + + rc = sbi_hsm_hart_get_status(hartid); + + if (rc == 
SBI_HSM_HART_STATUS_STOPPED) + return 0; + return rc; +} +#endif + +const struct cpu_operations cpu_ops_sbi = { + .name = "sbi", + .cpu_prepare = sbi_cpu_prepare, + .cpu_start = sbi_cpu_start, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = sbi_cpu_disable, + .cpu_stop = sbi_cpu_stop, + .cpu_is_stopped = sbi_cpu_is_stopped, +#endif +}; diff --git a/arch/riscv/kernel/cpu_ops_spinwait.c b/arch/riscv/kernel/cpu_ops_spinwait.c new file mode 100644 index 000000000000..b2c957bb68c1 --- /dev/null +++ b/arch/riscv/kernel/cpu_ops_spinwait.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2020 Western Digital Corporation or its affiliates. + */ + +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/string.h> +#include <asm/cpu_ops.h> +#include <asm/sbi.h> +#include <asm/smp.h> + +const struct cpu_operations cpu_ops_spinwait; + +static int spinwait_cpu_prepare(unsigned int cpuid) +{ + if (!cpu_ops_spinwait.cpu_start) { + pr_err("cpu start method not defined for CPU [%d]\n", cpuid); + return -ENODEV; + } + return 0; +} + +static int spinwait_cpu_start(unsigned int cpuid, struct task_struct *tidle) +{ + /* + * In this protocol, all cpus boot on their own accord. _start + * selects the first cpu to boot the kernel and causes the remainder + * of the cpus to spin in a loop waiting for their stack pointer to be + * setup by that main cpu. Writing to bootdata + * (i.e __cpu_up_stack_pointer) signals to the spinning cpus that they + * can continue the boot process. 
+ */ + cpu_update_secondary_bootdata(cpuid, tidle); + + return 0; +} + +const struct cpu_operations cpu_ops_spinwait = { + .name = "spinwait", + .cpu_prepare = spinwait_cpu_prepare, + .cpu_start = spinwait_cpu_start, +}; diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 208702d8c18e..56d071b2c0a1 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -13,17 +13,11 @@ #include <asm/thread_info.h> #include <asm/asm-offsets.h> - .text - .altmacro - -/* - * Prepares to enter a system call or exception by saving all registers to the - * stack. - */ - .macro SAVE_ALL - LOCAL _restore_kernel_tpsp - LOCAL _save_context +#if !IS_ENABLED(CONFIG_PREEMPTION) +.set resume_kernel, restore_all +#endif +ENTRY(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load * the kernel thread pointer. If we came from the kernel, the scratch @@ -90,77 +84,6 @@ _save_context: REG_S s3, PT_BADADDR(sp) REG_S s4, PT_CAUSE(sp) REG_S s5, PT_TP(sp) - .endm - -/* - * Prepares to return from a system call or exception by restoring all - * registers from the stack. - */ - .macro RESTORE_ALL - REG_L a0, PT_STATUS(sp) - /* - * The current load reservation is effectively part of the processor's - * state, in the sense that load reservations cannot be shared between - * different hart contexts. We can't actually save and restore a load - * reservation, so instead here we clear any existing reservation -- - * it's always legal for implementations to clear load reservations at - * any point (as long as the forward progress guarantee is kept, but - * we'll ignore that here). - * - * Dangling load reservations can be the result of taking a trap in the - * middle of an LR/SC sequence, but can also be the result of a taken - * forward branch around an SC -- which is how we implement CAS. As a - * result we need to clear reservations between the last CAS and the - * jump back to the new context. 
While it is unlikely the store - * completes, implementations are allowed to expand reservations to be - * arbitrarily large. - */ - REG_L a2, PT_EPC(sp) - REG_SC x0, a2, PT_EPC(sp) - - csrw CSR_STATUS, a0 - csrw CSR_EPC, a2 - - REG_L x1, PT_RA(sp) - REG_L x3, PT_GP(sp) - REG_L x4, PT_TP(sp) - REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) - - REG_L x2, PT_SP(sp) - .endm - -#if !IS_ENABLED(CONFIG_PREEMPTION) -.set resume_kernel, restore_all -#endif - -ENTRY(handle_exception) - SAVE_ALL /* * Set the scratch register to 0, so that if a recursive exception @@ -291,7 +214,63 @@ resume_userspace: csrw CSR_SCRATCH, tp restore_all: - RESTORE_ALL + REG_L a0, PT_STATUS(sp) + /* + * The current load reservation is effectively part of the processor's + * state, in the sense that load reservations cannot be shared between + * different hart contexts. We can't actually save and restore a load + * reservation, so instead here we clear any existing reservation -- + * it's always legal for implementations to clear load reservations at + * any point (as long as the forward progress guarantee is kept, but + * we'll ignore that here). + * + * Dangling load reservations can be the result of taking a trap in the + * middle of an LR/SC sequence, but can also be the result of a taken + * forward branch around an SC -- which is how we implement CAS. 
As a + * result we need to clear reservations between the last CAS and the + * jump back to the new context. While it is unlikely the store + * completes, implementations are allowed to expand reservations to be + * arbitrarily large. + */ + REG_L a2, PT_EPC(sp) + REG_SC x0, a2, PT_EPC(sp) + + csrw CSR_STATUS, a0 + csrw CSR_EPC, a2 + + REG_L x1, PT_RA(sp) + REG_L x3, PT_GP(sp) + REG_L x4, PT_TP(sp) + REG_L x5, PT_T0(sp) + REG_L x6, PT_T1(sp) + REG_L x7, PT_T2(sp) + REG_L x8, PT_S0(sp) + REG_L x9, PT_S1(sp) + REG_L x10, PT_A0(sp) + REG_L x11, PT_A1(sp) + REG_L x12, PT_A2(sp) + REG_L x13, PT_A3(sp) + REG_L x14, PT_A4(sp) + REG_L x15, PT_A5(sp) + REG_L x16, PT_A6(sp) + REG_L x17, PT_A7(sp) + REG_L x18, PT_S2(sp) + REG_L x19, PT_S3(sp) + REG_L x20, PT_S4(sp) + REG_L x21, PT_S5(sp) + REG_L x22, PT_S6(sp) + REG_L x23, PT_S7(sp) + REG_L x24, PT_S8(sp) + REG_L x25, PT_S9(sp) + REG_L x26, PT_S10(sp) + REG_L x27, PT_S11(sp) + REG_L x28, PT_T3(sp) + REG_L x29, PT_T4(sp) + REG_L x30, PT_T5(sp) + REG_L x31, PT_T6(sp) + + REG_L x2, PT_SP(sp) + #ifdef CONFIG_RISCV_M_MODE mret #else diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index c40fdcdeb950..ce69b34ff55d 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -8,6 +8,7 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> #include <asm/cacheflush.h> +#include <asm/patch.h> #ifdef CONFIG_DYNAMIC_FTRACE static int ftrace_check_current_call(unsigned long hook_pos, @@ -46,20 +47,14 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target, { unsigned int call[2]; unsigned int nops[2] = {NOP4, NOP4}; - int ret = 0; make_call(hook_pos, target, call); - /* replace the auipc-jalr pair at once */ - ret = probe_kernel_write((void *)hook_pos, enable ? call : nops, - MCOUNT_INSN_SIZE); - /* return must be -EPERM on write error */ - if (ret) + /* Replace the auipc-jalr pair at once. Return -EPERM on write error. 
*/ + if (riscv_patch_text_nosync + ((void *)hook_pos, enable ? call : nops, MCOUNT_INSN_SIZE)) return -EPERM; - smp_mb(); - flush_icache_range((void *)hook_pos, (void *)hook_pos + MCOUNT_INSN_SIZE); - return 0; } diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 85f2073e7fe4..98a406474e7d 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -14,7 +14,7 @@ #include <asm/hwcap.h> #include <asm/image.h> -__INIT +__HEAD ENTRY(_start) /* * Image header expected by Linux boot-loaders. The image header data @@ -45,8 +45,111 @@ ENTRY(_start) .ascii RISCV_IMAGE_MAGIC2 .word 0 -.global _start_kernel -_start_kernel: +.align 2 +#ifdef CONFIG_MMU +relocate: + /* Relocate return address */ + li a1, PAGE_OFFSET + la a2, _start + sub a1, a1, a2 + add ra, ra, a1 + + /* Point stvec to virtual address of instruction after satp write */ + la a2, 1f + add a2, a2, a1 + csrw CSR_TVEC, a2 + + /* Compute satp for kernel page tables, but don't load it yet */ + srl a2, a0, PAGE_SHIFT + li a1, SATP_MODE + or a2, a2, a1 + + /* + * Load trampoline page directory, which will cause us to trap to + * stvec if VA != PA, or simply fall through if VA == PA. We need a + * full fence here because setup_vm() just wrote these PTEs and we need + * to ensure the new translations are in use. + */ + la a0, trampoline_pg_dir + srl a0, a0, PAGE_SHIFT + or a0, a0, a1 + sfence.vma + csrw CSR_SATP, a0 +.align 2 +1: + /* Set trap vector to spin forever to help debug */ + la a0, .Lsecondary_park + csrw CSR_TVEC, a0 + + /* Reload the global pointer */ +.option push +.option norelax + la gp, __global_pointer$ +.option pop + + /* + * Switch to kernel page tables. A full fence is necessary in order to + * avoid using the trampoline translations, which are only correct for + * the first superpage. Fetching the fence is guaranteed to work + * because that first superpage is translated the same way. 
+ */ + csrw CSR_SATP, a2 + sfence.vma + + ret +#endif /* CONFIG_MMU */ +#ifdef CONFIG_SMP + .global secondary_start_sbi +secondary_start_sbi: + /* Mask all interrupts */ + csrw CSR_IE, zero + csrw CSR_IP, zero + + /* Load the global pointer */ + .option push + .option norelax + la gp, __global_pointer$ + .option pop + + /* + * Disable FPU to detect illegal usage of + * floating point in kernel space + */ + li t0, SR_FS + csrc CSR_STATUS, t0 + + /* Set trap vector to spin forever to help debug */ + la a3, .Lsecondary_park + csrw CSR_TVEC, a3 + + slli a3, a0, LGREG + la a4, __cpu_up_stack_pointer + la a5, __cpu_up_task_pointer + add a4, a3, a4 + add a5, a3, a5 + REG_L sp, (a4) + REG_L tp, (a5) + + .global secondary_start_common +secondary_start_common: + +#ifdef CONFIG_MMU + /* Enable virtual memory and relocate to virtual address */ + la a0, swapper_pg_dir + call relocate +#endif + tail smp_callin +#endif /* CONFIG_SMP */ + +.Lsecondary_park: + /* We lack SMP support or have too many harts, so park this hart */ + wfi + j .Lsecondary_park + +END(_start) + + __INIT +ENTRY(_start_kernel) /* Mask all interrupts */ csrw CSR_IE, zero csrw CSR_IP, zero @@ -131,62 +234,10 @@ clear_bss_done: call kasan_early_init #endif /* Start the kernel */ + call soc_early_init call parse_dtb tail start_kernel -#ifdef CONFIG_MMU -relocate: - /* Relocate return address */ - li a1, PAGE_OFFSET - la a2, _start - sub a1, a1, a2 - add ra, ra, a1 - - /* Point stvec to virtual address of intruction after satp write */ - la a2, 1f - add a2, a2, a1 - csrw CSR_TVEC, a2 - - /* Compute satp for kernel page tables, but don't load it yet */ - srl a2, a0, PAGE_SHIFT - li a1, SATP_MODE - or a2, a2, a1 - - /* - * Load trampoline page directory, which will cause us to trap to - * stvec if VA != PA, or simply fall through if VA == PA. We need a - * full fence here because setup_vm() just wrote these PTEs and we need - * to ensure the new translations are in use. 
- */ - la a0, trampoline_pg_dir - srl a0, a0, PAGE_SHIFT - or a0, a0, a1 - sfence.vma - csrw CSR_SATP, a0 -.align 2 -1: - /* Set trap vector to spin forever to help debug */ - la a0, .Lsecondary_park - csrw CSR_TVEC, a0 - - /* Reload the global pointer */ -.option push -.option norelax - la gp, __global_pointer$ -.option pop - - /* - * Switch to kernel page tables. A full fence is necessary in order to - * avoid using the trampoline translations, which are only correct for - * the first superpage. Fetching the fence is guarnteed to work - * because that first superpage is translated the same way. - */ - csrw CSR_SATP, a2 - sfence.vma - - ret -#endif /* CONFIG_MMU */ - .Lsecondary_start: #ifdef CONFIG_SMP /* Set trap vector to spin forever to help debug */ @@ -211,16 +262,10 @@ relocate: beqz tp, .Lwait_for_cpu_up fence -#ifdef CONFIG_MMU - /* Enable virtual memory and relocate to virtual address */ - la a0, swapper_pg_dir - call relocate + tail secondary_start_common #endif - tail smp_callin -#endif - -END(_start) +END(_start_kernel) #ifdef CONFIG_RISCV_M_MODE ENTRY(reset_regs) @@ -301,13 +346,6 @@ ENTRY(reset_regs) END(reset_regs) #endif /* CONFIG_RISCV_M_MODE */ -.section ".text", "ax",@progbits -.align 2 -.Lsecondary_park: - /* We lack SMP support or have too many harts, so park this hart */ - wfi - j .Lsecondary_park - __PAGE_ALIGNED_BSS /* Empty zero page */ .balign PAGE_SIZE diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c new file mode 100644 index 000000000000..8a4fc65ee022 --- /dev/null +++ b/arch/riscv/kernel/patch.c @@ -0,0 +1,120 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 SiFive + */ + +#include <linux/spinlock.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <linux/stop_machine.h> +#include <asm/kprobes.h> +#include <asm/cacheflush.h> +#include <asm/fixmap.h> + +struct riscv_insn_patch { + void *addr; + u32 insn; + atomic_t cpu_count; +}; + +#ifdef CONFIG_MMU +static 
DEFINE_RAW_SPINLOCK(patch_lock); + +static void __kprobes *patch_map(void *addr, int fixmap) +{ + uintptr_t uintaddr = (uintptr_t) addr; + struct page *page; + + if (core_kernel_text(uintaddr)) + page = phys_to_page(__pa_symbol(addr)); + else if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX)) + page = vmalloc_to_page(addr); + else + return addr; + + BUG_ON(!page); + + return (void *)set_fixmap_offset(fixmap, page_to_phys(page) + + (uintaddr & ~PAGE_MASK)); +} + +static void __kprobes patch_unmap(int fixmap) +{ + clear_fixmap(fixmap); +} + +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) +{ + void *waddr = addr; + bool across_pages = (((uintptr_t) addr & ~PAGE_MASK) + len) > PAGE_SIZE; + unsigned long flags = 0; + int ret; + + raw_spin_lock_irqsave(&patch_lock, flags); + + if (across_pages) + patch_map(addr + len, FIX_TEXT_POKE1); + + waddr = patch_map(addr, FIX_TEXT_POKE0); + + ret = probe_kernel_write(waddr, insn, len); + + patch_unmap(FIX_TEXT_POKE0); + + if (across_pages) + patch_unmap(FIX_TEXT_POKE1); + + raw_spin_unlock_irqrestore(&patch_lock, flags); + + return ret; +} +#else +static int __kprobes riscv_insn_write(void *addr, const void *insn, size_t len) +{ + return probe_kernel_write(addr, insn, len); +} +#endif /* CONFIG_MMU */ + +int __kprobes riscv_patch_text_nosync(void *addr, const void *insns, size_t len) +{ + u32 *tp = addr; + int ret; + + ret = riscv_insn_write(tp, insns, len); + + if (!ret) + flush_icache_range((uintptr_t) tp, (uintptr_t) tp + len); + + return ret; +} + +static int __kprobes riscv_patch_text_cb(void *data) +{ + struct riscv_insn_patch *patch = data; + int ret = 0; + + if (atomic_inc_return(&patch->cpu_count) == 1) { + ret = + riscv_patch_text_nosync(patch->addr, &patch->insn, + GET_INSN_LENGTH(patch->insn)); + atomic_inc(&patch->cpu_count); + } else { + while (atomic_read(&patch->cpu_count) <= num_online_cpus()) + cpu_relax(); + smp_mb(); + } + + return ret; +} + +int __kprobes riscv_patch_text(void *addr, 
u32 insn) +{ + struct riscv_insn_patch patch = { + .addr = addr, + .insn = insn, + .cpu_count = ATOMIC_INIT(0), + }; + + return stop_machine_cpuslocked(riscv_patch_text_cb, + &patch, cpu_online_mask); +} diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 817cf7b0974c..610c11e91606 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -22,6 +22,8 @@ #include <asm/switch_to.h> #include <asm/thread_info.h> +unsigned long gp_in_global __asm__("gp"); + extern asmlinkage void ret_from_fork(void); extern asmlinkage void ret_from_kernel_thread(void); @@ -107,9 +109,8 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp, /* p->thread holds context to be restored by __switch_to() */ if (unlikely(p->flags & PF_KTHREAD)) { /* Kernel thread */ - const register unsigned long gp __asm__ ("gp"); memset(childregs, 0, sizeof(struct pt_regs)); - childregs->gp = gp; + childregs->gp = gp_in_global; /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f6c7c3e82d28..7c24da59bccf 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -1,17 +1,588 @@ // SPDX-License-Identifier: GPL-2.0-only +/* + * SBI initialization and all extension implementation. + * + * Copyright (c) 2020 Western Digital Corporation or its affiliates. 
+ */ #include <linux/init.h> #include <linux/pm.h> #include <asm/sbi.h> +#include <asm/smp.h> + +/* default SBI version is 0.1 */ +unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; +EXPORT_SYMBOL(sbi_spec_version); + +static void (*__sbi_set_timer)(uint64_t stime); +static int (*__sbi_send_ipi)(const unsigned long *hart_mask); +static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5); + +struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, + unsigned long arg1, unsigned long arg2, + unsigned long arg3, unsigned long arg4, + unsigned long arg5) +{ + struct sbiret ret; + + register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0); + register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1); + register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2); + register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3); + register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4); + register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5); + register uintptr_t a6 asm ("a6") = (uintptr_t)(fid); + register uintptr_t a7 asm ("a7") = (uintptr_t)(ext); + asm volatile ("ecall" + : "+r" (a0), "+r" (a1) + : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7) + : "memory"); + ret.error = a0; + ret.value = a1; + + return ret; +} +EXPORT_SYMBOL(sbi_ecall); + +int sbi_err_map_linux_errno(int err) +{ + switch (err) { + case SBI_SUCCESS: + return 0; + case SBI_ERR_DENIED: + return -EPERM; + case SBI_ERR_INVALID_PARAM: + return -EINVAL; + case SBI_ERR_INVALID_ADDRESS: + return -EFAULT; + case SBI_ERR_NOT_SUPPORTED: + case SBI_ERR_FAILURE: + default: + return -ENOTSUPP; + }; +} +EXPORT_SYMBOL(sbi_err_map_linux_errno); + +#ifdef CONFIG_RISCV_SBI_V01 +/** + * sbi_console_putchar() - Writes given character to the console device. + * @ch: The data to be written to the console. 
+ * + * Return: None + */ +void sbi_console_putchar(int ch) +{ + sbi_ecall(SBI_EXT_0_1_CONSOLE_PUTCHAR, 0, ch, 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_console_putchar); + +/** + * sbi_console_getchar() - Reads a byte from console device. + * + * Returns the value read from console. + */ +int sbi_console_getchar(void) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_0_1_CONSOLE_GETCHAR, 0, 0, 0, 0, 0, 0, 0); + + return ret.error; +} +EXPORT_SYMBOL(sbi_console_getchar); + +/** + * sbi_shutdown() - Remove all the harts from executing supervisor code. + * + * Return: None + */ +void sbi_shutdown(void) +{ + sbi_ecall(SBI_EXT_0_1_SHUTDOWN, 0, 0, 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_set_timer); + +/** + * sbi_clear_ipi() - Clear any pending IPIs for the calling hart. + * + * Return: None + */ +void sbi_clear_ipi(void) +{ + sbi_ecall(SBI_EXT_0_1_CLEAR_IPI, 0, 0, 0, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_shutdown); + +/** + * __sbi_set_timer_v01() - Program the timer for next timer event. + * @stime_value: The value after which next timer event should fire. 
+ * + * Return: None + */ +static void __sbi_set_timer_v01(uint64_t stime_value) +{ +#if __riscv_xlen == 32 + sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value, + stime_value >> 32, 0, 0, 0, 0); +#else + sbi_ecall(SBI_EXT_0_1_SET_TIMER, 0, stime_value, 0, 0, 0, 0, 0); +#endif +} + +static int __sbi_send_ipi_v01(const unsigned long *hart_mask) +{ + sbi_ecall(SBI_EXT_0_1_SEND_IPI, 0, (unsigned long)hart_mask, + 0, 0, 0, 0, 0); + return 0; +} + +static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) +{ + int result = 0; + + /* v0.2 function IDs are equivalent to v0.1 extension IDs */ + switch (fid) { + case SBI_EXT_RFENCE_REMOTE_FENCE_I: + sbi_ecall(SBI_EXT_0_1_REMOTE_FENCE_I, 0, + (unsigned long)hart_mask, 0, 0, 0, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: + sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA, 0, + (unsigned long)hart_mask, start, size, + 0, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: + sbi_ecall(SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID, 0, + (unsigned long)hart_mask, start, size, + arg4, 0, 0); + break; + default: + pr_err("SBI call [%d]not supported in SBI v0.1\n", fid); + result = -EINVAL; + } + + return result; +} +#else +static void __sbi_set_timer_v01(uint64_t stime_value) +{ + pr_warn("Timer extension is not available in SBI v%lu.%lu\n", + sbi_major_version(), sbi_minor_version()); +} + +static int __sbi_send_ipi_v01(const unsigned long *hart_mask) +{ + pr_warn("IPI extension is not available in SBI v%lu.%lu\n", + sbi_major_version(), sbi_minor_version()); + + return 0; +} + +static int __sbi_rfence_v01(int fid, const unsigned long *hart_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) +{ + pr_warn("remote fence extension is not available in SBI v%lu.%lu\n", + sbi_major_version(), sbi_minor_version()); + + return 0; +} +#endif /* CONFIG_RISCV_SBI_V01 */ + +static void 
__sbi_set_timer_v02(uint64_t stime_value) +{ +#if __riscv_xlen == 32 + sbi_ecall(SBI_EXT_TIME, SBI_EXT_TIME_SET_TIMER, stime_value, + stime_value >> 32, 0, 0, 0, 0); +#else + sbi_ecall(SBI_EXT_TIME, SBI_EXT_TIME_SET_TIMER, stime_value, 0, + 0, 0, 0, 0); +#endif +} + +static int __sbi_send_ipi_v02(const unsigned long *hart_mask) +{ + unsigned long hartid, hmask_val, hbase; + struct cpumask tmask; + struct sbiret ret = {0}; + int result; + + if (!hart_mask || !(*hart_mask)) { + riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); + hart_mask = cpumask_bits(&tmask); + } + + hmask_val = 0; + hbase = 0; + for_each_set_bit(hartid, hart_mask, NR_CPUS) { + if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { + ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, + hmask_val, hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + hmask_val = 0; + hbase = 0; + } + if (!hmask_val) + hbase = hartid; + hmask_val |= 1UL << (hartid - hbase); + } + + if (hmask_val) { + ret = sbi_ecall(SBI_EXT_IPI, SBI_EXT_IPI_SEND_IPI, + hmask_val, hbase, 0, 0, 0, 0); + if (ret.error) + goto ecall_failed; + } + + return 0; + +ecall_failed: + result = sbi_err_map_linux_errno(ret.error); + pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", + __func__, hbase, hmask_val, result); + return result; +} + +static int __sbi_rfence_v02_call(unsigned long fid, unsigned long hmask_val, + unsigned long hbase, unsigned long start, + unsigned long size, unsigned long arg4, + unsigned long arg5) +{ + struct sbiret ret = {0}; + int ext = SBI_EXT_RFENCE; + int result = 0; + + switch (fid) { + case SBI_EXT_RFENCE_REMOTE_FENCE_I: + ret = sbi_ecall(ext, fid, hmask_val, hbase, 0, 0, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA: + ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID: + ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + size, arg4, 0); + break; + + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA: + ret = 
sbi_ecall(ext, fid, hmask_val, hbase, start, + size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID: + ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + size, arg4, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA: + ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + size, 0, 0); + break; + case SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID: + ret = sbi_ecall(ext, fid, hmask_val, hbase, start, + size, arg4, 0); + break; + default: + pr_err("unknown function ID [%lu] for SBI extension [%d]\n", + fid, ext); + result = -EINVAL; + } + + if (ret.error) { + result = sbi_err_map_linux_errno(ret.error); + pr_err("%s: hbase = [%lu] hmask = [0x%lx] failed (error [%d])\n", + __func__, hbase, hmask_val, result); + } + + return result; +} + +static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask, + unsigned long start, unsigned long size, + unsigned long arg4, unsigned long arg5) +{ + unsigned long hmask_val, hartid, hbase; + struct cpumask tmask; + int result; + + if (!hart_mask || !(*hart_mask)) { + riscv_cpuid_to_hartid_mask(cpu_online_mask, &tmask); + hart_mask = cpumask_bits(&tmask); + } + + hmask_val = 0; + hbase = 0; + for_each_set_bit(hartid, hart_mask, NR_CPUS) { + if (hmask_val && ((hbase + BITS_PER_LONG) <= hartid)) { + result = __sbi_rfence_v02_call(fid, hmask_val, hbase, + start, size, arg4, arg5); + if (result) + return result; + hmask_val = 0; + hbase = 0; + } + if (!hmask_val) + hbase = hartid; + hmask_val |= 1UL << (hartid - hbase); + } + + if (hmask_val) { + result = __sbi_rfence_v02_call(fid, hmask_val, hbase, + start, size, arg4, arg5); + if (result) + return result; + } + + return 0; +} + +/** + * sbi_set_timer() - Program the timer for next timer event. + * @stime_value: The value after which next timer event should fire. + * + * Return: None + */ +void sbi_set_timer(uint64_t stime_value) +{ + __sbi_set_timer(stime_value); +} + +/** + * sbi_send_ipi() - Send an IPI to any hart. 
+ * @hart_mask: A cpu mask containing all the target harts. + * + * Return: None + */ +void sbi_send_ipi(const unsigned long *hart_mask) +{ + __sbi_send_ipi(hart_mask); +} +EXPORT_SYMBOL(sbi_send_ipi); + +/** + * sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts. + * @hart_mask: A cpu mask containing all the target harts. + * + * Return: None + */ +void sbi_remote_fence_i(const unsigned long *hart_mask) +{ + __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I, + hart_mask, 0, 0, 0, 0); +} +EXPORT_SYMBOL(sbi_remote_fence_i); + +/** + * sbi_remote_sfence_vma() - Execute SFENCE.VMA instructions on given remote + * harts for the specified virtual address range. + * @hart_mask: A cpu mask containing all the target harts. + * @start: Start of the virtual address + * @size: Total size of the virtual address range. + * + * Return: None + */ +void sbi_remote_sfence_vma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size) +{ + __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA, + hart_mask, start, size, 0, 0); +} +EXPORT_SYMBOL(sbi_remote_sfence_vma); + +/** + * sbi_remote_sfence_vma_asid() - Execute SFENCE.VMA instructions on given + * remote harts for a virtual address range belonging to a specific ASID. + * + * @hart_mask: A cpu mask containing all the target harts. + * @start: Start of the virtual address + * @size: Total size of the virtual address range. + * @asid: The value of address space identifier (ASID). + * + * Return: None + */ +void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask, + unsigned long start, + unsigned long size, + unsigned long asid) +{ + __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID, + hart_mask, start, size, asid, 0); +} +EXPORT_SYMBOL(sbi_remote_sfence_vma_asid); + +/** + * sbi_remote_hfence_gvma() - Execute HFENCE.GVMA instructions on given remote + * harts for the specified guest physical address range. + * @hart_mask: A cpu mask containing all the target harts. 
+ * @start: Start of the guest physical address + * @size: Total size of the guest physical address range. + * + * Return: 0 if success, Error otherwise. + */ +int sbi_remote_hfence_gvma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA, + hart_mask, start, size, 0, 0); +} +EXPORT_SYMBOL_GPL(sbi_remote_hfence_gvma); + +/** + * sbi_remote_hfence_gvma_vmid() - Execute HFENCE.GVMA instructions on given + * remote harts for a guest physical address range belonging to a specific VMID. + * + * @hart_mask: A cpu mask containing all the target harts. + * @start: Start of the guest physical address + * @size: Total size of the guest physical address range. + * @vmid: The value of guest ID (VMID). + * + * Return: 0 if success, Error otherwise. + */ +int sbi_remote_hfence_gvma_vmid(const unsigned long *hart_mask, + unsigned long start, + unsigned long size, + unsigned long vmid) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID, + hart_mask, start, size, vmid, 0); +} +EXPORT_SYMBOL(sbi_remote_hfence_gvma_vmid); + +/** + * sbi_remote_hfence_vvma() - Execute HFENCE.VVMA instructions on given remote + * harts for the current guest virtual address range. + * @hart_mask: A cpu mask containing all the target harts. + * @start: Start of the current guest virtual address + * @size: Total size of the current guest virtual address range. + * + * Return: 0 if success, Error otherwise. + */ +int sbi_remote_hfence_vvma(const unsigned long *hart_mask, + unsigned long start, + unsigned long size) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA, + hart_mask, start, size, 0, 0); +} +EXPORT_SYMBOL(sbi_remote_hfence_vvma); + +/** + * sbi_remote_hfence_vvma_asid() - Execute HFENCE.VVMA instructions on given + * remote harts for current guest virtual address range belonging to a specific + * ASID. + * + * @hart_mask: A cpu mask containing all the target harts. 
+ * @start: Start of the current guest virtual address + * @size: Total size of the current guest virtual address range. + * @asid: The value of address space identifier (ASID). + * + * Return: 0 if success, Error otherwise. + */ +int sbi_remote_hfence_vvma_asid(const unsigned long *hart_mask, + unsigned long start, + unsigned long size, + unsigned long asid) +{ + return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID, + hart_mask, start, size, asid, 0); +} +EXPORT_SYMBOL(sbi_remote_hfence_vvma_asid); + +/** + * sbi_probe_extension() - Check if an SBI extension ID is supported or not. + * @extid: The extension ID to be probed. + * + * Return: Extension specific nonzero value if yes, -ENOTSUPP otherwise. + */ +int sbi_probe_extension(int extid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid, + 0, 0, 0, 0, 0); + if (!ret.error) + if (ret.value) + return ret.value; + + return -ENOTSUPP; +} +EXPORT_SYMBOL(sbi_probe_extension); + +static long __sbi_base_ecall(int fid) +{ + struct sbiret ret; + + ret = sbi_ecall(SBI_EXT_BASE, fid, 0, 0, 0, 0, 0, 0); + if (!ret.error) + return ret.value; + else + return sbi_err_map_linux_errno(ret.error); +} + +static inline long sbi_get_spec_version(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_SPEC_VERSION); +} + +static inline long sbi_get_firmware_id(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_ID); +} + +static inline long sbi_get_firmware_version(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_VERSION); +} static void sbi_power_off(void) { sbi_shutdown(); } -static int __init sbi_init(void) +int __init sbi_init(void) { + int ret; + pm_power_off = sbi_power_off; + ret = sbi_get_spec_version(); + if (ret > 0) + sbi_spec_version = ret; + + pr_info("SBI specification v%lu.%lu detected\n", + sbi_major_version(), sbi_minor_version()); + + if (!sbi_spec_is_0_1()) { + pr_info("SBI implementation ID=0x%lx Version=0x%lx\n", + sbi_get_firmware_id(), sbi_get_firmware_version()); + if 
(sbi_probe_extension(SBI_EXT_TIME) > 0) { + __sbi_set_timer = __sbi_set_timer_v02; + pr_info("SBI v0.2 TIME extension detected\n"); + } else { + __sbi_set_timer = __sbi_set_timer_v01; + } + if (sbi_probe_extension(SBI_EXT_IPI) > 0) { + __sbi_send_ipi = __sbi_send_ipi_v02; + pr_info("SBI v0.2 IPI extension detected\n"); + } else { + __sbi_send_ipi = __sbi_send_ipi_v01; + } + if (sbi_probe_extension(SBI_EXT_RFENCE) > 0) { + __sbi_rfence = __sbi_rfence_v02; + pr_info("SBI v0.2 RFENCE extension detected\n"); + } else { + __sbi_rfence = __sbi_rfence_v01; + } + } else { + __sbi_set_timer = __sbi_set_timer_v01; + __sbi_send_ipi = __sbi_send_ipi_v01; + __sbi_rfence = __sbi_rfence_v01; + } + return 0; } -early_initcall(sbi_init); diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 0a6d415b0a5a..145128a7e560 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -16,12 +16,14 @@ #include <linux/of_platform.h> #include <linux/sched/task.h> #include <linux/swiotlb.h> +#include <linux/smp.h> #include <asm/clint.h> +#include <asm/cpu_ops.h> #include <asm/setup.h> #include <asm/sections.h> #include <asm/pgtable.h> -#include <asm/smp.h> +#include <asm/sbi.h> #include <asm/tlbflush.h> #include <asm/thread_info.h> #include <asm/kasan.h> @@ -39,9 +41,14 @@ struct screen_info screen_info = { }; #endif -/* The lucky hart to first increment this variable will boot the other cores */ -atomic_t hart_lottery; +/* + * The lucky hart to first increment this variable will boot the other cores. + * This is used before the kernel initializes the BSS so it can't be in the + * BSS. 
+ */ +atomic_t hart_lottery __section(.sdata); unsigned long boot_cpu_hartid; +static DEFINE_PER_CPU(struct cpu, cpu_devices); void __init parse_dtb(void) { @@ -79,9 +86,28 @@ void __init setup_arch(char **cmdline_p) kasan_init(); #endif +#if IS_ENABLED(CONFIG_RISCV_SBI) + sbi_init(); +#endif + #ifdef CONFIG_SMP setup_smp(); #endif riscv_fill_hwcap(); } + +static int __init topology_init(void) +{ + int i; + + for_each_possible_cpu(i) { + struct cpu *cpu = &per_cpu(cpu_devices, i); + + cpu->hotpluggable = cpu_has_hotplug(i); + register_cpu(cpu, i); + } + + return 0; +} +subsys_initcall(topology_init); diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index eb878abcaaf8..e0a6293093f1 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -96,7 +96,7 @@ static void send_ipi_mask(const struct cpumask *mask, enum ipi_message_type op) if (IS_ENABLED(CONFIG_RISCV_SBI)) sbi_send_ipi(cpumask_bits(&hartid_mask)); else - clint_send_ipi_mask(&hartid_mask); + clint_send_ipi_mask(mask); } static void send_ipi_single(int cpu, enum ipi_message_type op) diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 8bc01f0ca73b..4e9922790f6e 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -25,6 +25,7 @@ #include <linux/sched/task_stack.h> #include <linux/sched/mm.h> #include <asm/clint.h> +#include <asm/cpu_ops.h> #include <asm/irq.h> #include <asm/mmu_context.h> #include <asm/tlbflush.h> @@ -34,8 +35,6 @@ #include "head.h" -void *__cpu_up_stack_pointer[NR_CPUS]; -void *__cpu_up_task_pointer[NR_CPUS]; static DECLARE_COMPLETION(cpu_running); void __init smp_prepare_boot_cpu(void) @@ -46,6 +45,7 @@ void __init smp_prepare_boot_cpu(void) void __init smp_prepare_cpus(unsigned int max_cpus) { int cpuid; + int ret; /* This covers non-smp usecase mandated by "nosmp" option */ if (max_cpus == 0) @@ -54,6 +54,11 @@ void __init smp_prepare_cpus(unsigned int max_cpus) for_each_possible_cpu(cpuid) { if (cpuid == 
smp_processor_id()) continue; + if (cpu_ops[cpuid]->cpu_prepare) { + ret = cpu_ops[cpuid]->cpu_prepare(cpuid); + if (ret) + continue; + } set_cpu_present(cpuid, true); } } @@ -65,6 +70,8 @@ void __init setup_smp(void) bool found_boot_cpu = false; int cpuid = 1; + cpu_set_ops(0); + for_each_of_cpu_node(dn) { hart = riscv_of_processor_hartid(dn); if (hart < 0) @@ -92,36 +99,38 @@ void __init setup_smp(void) cpuid, nr_cpu_ids); for (cpuid = 1; cpuid < nr_cpu_ids; cpuid++) { - if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) + if (cpuid_to_hartid_map(cpuid) != INVALID_HARTID) { + cpu_set_ops(cpuid); set_cpu_possible(cpuid, true); + } } } +int start_secondary_cpu(int cpu, struct task_struct *tidle) +{ + if (cpu_ops[cpu]->cpu_start) + return cpu_ops[cpu]->cpu_start(cpu, tidle); + + return -EOPNOTSUPP; +} + int __cpu_up(unsigned int cpu, struct task_struct *tidle) { int ret = 0; - int hartid = cpuid_to_hartid_map(cpu); tidle->thread_info.cpu = cpu; - /* - * On RISC-V systems, all harts boot on their own accord. Our _start - * selects the first hart to boot the kernel and causes the remainder - * of the harts to spin in a loop waiting for their stack pointer to be - * setup by that main hart. Writing __cpu_up_stack_pointer signals to - * the spinning harts that they can continue the boot process. 
- */ - smp_mb(); - WRITE_ONCE(__cpu_up_stack_pointer[hartid], - task_stack_page(tidle) + THREAD_SIZE); - WRITE_ONCE(__cpu_up_task_pointer[hartid], tidle); - - lockdep_assert_held(&cpu_running); - wait_for_completion_timeout(&cpu_running, + ret = start_secondary_cpu(cpu, tidle); + if (!ret) { + lockdep_assert_held(&cpu_running); + wait_for_completion_timeout(&cpu_running, msecs_to_jiffies(1000)); - if (!cpu_online(cpu)) { - pr_crit("CPU%u: failed to come online\n", cpu); - ret = -EIO; + if (!cpu_online(cpu)) { + pr_crit("CPU%u: failed to come online\n", cpu); + ret = -EIO; + } + } else { + pr_crit("CPU%u: failed to start\n", cpu); } return ret; @@ -134,7 +143,7 @@ void __init smp_cpus_done(unsigned int max_cpus) /* * C entry point for a secondary processor. */ -asmlinkage __visible void __init smp_callin(void) +asmlinkage __visible void smp_callin(void) { struct mm_struct *mm = &init_mm; diff --git a/arch/riscv/kernel/soc.c b/arch/riscv/kernel/soc.c new file mode 100644 index 000000000000..0b3b3dc9ad0f --- /dev/null +++ b/arch/riscv/kernel/soc.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#include <linux/init.h> +#include <linux/libfdt.h> +#include <asm/pgtable.h> +#include <asm/soc.h> + +/* + * This is called extremely early, before parse_dtb(), to allow initializing + * SoC hardware before memory or any device driver initialization. 
+ */ +void __init soc_early_init(void) +{ + void (*early_fn)(const void *fdt); + const struct of_device_id *s; + const void *fdt = dtb_early_va; + + for (s = (void *)&__soc_early_init_table_start; + (void *)s < (void *)&__soc_early_init_table_end; s++) { + if (!fdt_node_check_compatible(fdt, 0, s->compatible)) { + early_fn = s->data; + early_fn(fdt); + return; + } + } +} diff --git a/arch/riscv/kernel/stacktrace.c b/arch/riscv/kernel/stacktrace.c index 0940681d2f68..02087fe539c6 100644 --- a/arch/riscv/kernel/stacktrace.c +++ b/arch/riscv/kernel/stacktrace.c @@ -19,6 +19,8 @@ struct stackframe { unsigned long ra; }; +register unsigned long sp_in_global __asm__("sp"); + void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, bool (*fn)(unsigned long, void *), void *arg) { @@ -29,7 +31,7 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp __asm__ ("sp"); + const register unsigned long current_sp = sp_in_global; fp = (unsigned long)__builtin_frame_address(0); sp = current_sp; pc = (unsigned long)walk_stackframe; @@ -73,8 +75,7 @@ static void notrace walk_stackframe(struct task_struct *task, sp = user_stack_pointer(regs); pc = instruction_pointer(regs); } else if (task == NULL || task == current) { - const register unsigned long current_sp __asm__ ("sp"); - sp = current_sp; + sp = sp_in_global; pc = (unsigned long)walk_stackframe; } else { /* task blocked in __switch_to */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index ffb3d94bf0cc..7f58fa53033f 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -97,12 +97,33 @@ DO_ERROR_INFO(do_trap_insn_fault, SIGSEGV, SEGV_ACCERR, "instruction access fault"); DO_ERROR_INFO(do_trap_insn_illegal, SIGILL, ILL_ILLOPC, "illegal instruction"); -DO_ERROR_INFO(do_trap_load_misaligned, - SIGBUS, 
BUS_ADRALN, "load address misaligned"); DO_ERROR_INFO(do_trap_load_fault, SIGSEGV, SEGV_ACCERR, "load access fault"); +#ifndef CONFIG_RISCV_M_MODE +DO_ERROR_INFO(do_trap_load_misaligned, + SIGBUS, BUS_ADRALN, "Oops - load address misaligned"); DO_ERROR_INFO(do_trap_store_misaligned, - SIGBUS, BUS_ADRALN, "store (or AMO) address misaligned"); + SIGBUS, BUS_ADRALN, "Oops - store (or AMO) address misaligned"); +#else +int handle_misaligned_load(struct pt_regs *regs); +int handle_misaligned_store(struct pt_regs *regs); + +asmlinkage void do_trap_load_misaligned(struct pt_regs *regs) +{ + if (!handle_misaligned_load(regs)) + return; + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - load address misaligned"); +} + +asmlinkage void do_trap_store_misaligned(struct pt_regs *regs) +{ + if (!handle_misaligned_store(regs)) + return; + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - store (or AMO) address misaligned"); +} +#endif DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); DO_ERROR_INFO(do_trap_ecall_u, @@ -118,7 +139,8 @@ static inline unsigned long get_break_insn_length(unsigned long pc) if (probe_kernel_address((bug_insn_t *)pc, insn)) return 0; - return (((insn & __INSN_LENGTH_MASK) == __INSN_LENGTH_32) ? 
4UL : 2UL); + + return GET_INSN_LENGTH(insn); } asmlinkage __visible void do_trap_break(struct pt_regs *regs) @@ -147,7 +169,7 @@ int is_valid_bugaddr(unsigned long pc) } #endif /* CONFIG_GENERIC_BUG */ -void __init trap_init(void) +void trap_init(void) { /* * Set sup0 scratch register to 0, indicating to exception vector @@ -157,5 +179,5 @@ void __init trap_init(void) /* Set the exception vector address */ csr_write(CSR_TVEC, &handle_exception); /* Enable interrupts */ - csr_write(CSR_IE, IE_SIE | IE_EIE); + csr_write(CSR_IE, IE_SIE); } diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c new file mode 100644 index 000000000000..46c4dafe3ba0 --- /dev/null +++ b/arch/riscv/kernel/traps_misaligned.c @@ -0,0 +1,370 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2020 Western Digital Corporation or its affiliates. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/irq.h> + +#include <asm/processor.h> +#include <asm/ptrace.h> +#include <asm/csr.h> + +#define INSN_MATCH_LB 0x3 +#define INSN_MASK_LB 0x707f +#define INSN_MATCH_LH 0x1003 +#define INSN_MASK_LH 0x707f +#define INSN_MATCH_LW 0x2003 +#define INSN_MASK_LW 0x707f +#define INSN_MATCH_LD 0x3003 +#define INSN_MASK_LD 0x707f +#define INSN_MATCH_LBU 0x4003 +#define INSN_MASK_LBU 0x707f +#define INSN_MATCH_LHU 0x5003 +#define INSN_MASK_LHU 0x707f +#define INSN_MATCH_LWU 0x6003 +#define INSN_MASK_LWU 0x707f +#define INSN_MATCH_SB 0x23 +#define INSN_MASK_SB 0x707f +#define INSN_MATCH_SH 0x1023 +#define INSN_MASK_SH 0x707f +#define INSN_MATCH_SW 0x2023 +#define INSN_MASK_SW 0x707f +#define INSN_MATCH_SD 0x3023 +#define INSN_MASK_SD 0x707f + +#define INSN_MATCH_FLW 0x2007 +#define INSN_MASK_FLW 0x707f +#define INSN_MATCH_FLD 0x3007 +#define INSN_MASK_FLD 0x707f +#define INSN_MATCH_FLQ 0x4007 +#define INSN_MASK_FLQ 0x707f +#define INSN_MATCH_FSW 0x2027 +#define INSN_MASK_FSW 0x707f +#define 
INSN_MATCH_FSD 0x3027 +#define INSN_MASK_FSD 0x707f +#define INSN_MATCH_FSQ 0x4027 +#define INSN_MASK_FSQ 0x707f + +#define INSN_MATCH_C_LD 0x6000 +#define INSN_MASK_C_LD 0xe003 +#define INSN_MATCH_C_SD 0xe000 +#define INSN_MASK_C_SD 0xe003 +#define INSN_MATCH_C_LW 0x4000 +#define INSN_MASK_C_LW 0xe003 +#define INSN_MATCH_C_SW 0xc000 +#define INSN_MASK_C_SW 0xe003 +#define INSN_MATCH_C_LDSP 0x6002 +#define INSN_MASK_C_LDSP 0xe003 +#define INSN_MATCH_C_SDSP 0xe002 +#define INSN_MASK_C_SDSP 0xe003 +#define INSN_MATCH_C_LWSP 0x4002 +#define INSN_MASK_C_LWSP 0xe003 +#define INSN_MATCH_C_SWSP 0xc002 +#define INSN_MASK_C_SWSP 0xe003 + +#define INSN_MATCH_C_FLD 0x2000 +#define INSN_MASK_C_FLD 0xe003 +#define INSN_MATCH_C_FLW 0x6000 +#define INSN_MASK_C_FLW 0xe003 +#define INSN_MATCH_C_FSD 0xa000 +#define INSN_MASK_C_FSD 0xe003 +#define INSN_MATCH_C_FSW 0xe000 +#define INSN_MASK_C_FSW 0xe003 +#define INSN_MATCH_C_FLDSP 0x2002 +#define INSN_MASK_C_FLDSP 0xe003 +#define INSN_MATCH_C_FSDSP 0xa002 +#define INSN_MASK_C_FSDSP 0xe003 +#define INSN_MATCH_C_FLWSP 0x6002 +#define INSN_MASK_C_FLWSP 0xe003 +#define INSN_MATCH_C_FSWSP 0xe002 +#define INSN_MASK_C_FSWSP 0xe003 + +#define INSN_LEN(insn) ((((insn) & 0x3) < 0x3) ? 
2 : 4) + +#if defined(CONFIG_64BIT) +#define LOG_REGBYTES 3 +#define XLEN 64 +#else +#define LOG_REGBYTES 2 +#define XLEN 32 +#endif +#define REGBYTES (1 << LOG_REGBYTES) +#define XLEN_MINUS_16 ((XLEN) - 16) + +#define SH_RD 7 +#define SH_RS1 15 +#define SH_RS2 20 +#define SH_RS2C 2 + +#define RV_X(x, s, n) (((x) >> (s)) & ((1 << (n)) - 1)) +#define RVC_LW_IMM(x) ((RV_X(x, 6, 1) << 2) | \ + (RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 1) << 6)) +#define RVC_LD_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 5, 2) << 6)) +#define RVC_LWSP_IMM(x) ((RV_X(x, 4, 3) << 2) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 2) << 6)) +#define RVC_LDSP_IMM(x) ((RV_X(x, 5, 2) << 3) | \ + (RV_X(x, 12, 1) << 5) | \ + (RV_X(x, 2, 3) << 6)) +#define RVC_SWSP_IMM(x) ((RV_X(x, 9, 4) << 2) | \ + (RV_X(x, 7, 2) << 6)) +#define RVC_SDSP_IMM(x) ((RV_X(x, 10, 3) << 3) | \ + (RV_X(x, 7, 3) << 6)) +#define RVC_RS1S(insn) (8 + RV_X(insn, SH_RD, 3)) +#define RVC_RS2S(insn) (8 + RV_X(insn, SH_RS2C, 3)) +#define RVC_RS2(insn) RV_X(insn, SH_RS2C, 5) + +#define SHIFT_RIGHT(x, y) \ + ((y) < 0 ? 
((x) << -(y)) : ((x) >> (y))) + +#define REG_MASK \ + ((1 << (5 + LOG_REGBYTES)) - (1 << LOG_REGBYTES)) + +#define REG_OFFSET(insn, pos) \ + (SHIFT_RIGHT((insn), (pos) - LOG_REGBYTES) & REG_MASK) + +#define REG_PTR(insn, pos, regs) \ + (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos)) + +#define GET_RM(insn) (((insn) >> 12) & 7) + +#define GET_RS1(insn, regs) (*REG_PTR(insn, SH_RS1, regs)) +#define GET_RS2(insn, regs) (*REG_PTR(insn, SH_RS2, regs)) +#define GET_RS1S(insn, regs) (*REG_PTR(RVC_RS1S(insn), 0, regs)) +#define GET_RS2S(insn, regs) (*REG_PTR(RVC_RS2S(insn), 0, regs)) +#define GET_RS2C(insn, regs) (*REG_PTR(insn, SH_RS2C, regs)) +#define GET_SP(regs) (*REG_PTR(2, 0, regs)) +#define SET_RD(insn, regs, val) (*REG_PTR(insn, SH_RD, regs) = (val)) +#define IMM_I(insn) ((s32)(insn) >> 20) +#define IMM_S(insn) (((s32)(insn) >> 25 << 5) | \ + (s32)(((insn) >> 7) & 0x1f)) +#define MASK_FUNCT3 0x7000 + +#define GET_PRECISION(insn) (((insn) >> 25) & 3) +#define GET_RM(insn) (((insn) >> 12) & 7) +#define PRECISION_S 0 +#define PRECISION_D 1 + +#define STR(x) XSTR(x) +#define XSTR(x) #x + +#define DECLARE_UNPRIVILEGED_LOAD_FUNCTION(type, insn) \ +static inline type load_##type(const type *addr) \ +{ \ + type val; \ + asm (#insn " %0, %1" \ + : "=&r" (val) : "m" (*addr)); \ + return val; \ +} + +#define DECLARE_UNPRIVILEGED_STORE_FUNCTION(type, insn) \ +static inline void store_##type(type *addr, type val) \ +{ \ + asm volatile (#insn " %0, %1\n" \ + : : "r" (val), "m" (*addr)); \ +} + +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u8, lbu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u16, lhu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s8, lb) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s16, lh) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(s32, lw) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u8, sb) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u16, sh) +DECLARE_UNPRIVILEGED_STORE_FUNCTION(u32, sw) +#if defined(CONFIG_64BIT) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lwu) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u64, ld) 
+DECLARE_UNPRIVILEGED_STORE_FUNCTION(u64, sd) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, ld) +#else +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(u32, lw) +DECLARE_UNPRIVILEGED_LOAD_FUNCTION(ulong, lw) + +static inline u64 load_u64(const u64 *addr) +{ + return load_u32((u32 *)addr) + + ((u64)load_u32((u32 *)addr + 1) << 32); +} + +static inline void store_u64(u64 *addr, u64 val) +{ + store_u32((u32 *)addr, val); + store_u32((u32 *)addr + 1, val >> 32); +} +#endif + +static inline ulong get_insn(ulong mepc) +{ + register ulong __mepc asm ("a2") = mepc; + ulong val, rvc_mask = 3, tmp; + + asm ("and %[tmp], %[addr], 2\n" + "bnez %[tmp], 1f\n" +#if defined(CONFIG_64BIT) + STR(LWU) " %[insn], (%[addr])\n" +#else + STR(LW) " %[insn], (%[addr])\n" +#endif + "and %[tmp], %[insn], %[rvc_mask]\n" + "beq %[tmp], %[rvc_mask], 2f\n" + "sll %[insn], %[insn], %[xlen_minus_16]\n" + "srl %[insn], %[insn], %[xlen_minus_16]\n" + "j 2f\n" + "1:\n" + "lhu %[insn], (%[addr])\n" + "and %[tmp], %[insn], %[rvc_mask]\n" + "bne %[tmp], %[rvc_mask], 2f\n" + "lhu %[tmp], 2(%[addr])\n" + "sll %[tmp], %[tmp], 16\n" + "add %[insn], %[insn], %[tmp]\n" + "2:" + : [insn] "=&r" (val), [tmp] "=&r" (tmp) + : [addr] "r" (__mepc), [rvc_mask] "r" (rvc_mask), + [xlen_minus_16] "i" (XLEN_MINUS_16)); + + return val; +} + +union reg_data { + u8 data_bytes[8]; + ulong data_ulong; + u64 data_u64; +}; + +int handle_misaligned_load(struct pt_regs *regs) +{ + union reg_data val; + unsigned long epc = regs->epc; + unsigned long insn = get_insn(epc); + unsigned long addr = csr_read(mtval); + int i, fp = 0, shift = 0, len = 0; + + regs->epc = 0; + + if ((insn & INSN_MASK_LW) == INSN_MATCH_LW) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_LD) == INSN_MATCH_LD) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_LWU) == INSN_MATCH_LWU) { + len = 4; +#endif + } else if ((insn & INSN_MASK_FLD) == INSN_MATCH_FLD) { + fp = 1; + 
len = 8; + } else if ((insn & INSN_MASK_FLW) == INSN_MATCH_FLW) { + fp = 1; + len = 4; + } else if ((insn & INSN_MASK_LH) == INSN_MATCH_LH) { + len = 2; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_LHU) == INSN_MATCH_LHU) { + len = 2; +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_LD) == INSN_MATCH_C_LD) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LDSP) == INSN_MATCH_C_LDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + shift = 8 * (sizeof(unsigned long) - len); +#endif + } else if ((insn & INSN_MASK_C_LW) == INSN_MATCH_C_LW) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_LWSP) == INSN_MATCH_C_LWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + shift = 8 * (sizeof(unsigned long) - len); + } else if ((insn & INSN_MASK_C_FLD) == INSN_MATCH_C_FLD) { + fp = 1; + len = 8; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_FLDSP) == INSN_MATCH_C_FLDSP) { + fp = 1; + len = 8; +#if defined(CONFIG_32BIT) + } else if ((insn & INSN_MASK_C_FLW) == INSN_MATCH_C_FLW) { + fp = 1; + len = 4; + insn = RVC_RS2S(insn) << SH_RD; + } else if ((insn & INSN_MASK_C_FLWSP) == INSN_MATCH_C_FLWSP) { + fp = 1; + len = 4; +#endif + } else { + regs->epc = epc; + return -1; + } + + val.data_u64 = 0; + for (i = 0; i < len; i++) + val.data_bytes[i] = load_u8((void *)(addr + i)); + + if (fp) + return -1; + SET_RD(insn, regs, val.data_ulong << shift >> shift); + + regs->epc = epc + INSN_LEN(insn); + + return 0; +} + +int handle_misaligned_store(struct pt_regs *regs) +{ + union reg_data val; + unsigned long epc = regs->epc; + unsigned long insn = get_insn(epc); + unsigned long addr = csr_read(mtval); + int i, len = 0; + + regs->epc = 0; + + val.data_ulong = GET_RS2(insn, regs); + + if ((insn & INSN_MASK_SW) == INSN_MATCH_SW) { + len = 4; +#if defined(CONFIG_64BIT) + } else if ((insn & 
INSN_MASK_SD) == INSN_MATCH_SD) { + len = 8; +#endif + } else if ((insn & INSN_MASK_SH) == INSN_MATCH_SH) { + len = 2; +#if defined(CONFIG_64BIT) + } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { + len = 8; + val.data_ulong = GET_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && + ((insn >> SH_RD) & 0x1f)) { + len = 8; + val.data_ulong = GET_RS2C(insn, regs); +#endif + } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { + len = 4; + val.data_ulong = GET_RS2S(insn, regs); + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && + ((insn >> SH_RD) & 0x1f)) { + len = 4; + val.data_ulong = GET_RS2C(insn, regs); + } else { + regs->epc = epc; + return -1; + } + + for (i = 0; i < len; i++) + store_u8((void *)(addr + i), val.data_bytes[i]); + + regs->epc = epc + INSN_LEN(insn); + + return 0; +} diff --git a/arch/riscv/kernel/vdso/.gitignore b/arch/riscv/kernel/vdso/.gitignore index 97c2d69d0289..11ebee9e4c1d 100644 --- a/arch/riscv/kernel/vdso/.gitignore +++ b/arch/riscv/kernel/vdso/.gitignore @@ -1,2 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only vdso.lds *.tmp diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 1e0193ded420..0339b6bbe11a 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -9,7 +9,9 @@ #include <asm/page.h> #include <asm/cache.h> #include <asm/thread_info.h> +#include <asm/set_memory.h> +#include <linux/sizes.h> OUTPUT_ARCH(riscv) ENTRY(_start) @@ -20,10 +22,18 @@ SECTIONS /* Beginning of code and text segment */ . = LOAD_OFFSET; _start = .; - __init_begin = .; HEAD_TEXT_SECTION + . = ALIGN(PAGE_SIZE); + + __init_begin = .; INIT_TEXT_SECTION(PAGE_SIZE) INIT_DATA_SECTION(16) + . 
= ALIGN(8); + __soc_early_init_table : { + __soc_early_init_table_start = .; + KEEP(*(__soc_early_init_table)) + __soc_early_init_table_end = .; + } /* we have to discard exit text and such at runtime, not link time */ .exit.text : { @@ -36,6 +46,7 @@ SECTIONS PERCPU_SECTION(L1_CACHE_BYTES) __init_end = .; + . = ALIGN(SECTION_ALIGN); .text : { _text = .; _stext = .; @@ -53,24 +64,26 @@ SECTIONS /* Start of data section */ _sdata = .; - RO_DATA(L1_CACHE_BYTES) + RO_DATA(SECTION_ALIGN) .srodata : { *(.srodata*) } + EXCEPTION_TABLE(0x10) + + . = ALIGN(SECTION_ALIGN); + _data = .; + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) .sdata : { __global_pointer$ = . + 0x800; *(.sdata*) /* End of data section */ _edata = .; - *(.sbss*) } BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) - EXCEPTION_TABLE(0x10) - .rel.dyn : { *(.rel.dyn*) } diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 47e7a8204460..0d0db80800c4 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -2,5 +2,5 @@ lib-y += delay.o lib-y += memcpy.o lib-y += memset.o -lib-$(CONFIG_MMU) += uaccess.o +lib-y += uaccess.o lib-$(CONFIG_64BIT) += tishift.o diff --git a/arch/riscv/lib/uaccess.S b/arch/riscv/lib/uaccess.S index f29d2ba2c0a6..fceaeb18cc64 100644 --- a/arch/riscv/lib/uaccess.S +++ b/arch/riscv/lib/uaccess.S @@ -3,14 +3,12 @@ #include <asm/asm.h> #include <asm/csr.h> - .altmacro .macro fixup op reg addr lbl - LOCAL _epc -_epc: +100: \op \reg, \addr .section __ex_table,"a" .balign RISCV_SZPTR - RISCV_PTR _epc, \lbl + RISCV_PTR 100b, \lbl .previous .endm diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index 50b7af58c566..363ef01c30b1 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -7,7 +7,7 @@ endif obj-y += init.o obj-y += extable.o -obj-$(CONFIG_MMU) += fault.o +obj-$(CONFIG_MMU) += fault.o pageattr.o obj-y += cacheflush.o obj-y += context.o @@ -15,6 +15,7 @@ ifeq ($(CONFIG_MMU),y) obj-$(CONFIG_SMP) += tlbflush.o endif obj-$(CONFIG_HUGETLB_PAGE) += 
hugetlbpage.o +obj-$(CONFIG_PTDUMP_CORE) += ptdump.o obj-$(CONFIG_KASAN) += kasan_init.o ifdef CONFIG_KASAN diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index cf7248e07f43..be84e32adc4c 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -30,7 +30,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs) struct vm_area_struct *vma; struct mm_struct *mm; unsigned long addr, cause; - unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + unsigned int flags = FAULT_FLAG_DEFAULT; int code = SEGV_MAPERR; vm_fault_t fault; @@ -117,7 +117,7 @@ good_area: * signal first. We do not need to release the mmap_sem because it * would already be released in __lock_page_or_retry in mm/filemap.c. */ - if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(tsk)) + if (fault_signal_pending(fault, regs)) return; if (unlikely(fault & VM_FAULT_ERROR)) { @@ -144,11 +144,6 @@ good_area: 1, regs, addr); } if (fault & VM_FAULT_RETRY) { - /* - * Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk - * of starvation. 
- */ - flags &= ~(FAULT_FLAG_ALLOW_RETRY); flags |= FAULT_FLAG_TRIED; /* diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c index 0d4747e9d5b5..a6189ed36c5f 100644 --- a/arch/riscv/mm/hugetlbpage.c +++ b/arch/riscv/mm/hugetlbpage.c @@ -4,14 +4,12 @@ int pud_huge(pud_t pud) { - return pud_present(pud) && - (pud_val(pud) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); + return pud_leaf(pud); } int pmd_huge(pmd_t pmd) { - return pmd_present(pmd) && - (pmd_val(pmd) & (_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)); + return pmd_leaf(pmd); } static __init int setup_hugepagesz(char *opt) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index fab855963c73..b55be44ff9bd 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -12,6 +12,7 @@ #include <linux/sizes.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> +#include <linux/set_memory.h> #include <asm/fixmap.h> #include <asm/tlbflush.h> @@ -477,6 +478,17 @@ static void __init setup_vm_final(void) csr_write(CSR_SATP, PFN_DOWN(__pa_symbol(swapper_pg_dir)) | SATP_MODE); local_flush_tlb_all(); } + +void free_initmem(void) +{ + unsigned long init_begin = (unsigned long)__init_begin; + unsigned long init_end = (unsigned long)__init_end; + + /* Make the region non-executable. 
*/ + set_memory_nx(init_begin, (init_end - init_begin) >> PAGE_SHIFT); + free_initmem_default(POISON_FREE_INITMEM); +} + #else asmlinkage void __init setup_vm(uintptr_t dtb_pa) { @@ -488,6 +500,38 @@ static inline void setup_vm_final(void) } #endif /* CONFIG_MMU */ +#ifdef CONFIG_STRICT_KERNEL_RWX +void set_kernel_text_rw(void) +{ + unsigned long text_start = (unsigned long)_text; + unsigned long text_end = (unsigned long)_etext; + + set_memory_rw(text_start, (text_end - text_start) >> PAGE_SHIFT); +} + +void set_kernel_text_ro(void) +{ + unsigned long text_start = (unsigned long)_text; + unsigned long text_end = (unsigned long)_etext; + + set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT); +} + +void mark_rodata_ro(void) +{ + unsigned long text_start = (unsigned long)_text; + unsigned long text_end = (unsigned long)_etext; + unsigned long rodata_start = (unsigned long)__start_rodata; + unsigned long data_start = (unsigned long)_data; + unsigned long max_low = (unsigned long)(__va(PFN_PHYS(max_low_pfn))); + + set_memory_ro(text_start, (text_end - text_start) >> PAGE_SHIFT); + set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); + set_memory_nx(data_start, (max_low - data_start) >> PAGE_SHIFT); +} +#endif + void __init paging_init(void) { setup_vm_final(); diff --git a/arch/riscv/mm/pageattr.c b/arch/riscv/mm/pageattr.c new file mode 100644 index 000000000000..728759eb530a --- /dev/null +++ b/arch/riscv/mm/pageattr.c @@ -0,0 +1,187 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2019 SiFive + */ + +#include <linux/pagewalk.h> +#include <asm/pgtable.h> +#include <asm/tlbflush.h> +#include <asm/bitops.h> + +struct pageattr_masks { + pgprot_t set_mask; + pgprot_t clear_mask; +}; + +static unsigned long set_pageattr_masks(unsigned long val, struct mm_walk *walk) +{ + struct pageattr_masks *masks = walk->private; + unsigned long new_val = val; + + 
new_val &= ~(pgprot_val(masks->clear_mask)); + new_val |= (pgprot_val(masks->set_mask)); + + return new_val; +} + +static int pageattr_pgd_entry(pgd_t *pgd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pgd_t val = READ_ONCE(*pgd); + + if (pgd_leaf(val)) { + val = __pgd(set_pageattr_masks(pgd_val(val), walk)); + set_pgd(pgd, val); + } + + return 0; +} + +static int pageattr_p4d_entry(p4d_t *p4d, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + p4d_t val = READ_ONCE(*p4d); + + if (p4d_leaf(val)) { + val = __p4d(set_pageattr_masks(p4d_val(val), walk)); + set_p4d(p4d, val); + } + + return 0; +} + +static int pageattr_pud_entry(pud_t *pud, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pud_t val = READ_ONCE(*pud); + + if (pud_leaf(val)) { + val = __pud(set_pageattr_masks(pud_val(val), walk)); + set_pud(pud, val); + } + + return 0; +} + +static int pageattr_pmd_entry(pmd_t *pmd, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pmd_t val = READ_ONCE(*pmd); + + if (pmd_leaf(val)) { + val = __pmd(set_pageattr_masks(pmd_val(val), walk)); + set_pmd(pmd, val); + } + + return 0; +} + +static int pageattr_pte_entry(pte_t *pte, unsigned long addr, + unsigned long next, struct mm_walk *walk) +{ + pte_t val = READ_ONCE(*pte); + + val = __pte(set_pageattr_masks(pte_val(val), walk)); + set_pte(pte, val); + + return 0; +} + +static int pageattr_pte_hole(unsigned long addr, unsigned long next, + int depth, struct mm_walk *walk) +{ + /* Nothing to do here */ + return 0; +} + +/* + * Page-walk callbacks: leaf entries at the pgd/p4d/pud/pmd levels and every + * pte get the set/clear masks applied; holes are left alone. + */ +static const struct mm_walk_ops pageattr_ops = { + .pgd_entry = pageattr_pgd_entry, + .p4d_entry = pageattr_p4d_entry, + .pud_entry = pageattr_pud_entry, + .pmd_entry = pageattr_pmd_entry, + .pte_entry = pageattr_pte_entry, + .pte_hole = pageattr_pte_hole, +}; + +static int __set_memory(unsigned long addr, int numpages, pgprot_t set_mask, + pgprot_t clear_mask) +{ + int ret; + unsigned long start = addr; + unsigned long end = 
start + PAGE_SIZE * numpages; + struct pageattr_masks masks = { + .set_mask = set_mask, + .clear_mask = clear_mask + }; + + if (!numpages) + return 0; + + down_read(&init_mm.mmap_sem); + ret = walk_page_range_novma(&init_mm, start, end, &pageattr_ops, NULL, + &masks); + up_read(&init_mm.mmap_sem); + + flush_tlb_kernel_range(start, end); + + return ret; +} + +int set_memory_ro(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(_PAGE_READ), + __pgprot(_PAGE_WRITE)); +} + +int set_memory_rw(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(_PAGE_READ | _PAGE_WRITE), + __pgprot(0)); +} + +int set_memory_x(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(_PAGE_EXEC), __pgprot(0)); +} + +int set_memory_nx(unsigned long addr, int numpages) +{ + return __set_memory(addr, numpages, __pgprot(0), __pgprot(_PAGE_EXEC)); +} + +/* + * Clear _PAGE_PRESENT on the init_mm mapping of the single page at + * page_address(@page). No TLB flush is issued here. The page walker expects + * the mm's mmap_sem to be held, so take it for reading as __set_memory() does. + * Returns the result of the page walk. + */ +int set_direct_map_invalid_noflush(struct page *page) +{ + int ret; + unsigned long start = (unsigned long)page_address(page); + unsigned long end = start + PAGE_SIZE; + struct pageattr_masks masks = { + .set_mask = __pgprot(0), + .clear_mask = __pgprot(_PAGE_PRESENT) + }; + + down_read(&init_mm.mmap_sem); + ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + up_read(&init_mm.mmap_sem); + + return ret; +} + +/* + * OR the PAGE_KERNEL bits back into the init_mm mapping of the single page at + * page_address(@page). No TLB flush is issued here. Takes mmap_sem for + * reading around the page walk, as __set_memory() does. Returns the result of + * the page walk. + */ +int set_direct_map_default_noflush(struct page *page) +{ + int ret; + unsigned long start = (unsigned long)page_address(page); + unsigned long end = start + PAGE_SIZE; + struct pageattr_masks masks = { + .set_mask = PAGE_KERNEL, + .clear_mask = __pgprot(0) + }; + + down_read(&init_mm.mmap_sem); + ret = walk_page_range(&init_mm, start, end, &pageattr_ops, &masks); + up_read(&init_mm.mmap_sem); + + return ret; +} + +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + if (!debug_pagealloc_enabled()) + return; + + if (enable) + __set_memory((unsigned long)page_address(page), numpages, + __pgprot(_PAGE_PRESENT), __pgprot(0)); + else + __set_memory((unsigned long)page_address(page), numpages, + __pgprot(0), __pgprot(_PAGE_PRESENT)); +} diff --git a/arch/riscv/mm/ptdump.c 
b/arch/riscv/mm/ptdump.c new file mode 100644 index 000000000000..7eab76a93106 --- /dev/null +++ b/arch/riscv/mm/ptdump.c @@ -0,0 +1,317 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2019 SiFive + */ + +#include <linux/init.h> +#include <linux/debugfs.h> +#include <linux/seq_file.h> +#include <linux/ptdump.h> + +#include <asm/ptdump.h> +#include <asm/pgtable.h> +#include <asm/kasan.h> + +#define pt_dump_seq_printf(m, fmt, args...) \ +({ \ + if (m) \ + seq_printf(m, fmt, ##args); \ +}) + +/* Emit a fixed string verbatim (no format parsing); does nothing when @m is NULL. */ +#define pt_dump_seq_puts(m, fmt) \ +({ \ + if (m) \ + seq_puts(m, fmt); \ +}) + +/* + * The page dumper groups page table entries of the same type into a single + * description. It uses pg_state to track the range information while + * iterating over the pte entries. When the continuity is broken it then + * dumps out a description of the range. + */ +struct pg_state { + struct ptdump_state ptdump; + struct seq_file *seq; + const struct addr_marker *marker; + unsigned long start_address; + unsigned long start_pa; + unsigned long last_pa; + int level; + u64 current_prot; + bool check_wx; + unsigned long wx_pages; +}; + +/* Address marker */ +struct addr_marker { + unsigned long start_address; + const char *name; +}; + +static struct addr_marker address_markers[] = { +#ifdef CONFIG_KASAN + {KASAN_SHADOW_START, "Kasan shadow start"}, + {KASAN_SHADOW_END, "Kasan shadow end"}, +#endif + {FIXADDR_START, "Fixmap start"}, + {FIXADDR_TOP, "Fixmap end"}, + {PCI_IO_START, "PCI I/O start"}, + {PCI_IO_END, "PCI I/O end"}, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + {VMEMMAP_START, "vmemmap start"}, + {VMEMMAP_END, "vmemmap end"}, +#endif + {VMALLOC_START, "vmalloc() area"}, + {VMALLOC_END, "vmalloc() end"}, + {PAGE_OFFSET, "Linear mapping"}, + {-1, NULL}, +}; + +/* Page Table Entry */ +struct prot_bits { + u64 mask; + u64 val; + const char *set; + const char *clear; +}; + +static const struct prot_bits pte_bits[] = { + { + .mask = _PAGE_SOFT, + .val = _PAGE_SOFT, + .set = "RSW", + 
.clear = " ", + }, { + .mask = _PAGE_DIRTY, + .val = _PAGE_DIRTY, + .set = "D", + .clear = ".", + }, { + .mask = _PAGE_ACCESSED, + .val = _PAGE_ACCESSED, + .set = "A", + .clear = ".", + }, { + .mask = _PAGE_GLOBAL, + .val = _PAGE_GLOBAL, + .set = "G", + .clear = ".", + }, { + .mask = _PAGE_USER, + .val = _PAGE_USER, + .set = "U", + .clear = ".", + }, { + .mask = _PAGE_EXEC, + .val = _PAGE_EXEC, + .set = "X", + .clear = ".", + }, { + .mask = _PAGE_WRITE, + .val = _PAGE_WRITE, + .set = "W", + .clear = ".", + }, { + .mask = _PAGE_READ, + .val = _PAGE_READ, + .set = "R", + .clear = ".", + }, { + .mask = _PAGE_PRESENT, + .val = _PAGE_PRESENT, + .set = "V", + .clear = ".", + } +}; + +/* Page Level */ +struct pg_level { + const char *name; + u64 mask; +}; + +static struct pg_level pg_level[] = { + { /* pgd */ + .name = "PGD", + }, { /* p4d */ + .name = (CONFIG_PGTABLE_LEVELS > 4) ? "P4D" : "PGD", + }, { /* pud */ + .name = (CONFIG_PGTABLE_LEVELS > 3) ? "PUD" : "PGD", + }, { /* pmd */ + .name = (CONFIG_PGTABLE_LEVELS > 2) ? 
"PMD" : "PGD", + }, { /* pte */ + .name = "PTE", + }, +}; + +static void dump_prot(struct pg_state *st) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(pte_bits); i++) { + const char *s; + + if ((st->current_prot & pte_bits[i].mask) == pte_bits[i].val) + s = pte_bits[i].set; + else + s = pte_bits[i].clear; + + if (s) + pt_dump_seq_printf(st->seq, " %s", s); + } +} + +#ifdef CONFIG_64BIT +#define ADDR_FORMAT "0x%016lx" +#else +#define ADDR_FORMAT "0x%08lx" +#endif +static void dump_addr(struct pg_state *st, unsigned long addr) +{ + static const char units[] = "KMGTPE"; + const char *unit = units; + unsigned long delta; + + pt_dump_seq_printf(st->seq, ADDR_FORMAT "-" ADDR_FORMAT " ", + st->start_address, addr); + + pt_dump_seq_printf(st->seq, " " ADDR_FORMAT " ", st->start_pa); + delta = (addr - st->start_address) >> 10; + + while (!(delta & 1023) && unit[1]) { + delta >>= 10; + unit++; + } + + pt_dump_seq_printf(st->seq, "%9lu%c %s", delta, *unit, + pg_level[st->level].name); +} + +static void note_prot_wx(struct pg_state *st, unsigned long addr) +{ + if (!st->check_wx) + return; + + if ((st->current_prot & (_PAGE_WRITE | _PAGE_EXEC)) != + (_PAGE_WRITE | _PAGE_EXEC)) + return; + + WARN_ONCE(1, "riscv/mm: Found insecure W+X mapping at address %p/%pS\n", + (void *)st->start_address, (void *)st->start_address); + + st->wx_pages += (addr - st->start_address) / PAGE_SIZE; +} + +static void note_page(struct ptdump_state *pt_st, unsigned long addr, + int level, unsigned long val) +{ + struct pg_state *st = container_of(pt_st, struct pg_state, ptdump); + u64 pa = PFN_PHYS(pte_pfn(__pte(val))); + u64 prot = 0; + + if (level >= 0) + prot = val & pg_level[level].mask; + + if (st->level == -1) { + st->level = level; + st->current_prot = prot; + st->start_address = addr; + st->start_pa = pa; + st->last_pa = pa; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name); + } else if (prot != st->current_prot || + level != st->level || addr >= 
st->marker[1].start_address) { + if (st->current_prot) { + note_prot_wx(st, addr); + dump_addr(st, addr); + dump_prot(st); + pt_dump_seq_puts(st->seq, "\n"); + } + + while (addr >= st->marker[1].start_address) { + st->marker++; + pt_dump_seq_printf(st->seq, "---[ %s ]---\n", + st->marker->name); + } + + st->start_address = addr; + st->start_pa = pa; + st->last_pa = pa; + st->current_prot = prot; + st->level = level; + } else { + st->last_pa = pa; + } +} + +static void ptdump_walk(struct seq_file *s) +{ + struct pg_state st = { + .seq = s, + .marker = address_markers, + .level = -1, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {KERN_VIRT_START, ULONG_MAX}, + {0, 0} + } + } + }; + + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); +} + +void ptdump_check_wx(void) +{ + struct pg_state st = { + .seq = NULL, + .marker = (struct addr_marker[]) { + {0, NULL}, + {-1, NULL}, + }, + .level = -1, + .check_wx = true, + .ptdump = { + .note_page = note_page, + .range = (struct ptdump_range[]) { + {KERN_VIRT_START, ULONG_MAX}, + {0, 0} + } + } + }; + + ptdump_walk_pgd(&st.ptdump, &init_mm, NULL); + + if (st.wx_pages) + pr_warn("Checked W+X mappings: failed, %lu W+X pages found\n", + st.wx_pages); + else + pr_info("Checked W+X mappings: passed, no W+X pages found\n"); +} + +static int ptdump_show(struct seq_file *m, void *v) +{ + ptdump_walk(m); + + return 0; +} + +DEFINE_SHOW_ATTRIBUTE(ptdump); + +static int ptdump_init(void) +{ + unsigned int i, j; + + for (i = 0; i < ARRAY_SIZE(pg_level); i++) + for (j = 0; j < ARRAY_SIZE(pte_bits); j++) + pg_level[i].mask |= pte_bits[j].mask; + + debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, + &ptdump_fops); + + return 0; +} + +device_initcall(ptdump_init); diff --git a/arch/riscv/net/Makefile b/arch/riscv/net/Makefile index ec5b14763316..9a1e5f0a94e5 100644 --- a/arch/riscv/net/Makefile +++ b/arch/riscv/net/Makefile @@ -1,2 +1,9 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_BPF_JIT) += 
bpf_jit_comp.o + +obj-$(CONFIG_BPF_JIT) += bpf_jit_core.o + +ifeq ($(CONFIG_ARCH_RV64I),y) + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o +else + obj-$(CONFIG_BPF_JIT) += bpf_jit_comp32.o +endif diff --git a/arch/riscv/net/bpf_jit.h b/arch/riscv/net/bpf_jit.h new file mode 100644 index 000000000000..20e235d06f66 --- /dev/null +++ b/arch/riscv/net/bpf_jit.h @@ -0,0 +1,514 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Common functionality for RV32 and RV64 BPF JIT compilers + * + * Copyright (c) 2019 Björn Töpel <bjorn.topel@gmail.com> + * + */ + +#ifndef _BPF_JIT_H +#define _BPF_JIT_H + +#include <linux/bpf.h> +#include <linux/filter.h> +#include <asm/cacheflush.h> + +enum { + RV_REG_ZERO = 0, /* The constant value 0 */ + RV_REG_RA = 1, /* Return address */ + RV_REG_SP = 2, /* Stack pointer */ + RV_REG_GP = 3, /* Global pointer */ + RV_REG_TP = 4, /* Thread pointer */ + RV_REG_T0 = 5, /* Temporaries */ + RV_REG_T1 = 6, + RV_REG_T2 = 7, + RV_REG_FP = 8, /* Saved register/frame pointer */ + RV_REG_S1 = 9, /* Saved register */ + RV_REG_A0 = 10, /* Function argument/return values */ + RV_REG_A1 = 11, /* Function arguments */ + RV_REG_A2 = 12, + RV_REG_A3 = 13, + RV_REG_A4 = 14, + RV_REG_A5 = 15, + RV_REG_A6 = 16, + RV_REG_A7 = 17, + RV_REG_S2 = 18, /* Saved registers */ + RV_REG_S3 = 19, + RV_REG_S4 = 20, + RV_REG_S5 = 21, + RV_REG_S6 = 22, + RV_REG_S7 = 23, + RV_REG_S8 = 24, + RV_REG_S9 = 25, + RV_REG_S10 = 26, + RV_REG_S11 = 27, + RV_REG_T3 = 28, /* Temporaries */ + RV_REG_T4 = 29, + RV_REG_T5 = 30, + RV_REG_T6 = 31, +}; + +struct rv_jit_context { + struct bpf_prog *prog; + u32 *insns; /* RV insns */ + int ninsns; + int epilogue_offset; + int *offset; /* BPF to RV */ + unsigned long flags; + int stack_size; +}; + +struct rv_jit_data { + struct bpf_binary_header *header; + u8 *image; + struct rv_jit_context ctx; +}; + +static inline void bpf_fill_ill_insns(void *area, unsigned int size) +{ + memset(area, 0, size); +} + +static inline void bpf_flush_icache(void 
*start, void *end) +{ + flush_icache_range((unsigned long)start, (unsigned long)end); +} + +static inline void emit(const u32 insn, struct rv_jit_context *ctx) +{ + if (ctx->insns) + ctx->insns[ctx->ninsns] = insn; + + ctx->ninsns++; +} + +static inline int epilogue_offset(struct rv_jit_context *ctx) +{ + int to = ctx->epilogue_offset, from = ctx->ninsns; + + return (to - from) << 2; +} + +/* Return -1 or inverted cond. */ +static inline int invert_bpf_cond(u8 cond) +{ + switch (cond) { + case BPF_JEQ: + return BPF_JNE; + case BPF_JGT: + return BPF_JLE; + case BPF_JLT: + return BPF_JGE; + case BPF_JGE: + return BPF_JLT; + case BPF_JLE: + return BPF_JGT; + case BPF_JNE: + return BPF_JEQ; + case BPF_JSGT: + return BPF_JSLE; + case BPF_JSLT: + return BPF_JSGE; + case BPF_JSGE: + return BPF_JSLT; + case BPF_JSLE: + return BPF_JSGT; + } + return -1; +} + +static inline bool is_12b_int(long val) +{ + return -(1L << 11) <= val && val < (1L << 11); +} + +static inline int is_12b_check(int off, int insn) +{ + if (!is_12b_int(off)) { + pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n", + insn, (int)off); + return -1; + } + return 0; +} + +static inline bool is_13b_int(long val) +{ + return -(1L << 12) <= val && val < (1L << 12); +} + +static inline bool is_21b_int(long val) +{ + return -(1L << 20) <= val && val < (1L << 20); +} + +static inline int rv_offset(int insn, int off, struct rv_jit_context *ctx) +{ + int from, to; + + off++; /* BPF branch is from PC+1, RV is from PC */ + from = (insn > 0) ? ctx->offset[insn - 1] : 0; + to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; + return (to - from) << 2; +} + +/* Instruction formats. 
*/ + +static inline u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, + u8 opcode) +{ + return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (rd << 7) | opcode; +} + +static inline u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode) +{ + return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | + opcode; +} + +static inline u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode) +{ + u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f; + + return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (imm4_0 << 7) | opcode; +} + +static inline u32 rv_b_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode) +{ + u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4); + u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10); + + return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | + (imm4_1 << 7) | opcode; +} + +static inline u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode) +{ + return (imm31_12 << 12) | (rd << 7) | opcode; +} + +static inline u32 rv_j_insn(u32 imm20_1, u8 rd, u8 opcode) +{ + u32 imm; + + imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) | + ((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11); + + return (imm << 12) | (rd << 7) | opcode; +} + +static inline u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1, + u8 funct3, u8 rd, u8 opcode) +{ + u8 funct7 = (funct5 << 2) | (aq << 1) | rl; + + return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode); +} + +/* Instructions shared by both RV32 and RV64. 
*/ + +static inline u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x13); +} + +static inline u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 7, rd, 0x13); +} + +static inline u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 6, rd, 0x13); +} + +static inline u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 4, rd, 0x13); +} + +static inline u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 1, rd, 0x13); +} + +static inline u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x13); +} + +static inline u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13); +} + +static inline u32 rv_lui(u8 rd, u32 imm31_12) +{ + return rv_u_insn(imm31_12, rd, 0x37); +} + +static inline u32 rv_auipc(u8 rd, u32 imm31_12) +{ + return rv_u_insn(imm31_12, rd, 0x17); +} + +static inline u32 rv_add(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 0, rd, 0x33); +} + +static inline u32 rv_sub(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33); +} + +static inline u32 rv_sltu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 3, rd, 0x33); +} + +static inline u32 rv_and(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 7, rd, 0x33); +} + +static inline u32 rv_or(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 6, rd, 0x33); +} + +static inline u32 rv_xor(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 4, rd, 0x33); +} + +static inline u32 rv_sll(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 1, rd, 0x33); +} + +static inline u32 rv_srl(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 5, rd, 0x33); +} + +static inline u32 rv_sra(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33); +} + +static inline u32 rv_mul(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 0, rd, 
0x33); +} + +static inline u32 rv_mulhu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 3, rd, 0x33); +} + +static inline u32 rv_divu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); +} + +static inline u32 rv_remu(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 7, rd, 0x33); +} + +static inline u32 rv_jal(u8 rd, u32 imm20_1) +{ + return rv_j_insn(imm20_1, rd, 0x6f); +} + +static inline u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x67); +} + +static inline u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 0, 0x63); +} + +static inline u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 1, 0x63); +} + +static inline u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 6, 0x63); +} + +static inline u32 rv_bgtu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_bltu(rs2, rs1, imm12_1); +} + +static inline u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 7, 0x63); +} + +static inline u32 rv_bleu(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_bgeu(rs2, rs1, imm12_1); +} + +static inline u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 4, 0x63); +} + +static inline u32 rv_bgt(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_blt(rs2, rs1, imm12_1); +} + +static inline u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_b_insn(imm12_1, rs2, rs1, 5, 0x63); +} + +static inline u32 rv_ble(u8 rs1, u8 rs2, u16 imm12_1) +{ + return rv_bge(rs2, rs1, imm12_1); +} + +static inline u32 rv_lw(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 2, rd, 0x03); +} + +static inline u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 4, rd, 0x03); +} + +static inline u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x03); +} + +static inline u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2) +{ + return 
rv_s_insn(imm11_0, rs2, rs1, 0, 0x23); +} + +static inline u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23); +} + +static inline u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23); +} + +static inline u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); +} + +/* + * RV64-only instructions. + * + * These instructions are not available on RV32. Wrap them below a #if to + * ensure that the RV32 JIT doesn't emit any of these instructions. + */ + +#if __riscv_xlen == 64 + +static inline u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b); +} + +static inline u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b); +} + +static inline u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b); +} + +static inline u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0) +{ + return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b); +} + +static inline u32 rv_addw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b); +} + +static inline u32 rv_subw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b); +} + +static inline u32 rv_sllw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 1, rd, 0x3b); +} + +static inline u32 rv_srlw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b); +} + +static inline u32 rv_sraw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b); +} + +static inline u32 rv_mulw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); +} + +static inline u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); +} + +static inline u32 rv_remuw(u8 rd, u8 rs1, u8 rs2) +{ + return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b); +} + +static inline u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 3, rd, 
0x03); +} + +static inline u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1) +{ + return rv_i_insn(imm11_0, rs1, 6, rd, 0x03); +} + +static inline u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2) +{ + return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23); +} + +static inline u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) +{ + return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); +} + +#endif /* __riscv_xlen == 64 */ + +void bpf_jit_build_prologue(struct rv_jit_context *ctx); +void bpf_jit_build_epilogue(struct rv_jit_context *ctx); + +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, + bool extra_pass); + +#endif /* _BPF_JIT_H */ diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c new file mode 100644 index 000000000000..302934177760 --- /dev/null +++ b/arch/riscv/net/bpf_jit_comp32.c @@ -0,0 +1,1310 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * BPF JIT compiler for RV32G + * + * Copyright (c) 2020 Luke Nelson <luke.r.nels@gmail.com> + * Copyright (c) 2020 Xi Wang <xi.wang@gmail.com> + * + * The code is based on the BPF JIT compiler for RV64G by Björn Töpel and + * the BPF JIT compiler for 32-bit ARM by Shubham Bansal and Mircea Gherzan. + */ + +#include <linux/bpf.h> +#include <linux/filter.h> +#include "bpf_jit.h" + +enum { + /* Stack layout - these are offsets from (top of stack - 4). */ + BPF_R6_HI, + BPF_R6_LO, + BPF_R7_HI, + BPF_R7_LO, + BPF_R8_HI, + BPF_R8_LO, + BPF_R9_HI, + BPF_R9_LO, + BPF_AX_HI, + BPF_AX_LO, + /* Stack space for BPF_REG_6 through BPF_REG_9 and BPF_REG_AX. */ + BPF_JIT_SCRATCH_REGS, +}; + +#define STACK_OFFSET(k) (-4 - ((k) * 4)) + +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) + +#define RV_REG_TCC RV_REG_T6 +#define RV_REG_TCC_SAVED RV_REG_S7 + +static const s8 bpf2rv32[][2] = { + /* Return value from in-kernel function, and exit value from eBPF. */ + [BPF_REG_0] = {RV_REG_S2, RV_REG_S1}, + /* Arguments from eBPF program to in-kernel function. 
*/ + [BPF_REG_1] = {RV_REG_A1, RV_REG_A0}, + [BPF_REG_2] = {RV_REG_A3, RV_REG_A2}, + [BPF_REG_3] = {RV_REG_A5, RV_REG_A4}, + [BPF_REG_4] = {RV_REG_A7, RV_REG_A6}, + [BPF_REG_5] = {RV_REG_S4, RV_REG_S3}, + /* + * Callee-saved registers that in-kernel function will preserve. + * Stored on the stack. + */ + [BPF_REG_6] = {STACK_OFFSET(BPF_R6_HI), STACK_OFFSET(BPF_R6_LO)}, + [BPF_REG_7] = {STACK_OFFSET(BPF_R7_HI), STACK_OFFSET(BPF_R7_LO)}, + [BPF_REG_8] = {STACK_OFFSET(BPF_R8_HI), STACK_OFFSET(BPF_R8_LO)}, + [BPF_REG_9] = {STACK_OFFSET(BPF_R9_HI), STACK_OFFSET(BPF_R9_LO)}, + /* Read-only frame pointer to access BPF stack. */ + [BPF_REG_FP] = {RV_REG_S6, RV_REG_S5}, + /* Temporary register for blinding constants. Stored on the stack. */ + [BPF_REG_AX] = {STACK_OFFSET(BPF_AX_HI), STACK_OFFSET(BPF_AX_LO)}, + /* + * Temporary registers used by the JIT to operate on registers stored + * on the stack. Save t0 and t1 to be used as temporaries in generated + * code. + */ + [TMP_REG_1] = {RV_REG_T3, RV_REG_T2}, + [TMP_REG_2] = {RV_REG_T5, RV_REG_T4}, +}; + +static s8 hi(const s8 *r) +{ + return r[0]; +} + +static s8 lo(const s8 *r) +{ + return r[1]; +} + +static void emit_imm(const s8 rd, s32 imm, struct rv_jit_context *ctx) +{ + u32 upper = (imm + (1 << 11)) >> 12; + u32 lower = imm & 0xfff; + + if (upper) { + emit(rv_lui(rd, upper), ctx); + emit(rv_addi(rd, rd, lower), ctx); + } else { + emit(rv_addi(rd, RV_REG_ZERO, lower), ctx); + } +} + +static void emit_imm32(const s8 *rd, s32 imm, struct rv_jit_context *ctx) +{ + /* Emit immediate into lower bits. */ + emit_imm(lo(rd), imm, ctx); + + /* Sign-extend into upper bits. 
*/ + if (imm >= 0) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + else + emit(rv_addi(hi(rd), RV_REG_ZERO, -1), ctx); +} + +static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo, + struct rv_jit_context *ctx) +{ + emit_imm(lo(rd), imm_lo, ctx); + emit_imm(hi(rd), imm_hi, ctx); +} + +static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) +{ + int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 4; + const s8 *r0 = bpf2rv32[BPF_REG_0]; + + store_offset -= 4 * BPF_JIT_SCRATCH_REGS; + + /* Set return value if not tail call. */ + if (!is_tail_call) { + emit(rv_addi(RV_REG_A0, lo(r0), 0), ctx); + emit(rv_addi(RV_REG_A1, hi(r0), 0), ctx); + } + + /* Restore callee-saved registers. */ + emit(rv_lw(RV_REG_RA, store_offset - 0, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_FP, store_offset - 4, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S1, store_offset - 8, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S2, store_offset - 12, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S3, store_offset - 16, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S4, store_offset - 20, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S5, store_offset - 24, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S6, store_offset - 28, RV_REG_SP), ctx); + emit(rv_lw(RV_REG_S7, store_offset - 32, RV_REG_SP), ctx); + + emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx); + + if (is_tail_call) { + /* + * goto *(t0 + 4); + * Skips first instruction of prologue which initializes tail + * call counter. Assumes t0 contains address of target program, + * see emit_bpf_tail_call. 
+ */ + emit(rv_jalr(RV_REG_ZERO, RV_REG_T0, 4), ctx); + } else { + emit(rv_jalr(RV_REG_ZERO, RV_REG_RA, 0), ctx); + } +} + +static bool is_stacked(s8 reg) +{ + return reg < 0; +} + +static const s8 *bpf_get_reg64(const s8 *reg, const s8 *tmp, + struct rv_jit_context *ctx) +{ + if (is_stacked(hi(reg))) { + emit(rv_lw(hi(tmp), hi(reg), RV_REG_FP), ctx); + emit(rv_lw(lo(tmp), lo(reg), RV_REG_FP), ctx); + reg = tmp; + } + return reg; +} + +static void bpf_put_reg64(const s8 *reg, const s8 *src, + struct rv_jit_context *ctx) +{ + if (is_stacked(hi(reg))) { + emit(rv_sw(RV_REG_FP, hi(reg), hi(src)), ctx); + emit(rv_sw(RV_REG_FP, lo(reg), lo(src)), ctx); + } +} + +static const s8 *bpf_get_reg32(const s8 *reg, const s8 *tmp, + struct rv_jit_context *ctx) +{ + if (is_stacked(lo(reg))) { + emit(rv_lw(lo(tmp), lo(reg), RV_REG_FP), ctx); + reg = tmp; + } + return reg; +} + +static void bpf_put_reg32(const s8 *reg, const s8 *src, + struct rv_jit_context *ctx) +{ + if (is_stacked(lo(reg))) { + emit(rv_sw(RV_REG_FP, lo(reg), lo(src)), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_sw(RV_REG_FP, hi(reg), RV_REG_ZERO), ctx); + } else if (!ctx->prog->aux->verifier_zext) { + emit(rv_addi(hi(reg), RV_REG_ZERO, 0), ctx); + } +} + +static void emit_jump_and_link(u8 rd, s32 rvoff, bool force_jalr, + struct rv_jit_context *ctx) +{ + s32 upper, lower; + + if (rvoff && is_21b_int(rvoff) && !force_jalr) { + emit(rv_jal(rd, rvoff >> 1), ctx); + return; + } + + upper = (rvoff + (1 << 11)) >> 12; + lower = rvoff & 0xfff; + emit(rv_auipc(RV_REG_T1, upper), ctx); + emit(rv_jalr(rd, RV_REG_T1, lower), ctx); +} + +static void emit_alu_i64(const s8 *dst, s32 imm, + struct rv_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + switch (op) { + case BPF_MOV: + emit_imm32(rd, imm, ctx); + break; + case BPF_AND: + if (is_12b_int(imm)) { + emit(rv_andi(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + 
emit(rv_and(lo(rd), lo(rd), RV_REG_T0), ctx); + } + if (imm >= 0) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case BPF_OR: + if (is_12b_int(imm)) { + emit(rv_ori(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_or(lo(rd), lo(rd), RV_REG_T0), ctx); + } + if (imm < 0) + emit(rv_ori(hi(rd), RV_REG_ZERO, -1), ctx); + break; + case BPF_XOR: + if (is_12b_int(imm)) { + emit(rv_xori(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_xor(lo(rd), lo(rd), RV_REG_T0), ctx); + } + if (imm < 0) + emit(rv_xori(hi(rd), hi(rd), -1), ctx); + break; + case BPF_LSH: + if (imm >= 32) { + emit(rv_slli(hi(rd), lo(rd), imm - 32), ctx); + emit(rv_addi(lo(rd), RV_REG_ZERO, 0), ctx); + } else if (imm == 0) { + /* Do nothing. */ + } else { + emit(rv_srli(RV_REG_T0, lo(rd), 32 - imm), ctx); + emit(rv_slli(hi(rd), hi(rd), imm), ctx); + emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx); + emit(rv_slli(lo(rd), lo(rd), imm), ctx); + } + break; + case BPF_RSH: + if (imm >= 32) { + emit(rv_srli(lo(rd), hi(rd), imm - 32), ctx); + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + } else if (imm == 0) { + /* Do nothing. */ + } else { + emit(rv_slli(RV_REG_T0, hi(rd), 32 - imm), ctx); + emit(rv_srli(lo(rd), lo(rd), imm), ctx); + emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx); + emit(rv_srli(hi(rd), hi(rd), imm), ctx); + } + break; + case BPF_ARSH: + if (imm >= 32) { + emit(rv_srai(lo(rd), hi(rd), imm - 32), ctx); + emit(rv_srai(hi(rd), hi(rd), 31), ctx); + } else if (imm == 0) { + /* Do nothing. 
*/ + } else { + emit(rv_slli(RV_REG_T0, hi(rd), 32 - imm), ctx); + emit(rv_srli(lo(rd), lo(rd), imm), ctx); + emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx); + emit(rv_srai(hi(rd), hi(rd), imm), ctx); + } + break; + } + + bpf_put_reg64(dst, rd, ctx); +} + +static void emit_alu_i32(const s8 *dst, s32 imm, + struct rv_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *rd = bpf_get_reg32(dst, tmp1, ctx); + + switch (op) { + case BPF_MOV: + emit_imm(lo(rd), imm, ctx); + break; + case BPF_ADD: + if (is_12b_int(imm)) { + emit(rv_addi(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_add(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_SUB: + if (is_12b_int(-imm)) { + emit(rv_addi(lo(rd), lo(rd), -imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_sub(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_AND: + if (is_12b_int(imm)) { + emit(rv_andi(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_and(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_OR: + if (is_12b_int(imm)) { + emit(rv_ori(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_or(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_XOR: + if (is_12b_int(imm)) { + emit(rv_xori(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_xor(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_LSH: + if (is_12b_int(imm)) { + emit(rv_slli(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_sll(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_RSH: + if (is_12b_int(imm)) { + emit(rv_srli(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_srl(lo(rd), lo(rd), RV_REG_T0), ctx); + } + break; + case BPF_ARSH: + if (is_12b_int(imm)) { + emit(rv_srai(lo(rd), lo(rd), imm), ctx); + } else { + emit_imm(RV_REG_T0, imm, ctx); + emit(rv_sra(lo(rd), lo(rd), 
RV_REG_T0), ctx); + } + break; + } + + bpf_put_reg32(dst, rd, ctx); +} + +static void emit_alu_r64(const s8 *dst, const s8 *src, + struct rv_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + const s8 *rs = bpf_get_reg64(src, tmp2, ctx); + + switch (op) { + case BPF_MOV: + emit(rv_addi(lo(rd), lo(rs), 0), ctx); + emit(rv_addi(hi(rd), hi(rs), 0), ctx); + break; + case BPF_ADD: + if (rd == rs) { + emit(rv_srli(RV_REG_T0, lo(rd), 31), ctx); + emit(rv_slli(hi(rd), hi(rd), 1), ctx); + emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx); + emit(rv_slli(lo(rd), lo(rd), 1), ctx); + } else { + emit(rv_add(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_sltu(RV_REG_T0, lo(rd), lo(rs)), ctx); + emit(rv_add(hi(rd), hi(rd), hi(rs)), ctx); + emit(rv_add(hi(rd), hi(rd), RV_REG_T0), ctx); + } + break; + case BPF_SUB: + emit(rv_sub(RV_REG_T1, hi(rd), hi(rs)), ctx); + emit(rv_sltu(RV_REG_T0, lo(rd), lo(rs)), ctx); + emit(rv_sub(hi(rd), RV_REG_T1, RV_REG_T0), ctx); + emit(rv_sub(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_AND: + emit(rv_and(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_and(hi(rd), hi(rd), hi(rs)), ctx); + break; + case BPF_OR: + emit(rv_or(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_or(hi(rd), hi(rd), hi(rs)), ctx); + break; + case BPF_XOR: + emit(rv_xor(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_xor(hi(rd), hi(rd), hi(rs)), ctx); + break; + case BPF_MUL: + emit(rv_mul(RV_REG_T0, hi(rs), lo(rd)), ctx); + emit(rv_mul(hi(rd), hi(rd), lo(rs)), ctx); + emit(rv_mulhu(RV_REG_T1, lo(rd), lo(rs)), ctx); + emit(rv_add(hi(rd), hi(rd), RV_REG_T0), ctx); + emit(rv_mul(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_add(hi(rd), hi(rd), RV_REG_T1), ctx); + break; + case BPF_LSH: + emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx); + emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx); + emit(rv_sll(hi(rd), lo(rd), RV_REG_T0), ctx); + emit(rv_addi(lo(rd), RV_REG_ZERO, 0), ctx); + emit(rv_jal(RV_REG_ZERO, 16), 
ctx); + emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx); + emit(rv_srli(RV_REG_T0, lo(rd), 1), ctx); + emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx); + emit(rv_srl(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx); + emit(rv_sll(hi(rd), hi(rd), lo(rs)), ctx); + emit(rv_or(hi(rd), RV_REG_T0, hi(rd)), ctx); + emit(rv_sll(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_RSH: + emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx); + emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx); + emit(rv_srl(lo(rd), hi(rd), RV_REG_T0), ctx); + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + emit(rv_jal(RV_REG_ZERO, 16), ctx); + emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx); + emit(rv_slli(RV_REG_T0, hi(rd), 1), ctx); + emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx); + emit(rv_sll(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx); + emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx); + emit(rv_srl(hi(rd), hi(rd), lo(rs)), ctx); + break; + case BPF_ARSH: + emit(rv_addi(RV_REG_T0, lo(rs), -32), ctx); + emit(rv_blt(RV_REG_T0, RV_REG_ZERO, 8), ctx); + emit(rv_sra(lo(rd), hi(rd), RV_REG_T0), ctx); + emit(rv_srai(hi(rd), hi(rd), 31), ctx); + emit(rv_jal(RV_REG_ZERO, 16), ctx); + emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 31), ctx); + emit(rv_slli(RV_REG_T0, hi(rd), 1), ctx); + emit(rv_sub(RV_REG_T1, RV_REG_T1, lo(rs)), ctx); + emit(rv_sll(RV_REG_T0, RV_REG_T0, RV_REG_T1), ctx); + emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx); + emit(rv_or(lo(rd), RV_REG_T0, lo(rd)), ctx); + emit(rv_sra(hi(rd), hi(rd), lo(rs)), ctx); + break; + case BPF_NEG: + emit(rv_sub(lo(rd), RV_REG_ZERO, lo(rd)), ctx); + emit(rv_sltu(RV_REG_T0, RV_REG_ZERO, lo(rd)), ctx); + emit(rv_sub(hi(rd), RV_REG_ZERO, hi(rd)), ctx); + emit(rv_sub(hi(rd), hi(rd), RV_REG_T0), ctx); + break; + } + + bpf_put_reg64(dst, rd, ctx); +} + +static void emit_alu_r32(const s8 *dst, const s8 *src, + struct rv_jit_context *ctx, const u8 op) +{ + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + const s8 *rd = 
bpf_get_reg32(dst, tmp1, ctx); + const s8 *rs = bpf_get_reg32(src, tmp2, ctx); + + switch (op) { + case BPF_MOV: + emit(rv_addi(lo(rd), lo(rs), 0), ctx); + break; + case BPF_ADD: + emit(rv_add(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_SUB: + emit(rv_sub(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_AND: + emit(rv_and(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_OR: + emit(rv_or(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_XOR: + emit(rv_xor(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_MUL: + emit(rv_mul(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_DIV: + emit(rv_divu(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_MOD: + emit(rv_remu(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_LSH: + emit(rv_sll(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_RSH: + emit(rv_srl(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_ARSH: + emit(rv_sra(lo(rd), lo(rd), lo(rs)), ctx); + break; + case BPF_NEG: + emit(rv_sub(lo(rd), RV_REG_ZERO, lo(rd)), ctx); + break; + } + + bpf_put_reg32(dst, rd, ctx); +} + +static int emit_branch_r64(const s8 *src1, const s8 *src2, s32 rvoff, + struct rv_jit_context *ctx, const u8 op) +{ + int e, s = ctx->ninsns; + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + + const s8 *rs1 = bpf_get_reg64(src1, tmp1, ctx); + const s8 *rs2 = bpf_get_reg64(src2, tmp2, ctx); + + /* + * NO_JUMP skips over the rest of the instructions and the + * emit_jump_and_link, meaning the BPF branch is not taken. + * JUMP skips directly to the emit_jump_and_link, meaning + * the BPF branch is taken. + * + * The fallthrough case results in the BPF branch being taken. 
+ */ +#define NO_JUMP(idx) (6 + (2 * (idx))) +#define JUMP(idx) (2 + (2 * (idx))) + + switch (op) { + case BPF_JEQ: + emit(rv_bne(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bne(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JGT: + emit(rv_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JLT: + emit(rv_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JGE: + emit(rv_bgtu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bltu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JLE: + emit(rv_bltu(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bgtu(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JNE: + emit(rv_bne(hi(rs1), hi(rs2), JUMP(1)), ctx); + emit(rv_beq(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSGT: + emit(rv_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bleu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSLT: + emit(rv_blt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bgeu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSGE: + emit(rv_bgt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_blt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bltu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSLE: + emit(rv_blt(hi(rs1), hi(rs2), JUMP(2)), ctx); + emit(rv_bgt(hi(rs1), hi(rs2), NO_JUMP(1)), ctx); + emit(rv_bgtu(lo(rs1), lo(rs2), NO_JUMP(0)), ctx); + break; + case BPF_JSET: + emit(rv_and(RV_REG_T0, hi(rs1), hi(rs2)), ctx); + emit(rv_bne(RV_REG_T0, RV_REG_ZERO, JUMP(2)), ctx); + emit(rv_and(RV_REG_T0, lo(rs1), lo(rs2)), ctx); + emit(rv_beq(RV_REG_T0, RV_REG_ZERO, NO_JUMP(0)), ctx); 
+ break; + } + +#undef NO_JUMP +#undef JUMP + + e = ctx->ninsns; + /* Adjust for extra insns. */ + rvoff -= (e - s) << 2; + emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx); + return 0; +} + +static int emit_bcc(u8 op, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx) +{ + int e, s = ctx->ninsns; + bool far = false; + int off; + + if (op == BPF_JSET) { + /* + * BPF_JSET is a special case: it has no inverse so we always + * treat it as a far branch. + */ + far = true; + } else if (!is_13b_int(rvoff)) { + op = invert_bpf_cond(op); + far = true; + } + + /* + * For a far branch, the condition is negated and we jump over the + * branch itself, and the two instructions from emit_jump_and_link. + * For a near branch, just use rvoff. + */ + off = far ? 6 : (rvoff >> 1); + + switch (op) { + case BPF_JEQ: + emit(rv_beq(rd, rs, off), ctx); + break; + case BPF_JGT: + emit(rv_bgtu(rd, rs, off), ctx); + break; + case BPF_JLT: + emit(rv_bltu(rd, rs, off), ctx); + break; + case BPF_JGE: + emit(rv_bgeu(rd, rs, off), ctx); + break; + case BPF_JLE: + emit(rv_bleu(rd, rs, off), ctx); + break; + case BPF_JNE: + emit(rv_bne(rd, rs, off), ctx); + break; + case BPF_JSGT: + emit(rv_bgt(rd, rs, off), ctx); + break; + case BPF_JSLT: + emit(rv_blt(rd, rs, off), ctx); + break; + case BPF_JSGE: + emit(rv_bge(rd, rs, off), ctx); + break; + case BPF_JSLE: + emit(rv_ble(rd, rs, off), ctx); + break; + case BPF_JSET: + emit(rv_and(RV_REG_T0, rd, rs), ctx); + emit(rv_beq(RV_REG_T0, RV_REG_ZERO, off), ctx); + break; + } + + if (far) { + e = ctx->ninsns; + /* Adjust for extra insns. 
*/
+		rvoff -= (e - s) << 2;
+		emit_jump_and_link(RV_REG_ZERO, rvoff, true, ctx);
+	}
+	return 0;
+}
+
+/*
+ * Emit a conditional branch that compares only the low 32-bit words of
+ * src1 and src2.
+ *
+ * The operands are fetched with bpf_get_reg32() (TMP_REG_1/TMP_REG_2 serve
+ * as temporaries) and rvoff is reduced by 4 bytes for every instruction
+ * emitted while fetching them, before the branch itself is produced by
+ * emit_bcc().
+ *
+ * Returns 0 on success, -1 if emit_bcc() reports an error.
+ */
+static int emit_branch_r32(const s8 *src1, const s8 *src2, s32 rvoff,
+			   struct rv_jit_context *ctx, const u8 op)
+{
+	int e, s = ctx->ninsns;
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+
+	const s8 *rs1 = bpf_get_reg32(src1, tmp1, ctx);
+	const s8 *rs2 = bpf_get_reg32(src2, tmp2, ctx);
+
+	e = ctx->ninsns;
+	/* Adjust for extra insns. */
+	rvoff -= (e - s) << 2;
+
+	if (emit_bcc(op, lo(rs1), lo(rs2), rvoff, ctx))
+		return -1;
+
+	return 0;
+}
+
+/*
+ * Emit a call to the absolute address addr.
+ *
+ * BPF R5 is spilled to a freshly reserved 16-byte stack area, the tail-call
+ * counter is parked in RV_REG_TCC_SAVED across the call, and on return a0/a1
+ * are copied into the low/high words of BPF R0. The 'fixed' argument is not
+ * used by this function.
+ */
+static void emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
+{
+	const s8 *r0 = bpf2rv32[BPF_REG_0];
+	const s8 *r5 = bpf2rv32[BPF_REG_5];
+	u32 upper = ((u32)addr + (1 << 11)) >> 12;
+	u32 lower = addr & 0xfff;
+
+	/* R1-R4 already in correct registers---need to push R5 to stack. */
+	emit(rv_addi(RV_REG_SP, RV_REG_SP, -16), ctx);
+	emit(rv_sw(RV_REG_SP, 0, lo(r5)), ctx);
+	emit(rv_sw(RV_REG_SP, 4, hi(r5)), ctx);
+
+	/* Backup TCC. */
+	emit(rv_addi(RV_REG_TCC_SAVED, RV_REG_TCC, 0), ctx);
+
+	/*
+	 * Use lui/jalr pair to jump to absolute address. Don't use emit_imm as
+	 * the number of emitted instructions should not depend on the value of
+	 * addr.
+	 */
+	emit(rv_lui(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(RV_REG_RA, RV_REG_T1, lower), ctx);
+
+	/* Restore TCC. */
+	emit(rv_addi(RV_REG_TCC, RV_REG_TCC_SAVED, 0), ctx);
+
+	/* Set return value and restore stack. */
+	emit(rv_addi(lo(r0), RV_REG_A0, 0), ctx);
+	emit(rv_addi(hi(r0), RV_REG_A1, 0), ctx);
+	emit(rv_addi(RV_REG_SP, RV_REG_SP, 16), ctx);
+}
+
+/*
+ * Emit the code for a BPF tail call located at BPF instruction index insn.
+ *
+ * The generated sequence bails out to the next BPF instruction ("out") when
+ * the index is not below max_entries, when the tail-call counter is
+ * exhausted, or when the selected array slot is NULL. Otherwise it commits
+ * the decremented counter and leaves through the tail-call flavour of the
+ * epilogue, which jumps to the target program at bpf_func + 4.
+ *
+ * The counter test is made on the value of TCC *before* the decrement.
+ * Testing the decremented value instead would stop one call early: with the
+ * counter initialised to MAX_TAIL_CALL_CNT only MAX_TAIL_CALL_CNT tail calls
+ * could run, rather than MAX_TAIL_CALL_CNT + 1.
+ * NOTE(review): MAX_TAIL_CALL_CNT + 1 is understood to be the limit the BPF
+ * interpreter enforces in this kernel version -- confirm.
+ *
+ * Returns 0 on success, -1 if a structure offset fails is_12b_check().
+ */
+static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
+{
+	/*
+	 * R1 -> &ctx
+	 * R2 -> &array
+	 * R3 -> index
+	 */
+	int tc_ninsn, off, start_insn = ctx->ninsns;
+	const s8 *arr_reg = bpf2rv32[BPF_REG_2];
+	const s8 *idx_reg = bpf2rv32[BPF_REG_3];
+
+	/* Number of RV instructions this BPF instruction expands to. */
+	tc_ninsn = insn ? ctx->offset[insn] - ctx->offset[insn - 1] :
+		ctx->offset[0];
+
+	/* max_entries = array->map.max_entries; */
+	off = offsetof(struct bpf_array, map.max_entries);
+	if (is_12b_check(off, insn))
+		return -1;
+	emit(rv_lw(RV_REG_T1, off, lo(arr_reg)), ctx);
+
+	/*
+	 * if (index >= max_entries)
+	 *   goto out;
+	 */
+	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+	emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);
+
+	/*
+	 * temp_tcc = tcc - 1;
+	 * if (tcc < 0)
+	 *   goto out;
+	 */
+	emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
+	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+	emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
+
+	/*
+	 * prog = array->ptrs[index];
+	 * if (!prog)
+	 *   goto out;
+	 */
+	emit(rv_slli(RV_REG_T0, lo(idx_reg), 2), ctx);
+	emit(rv_add(RV_REG_T0, RV_REG_T0, lo(arr_reg)), ctx);
+	off = offsetof(struct bpf_array, ptrs);
+	if (is_12b_check(off, insn))
+		return -1;
+	emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
+	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
+	emit_bcc(BPF_JEQ, RV_REG_T0, RV_REG_ZERO, off, ctx);
+
+	/*
+	 * tcc = temp_tcc;
+	 * goto *(prog->bpf_func + 4);
+	 */
+	off = offsetof(struct bpf_prog, bpf_func);
+	if (is_12b_check(off, insn))
+		return -1;
+	emit(rv_lw(RV_REG_T0, off, RV_REG_T0), ctx);
+	emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
+	/* Epilogue jumps to *(t0 + 4). */
+	__build_epilogue(true, ctx);
+	return 0;
+}
+
+/*
+ * Emit a load of 'size' (BPF_B/H/W/DW) from the address lo(src) + off into
+ * dst. Only the low word of src is used to form the address.
+ *
+ * For the sub-64-bit sizes the high word of dst is cleared unless
+ * ctx->prog->aux->verifier_zext is set. Always returns 0.
+ */
+static int emit_load_r64(const s8 *dst, const s8 *src, s16 off,
+			 struct rv_jit_context *ctx, const u8 size)
+{
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
+	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
+
+	/* t0 = effective address; keeps the base intact if rd aliases rs. */
+	emit_imm(RV_REG_T0, off, ctx);
+	emit(rv_add(RV_REG_T0, RV_REG_T0, lo(rs)), ctx);
+
+	switch (size) {
+	case BPF_B:
+		emit(rv_lbu(lo(rd), 0, RV_REG_T0), ctx);
+		if (!ctx->prog->aux->verifier_zext)
+			emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		break;
+	case BPF_H:
+		emit(rv_lhu(lo(rd), 0, RV_REG_T0), ctx);
+		if (!ctx->prog->aux->verifier_zext)
+			emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		break;
+	case BPF_W:
+		emit(rv_lw(lo(rd), 0, RV_REG_T0), ctx);
+		if (!ctx->prog->aux->verifier_zext)
+			emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx);
+		break;
+	case BPF_DW:
+		emit(rv_lw(lo(rd), 0, RV_REG_T0), ctx);
+		emit(rv_lw(hi(rd), 4, RV_REG_T0), ctx);
+		break;
+	}
+
+	bpf_put_reg64(dst, rd, ctx);
+	return 0;
+}
+
+/*
+ * Emit a store of 'size' (BPF_B/H/W/DW) from src to the address
+ * lo(dst) + off. With mode BPF_XADD and size BPF_W an amoadd.w is emitted
+ * instead of a plain store.
+ *
+ * Returns 0 on success, -1 for BPF_XADD with any size other than BPF_W.
+ */
+static int emit_store_r64(const s8 *dst, const s8 *src, s16 off,
+			  struct rv_jit_context *ctx, const u8 size,
+			  const u8 mode)
+{
+	const s8 *tmp1 = bpf2rv32[TMP_REG_1];
+	const s8 *tmp2 = bpf2rv32[TMP_REG_2];
+	const s8 *rd = bpf_get_reg64(dst, tmp1, ctx);
+	const s8 *rs = bpf_get_reg64(src, tmp2, ctx);
+
+	if (mode == BPF_XADD && size != BPF_W)
+		return -1;
+
+	/* t0 = effective address. */
+	emit_imm(RV_REG_T0, off, ctx);
+	emit(rv_add(RV_REG_T0, RV_REG_T0, lo(rd)), ctx);
+
+	switch (size) {
+	case BPF_B:
+		emit(rv_sb(RV_REG_T0, 0, lo(rs)), ctx);
+		break;
+	case BPF_H:
+		emit(rv_sh(RV_REG_T0, 0, lo(rs)), ctx);
+		break;
+	case BPF_W:
+		switch (mode) {
+		case BPF_MEM:
+			emit(rv_sw(RV_REG_T0, 0, lo(rs)), ctx);
+			break;
+		case BPF_XADD:
+			emit(rv_amoadd_w(RV_REG_ZERO, lo(rs), RV_REG_T0, 0, 0),
+			     ctx);
+			break;
+		}
+		break;
+	case BPF_DW:
+		emit(rv_sw(RV_REG_T0, 0, lo(rs)), ctx);
+		emit(rv_sw(RV_REG_T0, 4, hi(rs)), ctx);
+		break;
+	}
+
+	return 0;
+}
+
+static
void emit_rev16(const s8 rd, struct rv_jit_context *ctx) +{ + emit(rv_slli(rd, rd, 16), ctx); + emit(rv_slli(RV_REG_T1, rd, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + emit(rv_add(RV_REG_T1, rd, RV_REG_T1), ctx); + emit(rv_srli(rd, RV_REG_T1, 16), ctx); +} + +static void emit_rev32(const s8 rd, struct rv_jit_context *ctx) +{ + emit(rv_addi(RV_REG_T1, RV_REG_ZERO, 0), ctx); + emit(rv_andi(RV_REG_T0, rd, 255), ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx); + emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + emit(rv_andi(RV_REG_T0, rd, 255), ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx); + emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + emit(rv_andi(RV_REG_T0, rd, 255), ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx); + emit(rv_slli(RV_REG_T1, RV_REG_T1, 8), ctx); + emit(rv_srli(rd, rd, 8), ctx); + emit(rv_andi(RV_REG_T0, rd, 255), ctx); + emit(rv_add(RV_REG_T1, RV_REG_T1, RV_REG_T0), ctx); + emit(rv_addi(rd, RV_REG_T1, 0), ctx); +} + +static void emit_zext64(const s8 *dst, struct rv_jit_context *ctx) +{ + const s8 *rd; + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + + rd = bpf_get_reg64(dst, tmp1, ctx); + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + bpf_put_reg64(dst, rd, ctx); +} + +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, + bool extra_pass) +{ + bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || + BPF_CLASS(insn->code) == BPF_JMP; + int s, e, rvoff, i = insn - ctx->prog->insnsi; + u8 code = insn->code; + s16 off = insn->off; + s32 imm = insn->imm; + + const s8 *dst = bpf2rv32[insn->dst_reg]; + const s8 *src = bpf2rv32[insn->src_reg]; + const s8 *tmp1 = bpf2rv32[TMP_REG_1]; + const s8 *tmp2 = bpf2rv32[TMP_REG_2]; + + switch (code) { + case BPF_ALU64 | BPF_MOV | BPF_X: + + case BPF_ALU64 | BPF_ADD | BPF_X: + case BPF_ALU64 | BPF_ADD | BPF_K: + + case BPF_ALU64 | BPF_SUB | BPF_X: + case BPF_ALU64 | BPF_SUB | BPF_K: + + case BPF_ALU64 | 
BPF_AND | BPF_X: + case BPF_ALU64 | BPF_OR | BPF_X: + case BPF_ALU64 | BPF_XOR | BPF_X: + + case BPF_ALU64 | BPF_MUL | BPF_X: + case BPF_ALU64 | BPF_MUL | BPF_K: + + case BPF_ALU64 | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + if (BPF_SRC(code) == BPF_K) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + emit_alu_r64(dst, src, ctx, BPF_OP(code)); + break; + + case BPF_ALU64 | BPF_NEG: + emit_alu_r64(dst, tmp2, ctx, BPF_OP(code)); + break; + + case BPF_ALU64 | BPF_DIV | BPF_X: + case BPF_ALU64 | BPF_DIV | BPF_K: + case BPF_ALU64 | BPF_MOD | BPF_X: + case BPF_ALU64 | BPF_MOD | BPF_K: + goto notsupported; + + case BPF_ALU64 | BPF_MOV | BPF_K: + case BPF_ALU64 | BPF_AND | BPF_K: + case BPF_ALU64 | BPF_OR | BPF_K: + case BPF_ALU64 | BPF_XOR | BPF_K: + case BPF_ALU64 | BPF_LSH | BPF_K: + case BPF_ALU64 | BPF_RSH | BPF_K: + case BPF_ALU64 | BPF_ARSH | BPF_K: + emit_alu_i64(dst, imm, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_MOV | BPF_X: + if (imm == 1) { + /* Special mov32 for zext. */ + emit_zext64(dst, ctx); + break; + } + /* Fallthrough. 
*/ + + case BPF_ALU | BPF_ADD | BPF_X: + case BPF_ALU | BPF_SUB | BPF_X: + case BPF_ALU | BPF_AND | BPF_X: + case BPF_ALU | BPF_OR | BPF_X: + case BPF_ALU | BPF_XOR | BPF_X: + + case BPF_ALU | BPF_MUL | BPF_X: + case BPF_ALU | BPF_MUL | BPF_K: + + case BPF_ALU | BPF_DIV | BPF_X: + case BPF_ALU | BPF_DIV | BPF_K: + + case BPF_ALU | BPF_MOD | BPF_X: + case BPF_ALU | BPF_MOD | BPF_K: + + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_ARSH | BPF_X: + if (BPF_SRC(code) == BPF_K) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + emit_alu_r32(dst, src, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_MOV | BPF_K: + case BPF_ALU | BPF_ADD | BPF_K: + case BPF_ALU | BPF_SUB | BPF_K: + case BPF_ALU | BPF_AND | BPF_K: + case BPF_ALU | BPF_OR | BPF_K: + case BPF_ALU | BPF_XOR | BPF_K: + case BPF_ALU | BPF_LSH | BPF_K: + case BPF_ALU | BPF_RSH | BPF_K: + case BPF_ALU | BPF_ARSH | BPF_K: + /* + * mul,div,mod are handled in the BPF_X case since there are + * no RISC-V I-type equivalents. + */ + emit_alu_i32(dst, imm, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_NEG: + /* + * src is ignored---choose tmp2 as a dummy register since it + * is not on the stack. + */ + emit_alu_r32(dst, tmp2, ctx, BPF_OP(code)); + break; + + case BPF_ALU | BPF_END | BPF_FROM_LE: + { + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + switch (imm) { + case 16: + emit(rv_slli(lo(rd), lo(rd), 16), ctx); + emit(rv_srli(lo(rd), lo(rd), 16), ctx); + /* Fallthrough. */ + case 32: + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case 64: + /* Do nothing. 
*/ + break; + default: + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); + return -1; + } + + bpf_put_reg64(dst, rd, ctx); + break; + } + + case BPF_ALU | BPF_END | BPF_FROM_BE: + { + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + switch (imm) { + case 16: + emit_rev16(lo(rd), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case 32: + emit_rev32(lo(rd), ctx); + if (!ctx->prog->aux->verifier_zext) + emit(rv_addi(hi(rd), RV_REG_ZERO, 0), ctx); + break; + case 64: + /* Swap upper and lower halves. */ + emit(rv_addi(RV_REG_T0, lo(rd), 0), ctx); + emit(rv_addi(lo(rd), hi(rd), 0), ctx); + emit(rv_addi(hi(rd), RV_REG_T0, 0), ctx); + + /* Swap each half. */ + emit_rev32(lo(rd), ctx); + emit_rev32(hi(rd), ctx); + break; + default: + pr_err("bpf-jit: BPF_END imm %d invalid\n", imm); + return -1; + } + + bpf_put_reg64(dst, rd, ctx); + break; + } + + case BPF_JMP | BPF_JA: + rvoff = rv_offset(i, off, ctx); + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + break; + + case BPF_JMP | BPF_CALL: + { + bool fixed; + int ret; + u64 addr; + + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, &addr, + &fixed); + if (ret < 0) + return ret; + emit_call(fixed, addr, ctx); + break; + } + + case BPF_JMP | BPF_TAIL_CALL: + if (emit_bpf_tail_call(i, ctx)) + return -1; + break; + + case BPF_JMP | BPF_JEQ | BPF_X: + case BPF_JMP | BPF_JEQ | BPF_K: + case BPF_JMP32 | BPF_JEQ | BPF_X: + case BPF_JMP32 | BPF_JEQ | BPF_K: + + case BPF_JMP | BPF_JNE | BPF_X: + case BPF_JMP | BPF_JNE | BPF_K: + case BPF_JMP32 | BPF_JNE | BPF_X: + case BPF_JMP32 | BPF_JNE | BPF_K: + + case BPF_JMP | BPF_JLE | BPF_X: + case BPF_JMP | BPF_JLE | BPF_K: + case BPF_JMP32 | BPF_JLE | BPF_X: + case BPF_JMP32 | BPF_JLE | BPF_K: + + case BPF_JMP | BPF_JLT | BPF_X: + case BPF_JMP | BPF_JLT | BPF_K: + case BPF_JMP32 | BPF_JLT | BPF_X: + case BPF_JMP32 | BPF_JLT | BPF_K: + + case BPF_JMP | BPF_JGE | BPF_X: + case BPF_JMP | BPF_JGE | BPF_K: + case BPF_JMP32 | 
BPF_JGE | BPF_X: + case BPF_JMP32 | BPF_JGE | BPF_K: + + case BPF_JMP | BPF_JGT | BPF_X: + case BPF_JMP | BPF_JGT | BPF_K: + case BPF_JMP32 | BPF_JGT | BPF_X: + case BPF_JMP32 | BPF_JGT | BPF_K: + + case BPF_JMP | BPF_JSLE | BPF_X: + case BPF_JMP | BPF_JSLE | BPF_K: + case BPF_JMP32 | BPF_JSLE | BPF_X: + case BPF_JMP32 | BPF_JSLE | BPF_K: + + case BPF_JMP | BPF_JSLT | BPF_X: + case BPF_JMP | BPF_JSLT | BPF_K: + case BPF_JMP32 | BPF_JSLT | BPF_X: + case BPF_JMP32 | BPF_JSLT | BPF_K: + + case BPF_JMP | BPF_JSGE | BPF_X: + case BPF_JMP | BPF_JSGE | BPF_K: + case BPF_JMP32 | BPF_JSGE | BPF_X: + case BPF_JMP32 | BPF_JSGE | BPF_K: + + case BPF_JMP | BPF_JSGT | BPF_X: + case BPF_JMP | BPF_JSGT | BPF_K: + case BPF_JMP32 | BPF_JSGT | BPF_X: + case BPF_JMP32 | BPF_JSGT | BPF_K: + + case BPF_JMP | BPF_JSET | BPF_X: + case BPF_JMP | BPF_JSET | BPF_K: + case BPF_JMP32 | BPF_JSET | BPF_X: + case BPF_JMP32 | BPF_JSET | BPF_K: + rvoff = rv_offset(i, off, ctx); + if (BPF_SRC(code) == BPF_K) { + s = ctx->ninsns; + emit_imm32(tmp2, imm, ctx); + src = tmp2; + e = ctx->ninsns; + rvoff -= (e - s) << 2; + } + + if (is64) + emit_branch_r64(dst, src, rvoff, ctx, BPF_OP(code)); + else + emit_branch_r32(dst, src, rvoff, ctx, BPF_OP(code)); + break; + + case BPF_JMP | BPF_EXIT: + if (i == ctx->prog->len - 1) + break; + + rvoff = epilogue_offset(ctx); + emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx); + break; + + case BPF_LD | BPF_IMM | BPF_DW: + { + struct bpf_insn insn1 = insn[1]; + s32 imm_lo = imm; + s32 imm_hi = insn1.imm; + const s8 *rd = bpf_get_reg64(dst, tmp1, ctx); + + emit_imm64(rd, imm_hi, imm_lo, ctx); + bpf_put_reg64(dst, rd, ctx); + return 1; + } + + case BPF_LDX | BPF_MEM | BPF_B: + case BPF_LDX | BPF_MEM | BPF_H: + case BPF_LDX | BPF_MEM | BPF_W: + case BPF_LDX | BPF_MEM | BPF_DW: + if (emit_load_r64(dst, src, off, ctx, BPF_SIZE(code))) + return -1; + break; + + case BPF_ST | BPF_MEM | BPF_B: + case BPF_ST | BPF_MEM | BPF_H: + case BPF_ST | BPF_MEM | BPF_W: + case BPF_ST | 
BPF_MEM | BPF_DW: + + case BPF_STX | BPF_MEM | BPF_B: + case BPF_STX | BPF_MEM | BPF_H: + case BPF_STX | BPF_MEM | BPF_W: + case BPF_STX | BPF_MEM | BPF_DW: + case BPF_STX | BPF_XADD | BPF_W: + if (BPF_CLASS(code) == BPF_ST) { + emit_imm32(tmp2, imm, ctx); + src = tmp2; + } + + if (emit_store_r64(dst, src, off, ctx, BPF_SIZE(code), + BPF_MODE(code))) + return -1; + break; + + /* No hardware support for 8-byte atomics in RV32. */ + case BPF_STX | BPF_XADD | BPF_DW: + /* Fallthrough. */ + +notsupported: + pr_info_once("bpf-jit: not supported: opcode %02x ***\n", code); + return -EFAULT; + + default: + pr_err("bpf-jit: unknown opcode %02x\n", code); + return -EINVAL; + } + + return 0; +} + +void bpf_jit_build_prologue(struct rv_jit_context *ctx) +{ + /* Make space to save 9 registers: ra, fp, s1--s7. */ + int stack_adjust = 9 * sizeof(u32), store_offset, bpf_stack_adjust; + const s8 *fp = bpf2rv32[BPF_REG_FP]; + const s8 *r1 = bpf2rv32[BPF_REG_1]; + + bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16); + stack_adjust += bpf_stack_adjust; + + store_offset = stack_adjust - 4; + + stack_adjust += 4 * BPF_JIT_SCRATCH_REGS; + + /* + * The first instruction sets the tail-call-counter (TCC) register. + * This instruction is skipped by tail calls. + */ + emit(rv_addi(RV_REG_TCC, RV_REG_ZERO, MAX_TAIL_CALL_CNT), ctx); + + emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx); + + /* Save callee-save registers. 
*/ + emit(rv_sw(RV_REG_SP, store_offset - 0, RV_REG_RA), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 4, RV_REG_FP), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 8, RV_REG_S1), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 12, RV_REG_S2), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 16, RV_REG_S3), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 20, RV_REG_S4), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 24, RV_REG_S5), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 28, RV_REG_S6), ctx); + emit(rv_sw(RV_REG_SP, store_offset - 32, RV_REG_S7), ctx); + + /* Set fp: used as the base address for stacked BPF registers. */ + emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx); + + /* Set up BPF stack pointer. */ + emit(rv_addi(lo(fp), RV_REG_SP, bpf_stack_adjust), ctx); + emit(rv_addi(hi(fp), RV_REG_ZERO, 0), ctx); + + /* Set up context pointer. */ + emit(rv_addi(lo(r1), RV_REG_A0, 0), ctx); + emit(rv_addi(hi(r1), RV_REG_ZERO, 0), ctx); + + ctx->stack_size = stack_adjust; +} + +void bpf_jit_build_epilogue(struct rv_jit_context *ctx) +{ + __build_epilogue(false, ctx); +} diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp64.c index 483f4ad7f4dc..cc1985d8750a 100644 --- a/arch/riscv/net/bpf_jit_comp.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -7,42 +7,7 @@ #include <linux/bpf.h> #include <linux/filter.h> -#include <asm/cacheflush.h> - -enum { - RV_REG_ZERO = 0, /* The constant value 0 */ - RV_REG_RA = 1, /* Return address */ - RV_REG_SP = 2, /* Stack pointer */ - RV_REG_GP = 3, /* Global pointer */ - RV_REG_TP = 4, /* Thread pointer */ - RV_REG_T0 = 5, /* Temporaries */ - RV_REG_T1 = 6, - RV_REG_T2 = 7, - RV_REG_FP = 8, - RV_REG_S1 = 9, /* Saved registers */ - RV_REG_A0 = 10, /* Function argument/return values */ - RV_REG_A1 = 11, /* Function arguments */ - RV_REG_A2 = 12, - RV_REG_A3 = 13, - RV_REG_A4 = 14, - RV_REG_A5 = 15, - RV_REG_A6 = 16, - RV_REG_A7 = 17, - RV_REG_S2 = 18, /* Saved registers */ - RV_REG_S3 = 19, - RV_REG_S4 = 20, - RV_REG_S5 = 21, - 
RV_REG_S6 = 22, - RV_REG_S7 = 23, - RV_REG_S8 = 24, - RV_REG_S9 = 25, - RV_REG_S10 = 26, - RV_REG_S11 = 27, - RV_REG_T3 = 28, /* Temporaries */ - RV_REG_T4 = 29, - RV_REG_T5 = 30, - RV_REG_T6 = 31, -}; +#include "bpf_jit.h" #define RV_REG_TCC RV_REG_A6 #define RV_REG_TCC_SAVED RV_REG_S6 /* Store A6 in S6 if program do calls */ @@ -73,22 +38,6 @@ enum { RV_CTX_F_SEEN_S6 = RV_REG_S6, }; -struct rv_jit_context { - struct bpf_prog *prog; - u32 *insns; /* RV insns */ - int ninsns; - int epilogue_offset; - int *offset; /* BPF to RV */ - unsigned long flags; - int stack_size; -}; - -struct rv_jit_data { - struct bpf_binary_header *header; - u8 *image; - struct rv_jit_context ctx; -}; - static u8 bpf_to_rv_reg(int bpf_reg, struct rv_jit_context *ctx) { u8 reg = regmap[bpf_reg]; @@ -156,346 +105,11 @@ static u8 rv_tail_call_reg(struct rv_jit_context *ctx) return RV_REG_A6; } -static void emit(const u32 insn, struct rv_jit_context *ctx) -{ - if (ctx->insns) - ctx->insns[ctx->ninsns] = insn; - - ctx->ninsns++; -} - -static u32 rv_r_insn(u8 funct7, u8 rs2, u8 rs1, u8 funct3, u8 rd, u8 opcode) -{ - return (funct7 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | - (rd << 7) | opcode; -} - -static u32 rv_i_insn(u16 imm11_0, u8 rs1, u8 funct3, u8 rd, u8 opcode) -{ - return (imm11_0 << 20) | (rs1 << 15) | (funct3 << 12) | (rd << 7) | - opcode; -} - -static u32 rv_s_insn(u16 imm11_0, u8 rs2, u8 rs1, u8 funct3, u8 opcode) -{ - u8 imm11_5 = imm11_0 >> 5, imm4_0 = imm11_0 & 0x1f; - - return (imm11_5 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | - (imm4_0 << 7) | opcode; -} - -static u32 rv_sb_insn(u16 imm12_1, u8 rs2, u8 rs1, u8 funct3, u8 opcode) -{ - u8 imm12 = ((imm12_1 & 0x800) >> 5) | ((imm12_1 & 0x3f0) >> 4); - u8 imm4_1 = ((imm12_1 & 0xf) << 1) | ((imm12_1 & 0x400) >> 10); - - return (imm12 << 25) | (rs2 << 20) | (rs1 << 15) | (funct3 << 12) | - (imm4_1 << 7) | opcode; -} - -static u32 rv_u_insn(u32 imm31_12, u8 rd, u8 opcode) -{ - return (imm31_12 << 12) | (rd << 
7) | opcode; -} - -static u32 rv_uj_insn(u32 imm20_1, u8 rd, u8 opcode) -{ - u32 imm; - - imm = (imm20_1 & 0x80000) | ((imm20_1 & 0x3ff) << 9) | - ((imm20_1 & 0x400) >> 2) | ((imm20_1 & 0x7f800) >> 11); - - return (imm << 12) | (rd << 7) | opcode; -} - -static u32 rv_amo_insn(u8 funct5, u8 aq, u8 rl, u8 rs2, u8 rs1, - u8 funct3, u8 rd, u8 opcode) -{ - u8 funct7 = (funct5 << 2) | (aq << 1) | rl; - - return rv_r_insn(funct7, rs2, rs1, funct3, rd, opcode); -} - -static u32 rv_addiw(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 0, rd, 0x1b); -} - -static u32 rv_addi(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 0, rd, 0x13); -} - -static u32 rv_addw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 0, rd, 0x3b); -} - -static u32 rv_add(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 0, rd, 0x33); -} - -static u32 rv_subw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x3b); -} - -static u32 rv_sub(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0x20, rs2, rs1, 0, rd, 0x33); -} - -static u32 rv_and(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 7, rd, 0x33); -} - -static u32 rv_or(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 6, rd, 0x33); -} - -static u32 rv_xor(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 4, rd, 0x33); -} - -static u32 rv_mulw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 0, rd, 0x3b); -} - -static u32 rv_mul(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 0, rd, 0x33); -} - -static u32 rv_divuw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 5, rd, 0x3b); -} - -static u32 rv_divu(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 5, rd, 0x33); -} - -static u32 rv_remuw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 7, rd, 0x3b); -} - -static u32 rv_remu(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(1, rs2, rs1, 7, rd, 0x33); -} - -static u32 rv_sllw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, 
rs2, rs1, 1, rd, 0x3b); -} - -static u32 rv_sll(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 1, rd, 0x33); -} - -static u32 rv_srlw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 5, rd, 0x3b); -} - -static u32 rv_srl(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0, rs2, rs1, 5, rd, 0x33); -} - -static u32 rv_sraw(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x3b); -} - -static u32 rv_sra(u8 rd, u8 rs1, u8 rs2) -{ - return rv_r_insn(0x20, rs2, rs1, 5, rd, 0x33); -} - -static u32 rv_lui(u8 rd, u32 imm31_12) -{ - return rv_u_insn(imm31_12, rd, 0x37); -} - -static u32 rv_slli(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 1, rd, 0x13); -} - -static u32 rv_andi(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 7, rd, 0x13); -} - -static u32 rv_ori(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 6, rd, 0x13); -} - -static u32 rv_xori(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 4, rd, 0x13); -} - -static u32 rv_slliw(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 1, rd, 0x1b); -} - -static u32 rv_srliw(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 5, rd, 0x1b); -} - -static u32 rv_srli(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 5, rd, 0x13); -} - -static u32 rv_sraiw(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x1b); -} - -static u32 rv_srai(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(0x400 | imm11_0, rs1, 5, rd, 0x13); -} - -static u32 rv_jal(u8 rd, u32 imm20_1) -{ - return rv_uj_insn(imm20_1, rd, 0x6f); -} - -static u32 rv_jalr(u8 rd, u8 rs1, u16 imm11_0) -{ - return rv_i_insn(imm11_0, rs1, 0, rd, 0x67); -} - -static u32 rv_beq(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 0, 0x63); -} - -static u32 rv_bltu(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 6, 0x63); -} - -static u32 rv_bgeu(u8 rs1, u8 rs2, u16 imm12_1) 
-{ - return rv_sb_insn(imm12_1, rs2, rs1, 7, 0x63); -} - -static u32 rv_bne(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 1, 0x63); -} - -static u32 rv_blt(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 4, 0x63); -} - -static u32 rv_bge(u8 rs1, u8 rs2, u16 imm12_1) -{ - return rv_sb_insn(imm12_1, rs2, rs1, 5, 0x63); -} - -static u32 rv_sb(u8 rs1, u16 imm11_0, u8 rs2) -{ - return rv_s_insn(imm11_0, rs2, rs1, 0, 0x23); -} - -static u32 rv_sh(u8 rs1, u16 imm11_0, u8 rs2) -{ - return rv_s_insn(imm11_0, rs2, rs1, 1, 0x23); -} - -static u32 rv_sw(u8 rs1, u16 imm11_0, u8 rs2) -{ - return rv_s_insn(imm11_0, rs2, rs1, 2, 0x23); -} - -static u32 rv_sd(u8 rs1, u16 imm11_0, u8 rs2) -{ - return rv_s_insn(imm11_0, rs2, rs1, 3, 0x23); -} - -static u32 rv_lbu(u8 rd, u16 imm11_0, u8 rs1) -{ - return rv_i_insn(imm11_0, rs1, 4, rd, 0x03); -} - -static u32 rv_lhu(u8 rd, u16 imm11_0, u8 rs1) -{ - return rv_i_insn(imm11_0, rs1, 5, rd, 0x03); -} - -static u32 rv_lwu(u8 rd, u16 imm11_0, u8 rs1) -{ - return rv_i_insn(imm11_0, rs1, 6, rd, 0x03); -} - -static u32 rv_ld(u8 rd, u16 imm11_0, u8 rs1) -{ - return rv_i_insn(imm11_0, rs1, 3, rd, 0x03); -} - -static u32 rv_amoadd_w(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) -{ - return rv_amo_insn(0, aq, rl, rs2, rs1, 2, rd, 0x2f); -} - -static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl) -{ - return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f); -} - -static u32 rv_auipc(u8 rd, u32 imm31_12) -{ - return rv_u_insn(imm31_12, rd, 0x17); -} - -static bool is_12b_int(s64 val) -{ - return -(1 << 11) <= val && val < (1 << 11); -} - -static bool is_13b_int(s64 val) -{ - return -(1 << 12) <= val && val < (1 << 12); -} - -static bool is_21b_int(s64 val) -{ - return -(1L << 20) <= val && val < (1L << 20); -} - static bool is_32b_int(s64 val) { return -(1L << 31) <= val && val < (1L << 31); } -static int is_12b_check(int off, int insn) -{ - if (!is_12b_int(off)) { - pr_err("bpf-jit: insn=%d 12b < offset=%d not 
supported yet!\n", - insn, (int)off); - return -1; - } - return 0; -} - static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) { /* Note that the immediate from the add is sign-extended, @@ -535,23 +149,6 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx) emit(rv_addi(rd, rd, lower), ctx); } -static int rv_offset(int insn, int off, struct rv_jit_context *ctx) -{ - int from, to; - - off++; /* BPF branch is from PC+1, RV is from PC */ - from = (insn > 0) ? ctx->offset[insn - 1] : 0; - to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0; - return (to - from) << 2; -} - -static int epilogue_offset(struct rv_jit_context *ctx) -{ - int to = ctx->epilogue_offset, from = ctx->ninsns; - - return (to - from) << 2; -} - static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) { int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8; @@ -596,34 +193,6 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx) ctx); } -/* return -1 or inverted cond */ -static int invert_bpf_cond(u8 cond) -{ - switch (cond) { - case BPF_JEQ: - return BPF_JNE; - case BPF_JGT: - return BPF_JLE; - case BPF_JLT: - return BPF_JGE; - case BPF_JGE: - return BPF_JLT; - case BPF_JLE: - return BPF_JGT; - case BPF_JNE: - return BPF_JEQ; - case BPF_JSGT: - return BPF_JSLE; - case BPF_JSLT: - return BPF_JSGE; - case BPF_JSGE: - return BPF_JSLT; - case BPF_JSLE: - return BPF_JSGT; - } - return -1; -} - static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff, struct rv_jit_context *ctx) { @@ -855,8 +424,8 @@ static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx) return 0; } -static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, - bool extra_pass) +int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx, + bool extra_pass) { bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 || BPF_CLASS(insn->code) == BPF_JMP; @@ -1434,7 +1003,7 @@ out_be: return 0; } -static void 
build_prologue(struct rv_jit_context *ctx) +void bpf_jit_build_prologue(struct rv_jit_context *ctx) { int stack_adjust = 0, store_offset, bpf_stack_adjust; @@ -1515,175 +1084,11 @@ static void build_prologue(struct rv_jit_context *ctx) ctx->stack_size = stack_adjust; } -static void build_epilogue(struct rv_jit_context *ctx) +void bpf_jit_build_epilogue(struct rv_jit_context *ctx) { __build_epilogue(false, ctx); } -static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) -{ - const struct bpf_prog *prog = ctx->prog; - int i; - - for (i = 0; i < prog->len; i++) { - const struct bpf_insn *insn = &prog->insnsi[i]; - int ret; - - ret = emit_insn(insn, ctx, extra_pass); - if (ret > 0) { - i++; - if (offset) - offset[i] = ctx->ninsns; - continue; - } - if (offset) - offset[i] = ctx->ninsns; - if (ret) - return ret; - } - return 0; -} - -static void bpf_fill_ill_insns(void *area, unsigned int size) -{ - memset(area, 0, size); -} - -static void bpf_flush_icache(void *start, void *end) -{ - flush_icache_range((unsigned long)start, (unsigned long)end); -} - -bool bpf_jit_needs_zext(void) -{ - return true; -} - -struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) -{ - bool tmp_blinded = false, extra_pass = false; - struct bpf_prog *tmp, *orig_prog = prog; - int pass = 0, prev_ninsns = 0, i; - struct rv_jit_data *jit_data; - unsigned int image_size = 0; - struct rv_jit_context *ctx; - - if (!prog->jit_requested) - return orig_prog; - - tmp = bpf_jit_blind_constants(prog); - if (IS_ERR(tmp)) - return orig_prog; - if (tmp != prog) { - tmp_blinded = true; - prog = tmp; - } - - jit_data = prog->aux->jit_data; - if (!jit_data) { - jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); - if (!jit_data) { - prog = orig_prog; - goto out; - } - prog->aux->jit_data = jit_data; - } - - ctx = &jit_data->ctx; - - if (ctx->offset) { - extra_pass = true; - image_size = sizeof(u32) * ctx->ninsns; - goto skip_init_ctx; - } - - ctx->prog = prog; - ctx->offset = 
kcalloc(prog->len, sizeof(int), GFP_KERNEL); - if (!ctx->offset) { - prog = orig_prog; - goto out_offset; - } - for (i = 0; i < prog->len; i++) { - prev_ninsns += 32; - ctx->offset[i] = prev_ninsns; - } - - for (i = 0; i < 16; i++) { - pass++; - ctx->ninsns = 0; - if (build_body(ctx, extra_pass, ctx->offset)) { - prog = orig_prog; - goto out_offset; - } - build_prologue(ctx); - ctx->epilogue_offset = ctx->ninsns; - build_epilogue(ctx); - - if (ctx->ninsns == prev_ninsns) { - if (jit_data->header) - break; - - image_size = sizeof(u32) * ctx->ninsns; - jit_data->header = - bpf_jit_binary_alloc(image_size, - &jit_data->image, - sizeof(u32), - bpf_fill_ill_insns); - if (!jit_data->header) { - prog = orig_prog; - goto out_offset; - } - - ctx->insns = (u32 *)jit_data->image; - /* Now, when the image is allocated, the image - * can potentially shrink more (auipc/jalr -> - * jal). - */ - } - prev_ninsns = ctx->ninsns; - } - - if (i == 16) { - pr_err("bpf-jit: image did not converge in <%d passes!\n", i); - bpf_jit_binary_free(jit_data->header); - prog = orig_prog; - goto out_offset; - } - -skip_init_ctx: - pass++; - ctx->ninsns = 0; - - build_prologue(ctx); - if (build_body(ctx, extra_pass, NULL)) { - bpf_jit_binary_free(jit_data->header); - prog = orig_prog; - goto out_offset; - } - build_epilogue(ctx); - - if (bpf_jit_enable > 1) - bpf_jit_dump(prog->len, image_size, pass, ctx->insns); - - prog->bpf_func = (void *)ctx->insns; - prog->jited = 1; - prog->jited_len = image_size; - - bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); - - if (!prog->is_func || extra_pass) { -out_offset: - kfree(ctx->offset); - kfree(jit_data); - prog->aux->jit_data = NULL; - } -out: - if (tmp_blinded) - bpf_jit_prog_release_other(prog, prog == orig_prog ? 
- tmp : orig_prog); - return prog; -} - void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c new file mode 100644 index 000000000000..709b94ece3ed --- /dev/null +++ b/arch/riscv/net/bpf_jit_core.c @@ -0,0 +1,166 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Common functionality for RV32 and RV64 BPF JIT compilers + * + * Copyright (c) 2019 Björn Töpel <bjorn.topel@gmail.com> + * + */ + +#include <linux/bpf.h> +#include <linux/filter.h> +#include "bpf_jit.h" + +/* Number of iterations to try until offsets converge. */ +#define NR_JIT_ITERATIONS 16 + +static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset) +{ + const struct bpf_prog *prog = ctx->prog; + int i; + + for (i = 0; i < prog->len; i++) { + const struct bpf_insn *insn = &prog->insnsi[i]; + int ret; + + ret = bpf_jit_emit_insn(insn, ctx, extra_pass); + /* BPF_LD | BPF_IMM | BPF_DW: skip the next instruction. 
*/ + if (ret > 0) + i++; + if (offset) + offset[i] = ctx->ninsns; + if (ret < 0) + return ret; + } + return 0; +} + +bool bpf_jit_needs_zext(void) +{ + return true; +} + +struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) +{ + bool tmp_blinded = false, extra_pass = false; + struct bpf_prog *tmp, *orig_prog = prog; + int pass = 0, prev_ninsns = 0, i; + struct rv_jit_data *jit_data; + struct rv_jit_context *ctx; + unsigned int image_size = 0; + + if (!prog->jit_requested) + return orig_prog; + + tmp = bpf_jit_blind_constants(prog); + if (IS_ERR(tmp)) + return orig_prog; + if (tmp != prog) { + tmp_blinded = true; + prog = tmp; + } + + jit_data = prog->aux->jit_data; + if (!jit_data) { + jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + prog = orig_prog; + goto out; + } + prog->aux->jit_data = jit_data; + } + + ctx = &jit_data->ctx; + + if (ctx->offset) { + extra_pass = true; + image_size = sizeof(u32) * ctx->ninsns; + goto skip_init_ctx; + } + + ctx->prog = prog; + ctx->offset = kcalloc(prog->len, sizeof(int), GFP_KERNEL); + if (!ctx->offset) { + prog = orig_prog; + goto out_offset; + } + for (i = 0; i < prog->len; i++) { + prev_ninsns += 32; + ctx->offset[i] = prev_ninsns; + } + + for (i = 0; i < NR_JIT_ITERATIONS; i++) { + pass++; + ctx->ninsns = 0; + if (build_body(ctx, extra_pass, ctx->offset)) { + prog = orig_prog; + goto out_offset; + } + bpf_jit_build_prologue(ctx); + ctx->epilogue_offset = ctx->ninsns; + bpf_jit_build_epilogue(ctx); + + if (ctx->ninsns == prev_ninsns) { + if (jit_data->header) + break; + + image_size = sizeof(u32) * ctx->ninsns; + jit_data->header = + bpf_jit_binary_alloc(image_size, + &jit_data->image, + sizeof(u32), + bpf_fill_ill_insns); + if (!jit_data->header) { + prog = orig_prog; + goto out_offset; + } + + ctx->insns = (u32 *)jit_data->image; + /* + * Now, when the image is allocated, the image can + * potentially shrink more (auipc/jalr -> jal). 
+ */ + } + prev_ninsns = ctx->ninsns; + } + + if (i == NR_JIT_ITERATIONS) { + pr_err("bpf-jit: image did not converge in <%d passes!\n", i); + bpf_jit_binary_free(jit_data->header); + prog = orig_prog; + goto out_offset; + } + +skip_init_ctx: + pass++; + ctx->ninsns = 0; + + bpf_jit_build_prologue(ctx); + if (build_body(ctx, extra_pass, NULL)) { + bpf_jit_binary_free(jit_data->header); + prog = orig_prog; + goto out_offset; + } + bpf_jit_build_epilogue(ctx); + + if (bpf_jit_enable > 1) + bpf_jit_dump(prog->len, image_size, pass, ctx->insns); + + prog->bpf_func = (void *)ctx->insns; + prog->jited = 1; + prog->jited_len = image_size; + + bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); + + if (!prog->is_func || extra_pass) { +out_offset: + kfree(ctx->offset); + kfree(jit_data); + prog->aux->jit_data = NULL; + } +out: + + if (tmp_blinded) + bpf_jit_prog_release_other(prog, prog == orig_prog ? + tmp : orig_prog); + return prog; +} |