summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-04-29 01:55:39 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2023-04-29 01:55:39 +0200
commit89d77f71f493a3663b10fa812d17f472935d24be (patch)
tree03a7e0b87dc1e922ba028e8e0c4a5816c3c0e40a
parentMerge tag 'm68knommu-for-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff)
parentRISC-V: hwprobe: Explicity check for -1 in vdso init (diff)
downloadlinux-89d77f71f493a3663b10fa812d17f472935d24be.tar.xz
linux-89d77f71f493a3663b10fa812d17f472935d24be.zip
Merge tag 'riscv-for-linus-6.4-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V updates from Palmer Dabbelt: - Support for runtime detection of the Svnapot extension - Support for Zicboz when clearing pages - We've moved to GENERIC_ENTRY - Support for !MMU on rv32 systems - The linear region is now mapped via huge pages - Support for building relocatable kernels - Support for the hwprobe interface - Various fixes and cleanups throughout the tree * tag 'riscv-for-linus-6.4-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (57 commits) RISC-V: hwprobe: Explicity check for -1 in vdso init RISC-V: hwprobe: There can only be one first riscv: Allow to downgrade paging mode from the command line dt-bindings: riscv: add sv57 mmu-type RISC-V: hwprobe: Remove __init on probe_vendor_features() riscv: Use --emit-relocs in order to move .rela.dyn in init riscv: Check relocations at compile time powerpc: Move script to check relocations at compile time in scripts/ riscv: Introduce CONFIG_RELOCATABLE riscv: Move .rela.dyn outside of init to avoid empty relocations riscv: Prepare EFI header for relocatable kernels riscv: Unconditionnally select KASAN_VMALLOC if KASAN riscv: Fix ptdump when KASAN is enabled riscv: Fix EFI stub usage of KASAN instrumented strcmp function riscv: Move DTB_EARLY_BASE_VA to the kernel address space riscv: Rework kasan population functions riscv: Split early and final KASAN population functions riscv: Use PUD/P4D/PGD pages for the linear mapping riscv: Move the linear mapping creation in its own function riscv: Get rid of riscv_pfn_base variable ...
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt5
-rw-r--r--Documentation/devicetree/bindings/riscv/cpus.yaml6
-rw-r--r--Documentation/riscv/hwprobe.rst86
-rw-r--r--Documentation/riscv/index.rst1
-rwxr-xr-xarch/powerpc/tools/relocs_check.sh18
-rw-r--r--arch/riscv/Kconfig78
-rw-r--r--arch/riscv/Kconfig.errata (renamed from arch/riscv/Kconfig.erratas)0
-rw-r--r--arch/riscv/Makefile12
-rw-r--r--arch/riscv/Makefile.postlink49
-rw-r--r--arch/riscv/boot/Makefile7
-rw-r--r--arch/riscv/errata/sifive/errata.c8
-rw-r--r--arch/riscv/errata/thead/errata.c14
-rw-r--r--arch/riscv/include/asm/alternative-macros.h72
-rw-r--r--arch/riscv/include/asm/alternative.h20
-rw-r--r--arch/riscv/include/asm/asm-prototypes.h2
-rw-r--r--arch/riscv/include/asm/asm.h61
-rw-r--r--arch/riscv/include/asm/cacheflush.h3
-rw-r--r--arch/riscv/include/asm/cpufeature.h23
-rw-r--r--arch/riscv/include/asm/csr.h1
-rw-r--r--arch/riscv/include/asm/entry-common.h11
-rw-r--r--arch/riscv/include/asm/hugetlb.h34
-rw-r--r--arch/riscv/include/asm/hwcap.h2
-rw-r--r--arch/riscv/include/asm/hwprobe.h13
-rw-r--r--arch/riscv/include/asm/insn-def.h4
-rw-r--r--arch/riscv/include/asm/page.h30
-rw-r--r--arch/riscv/include/asm/pgtable-64.h34
-rw-r--r--arch/riscv/include/asm/pgtable.h39
-rw-r--r--arch/riscv/include/asm/ptrace.h10
-rw-r--r--arch/riscv/include/asm/set_memory.h3
-rw-r--r--arch/riscv/include/asm/stacktrace.h5
-rw-r--r--arch/riscv/include/asm/syscall.h25
-rw-r--r--arch/riscv/include/asm/thread_info.h13
-rw-r--r--arch/riscv/include/asm/topology.h21
-rw-r--r--arch/riscv/include/asm/vdso/data.h17
-rw-r--r--arch/riscv/include/asm/vdso/gettimeofday.h8
-rw-r--r--arch/riscv/include/asm/vmalloc.h61
-rw-r--r--arch/riscv/include/uapi/asm/hwprobe.h37
-rw-r--r--arch/riscv/include/uapi/asm/kvm.h2
-rw-r--r--arch/riscv/include/uapi/asm/unistd.h9
-rw-r--r--arch/riscv/kernel/Makefile4
-rw-r--r--arch/riscv/kernel/alternative.c21
-rw-r--r--arch/riscv/kernel/cacheinfo.c66
-rw-r--r--arch/riscv/kernel/compat_vdso/Makefile2
-rw-r--r--arch/riscv/kernel/cpu.c10
-rw-r--r--arch/riscv/kernel/cpufeature.c67
-rw-r--r--arch/riscv/kernel/efi-header.S19
-rw-r--r--arch/riscv/kernel/entry.S321
-rw-r--r--arch/riscv/kernel/head.h1
-rw-r--r--arch/riscv/kernel/image-vars.h2
-rw-r--r--arch/riscv/kernel/mcount-dyn.S57
-rw-r--r--arch/riscv/kernel/pi/Makefile39
-rw-r--r--arch/riscv/kernel/pi/cmdline_early.c62
-rw-r--r--arch/riscv/kernel/process.c5
-rw-r--r--arch/riscv/kernel/ptrace.c44
-rw-r--r--arch/riscv/kernel/setup.c2
-rw-r--r--arch/riscv/kernel/signal.c29
-rw-r--r--arch/riscv/kernel/smpboot.c1
-rw-r--r--arch/riscv/kernel/sys_riscv.c230
-rw-r--r--arch/riscv/kernel/trace_irq.c27
-rw-r--r--arch/riscv/kernel/trace_irq.h11
-rw-r--r--arch/riscv/kernel/traps.c144
-rw-r--r--arch/riscv/kernel/vdso.c6
-rw-r--r--arch/riscv/kernel/vdso/Makefile4
-rw-r--r--arch/riscv/kernel/vdso/hwprobe.c52
-rw-r--r--arch/riscv/kernel/vdso/sys_hwprobe.S15
-rw-r--r--arch/riscv/kernel/vdso/vdso.lds.S3
-rw-r--r--arch/riscv/kernel/vmlinux.lds.S35
-rw-r--r--arch/riscv/kvm/vcpu.c11
-rw-r--r--arch/riscv/lib/Makefile1
-rw-r--r--arch/riscv/lib/clear_page.S74
-rw-r--r--arch/riscv/lib/memcpy.S2
-rw-r--r--arch/riscv/lib/memmove.S2
-rw-r--r--arch/riscv/lib/strcmp.S3
-rw-r--r--arch/riscv/lib/strlen.S4
-rw-r--r--arch/riscv/lib/strncmp.S3
-rw-r--r--arch/riscv/mm/Makefile4
-rw-r--r--arch/riscv/mm/cacheflush.c64
-rw-r--r--arch/riscv/mm/fault.c6
-rw-r--r--arch/riscv/mm/hugetlbpage.c301
-rw-r--r--arch/riscv/mm/init.c206
-rw-r--r--arch/riscv/mm/kasan_init.c516
-rw-r--r--arch/riscv/mm/physaddr.c16
-rw-r--r--arch/riscv/mm/ptdump.c24
-rwxr-xr-xarch/riscv/tools/relocs_check.sh26
-rw-r--r--drivers/of/fdt.c11
-rwxr-xr-xscripts/relocs_check.sh20
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/riscv/Makefile58
-rw-r--r--tools/testing/selftests/riscv/hwprobe/Makefile10
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c90
-rw-r--r--tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S12
91 files changed, 2595 insertions, 993 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b1453e0e3c95..9e5bab29685f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3612,7 +3612,10 @@
emulation library even if a 387 maths coprocessor
is present.
- no5lvl [X86-64] Disable 5-level paging mode. Forces
+ no4lvl [RISCV] Disable 4-level and 5-level paging modes. Forces
+ kernel to use 3-level paging instead.
+
+ no5lvl [X86-64,RISCV] Disable 5-level paging mode. Forces
kernel to use 4-level paging instead.
noaliencache [MM, NUMA, SLAB] Disables the allocation of alien
diff --git a/Documentation/devicetree/bindings/riscv/cpus.yaml b/Documentation/devicetree/bindings/riscv/cpus.yaml
index 14b5b7ea0ce0..25d6e8dbffb8 100644
--- a/Documentation/devicetree/bindings/riscv/cpus.yaml
+++ b/Documentation/devicetree/bindings/riscv/cpus.yaml
@@ -66,6 +66,7 @@ properties:
- riscv,sv32
- riscv,sv39
- riscv,sv48
+ - riscv,sv57
- riscv,none
riscv,cbom-block-size:
@@ -73,6 +74,11 @@ properties:
description:
The blocksize in bytes for the Zicbom cache operations.
+ riscv,cboz-block-size:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description:
+ The blocksize in bytes for the Zicboz cache operations.
+
riscv,isa:
description:
Identifies the specific RISC-V instruction set architecture
diff --git a/Documentation/riscv/hwprobe.rst b/Documentation/riscv/hwprobe.rst
new file mode 100644
index 000000000000..9f0dd62dcb5d
--- /dev/null
+++ b/Documentation/riscv/hwprobe.rst
@@ -0,0 +1,86 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+RISC-V Hardware Probing Interface
+---------------------------------
+
+The RISC-V hardware probing interface is based around a single syscall, which
+is defined in <asm/hwprobe.h>::
+
+ struct riscv_hwprobe {
+ __s64 key;
+ __u64 value;
+ };
+
+ long sys_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, cpu_set_t *cpus,
+ unsigned int flags);
+
+The arguments are split into three groups: an array of key-value pairs, a CPU
+set, and some flags. The key-value pairs are supplied with a count. Userspace
+must prepopulate the key field for each element, and the kernel will fill in the
+value if the key is recognized. If a key is unknown to the kernel, its key field
+will be cleared to -1, and its value set to 0. The CPU set is defined by
+CPU_SET(3). For value-like keys (eg. vendor/arch/impl), the returned value will
+be only be valid if all CPUs in the given set have the same value. Otherwise -1
+will be returned. For boolean-like keys, the value returned will be a logical
+AND of the values for the specified CPUs. Usermode can supply NULL for cpus and
+0 for cpu_count as a shortcut for all online CPUs. There are currently no flags,
+this value must be zero for future compatibility.
+
+On success 0 is returned, on failure a negative error code is returned.
+
+The following keys are defined:
+
+* :c:macro:`RISCV_HWPROBE_KEY_MVENDORID`: Contains the value of ``mvendorid``,
+ as defined by the RISC-V privileged architecture specification.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MARCHID`: Contains the value of ``marchid``, as
+ defined by the RISC-V privileged architecture specification.
+
+* :c:macro:`RISCV_HWPROBE_KEY_MIMPLID`: Contains the value of ``mimplid``, as
+ defined by the RISC-V privileged architecture specification.
+
+* :c:macro:`RISCV_HWPROBE_KEY_BASE_BEHAVIOR`: A bitmask containing the base
+ user-visible behavior that this kernel supports. The following base user ABIs
+ are defined:
+
+ * :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`: Support for rv32ima or
+ rv64ima, as defined by version 2.2 of the user ISA and version 1.10 of the
+ privileged ISA, with the following known exceptions (more exceptions may be
+ added, but only if it can be demonstrated that the user ABI is not broken):
+
+ * The :fence.i: instruction cannot be directly executed by userspace
+ programs (it may still be executed in userspace via a
+ kernel-controlled mechanism such as the vDSO).
+
+* :c:macro:`RISCV_HWPROBE_KEY_IMA_EXT_0`: A bitmask containing the extensions
+ that are compatible with the :c:macro:`RISCV_HWPROBE_BASE_BEHAVIOR_IMA`:
+ base system behavior.
+
+ * :c:macro:`RISCV_HWPROBE_IMA_FD`: The F and D extensions are supported, as
+ defined by commit cd20cee ("FMIN/FMAX now implement
+ minimumNumber/maximumNumber, not minNum/maxNum") of the RISC-V ISA manual.
+
+ * :c:macro:`RISCV_HWPROBE_IMA_C`: The C extension is supported, as defined
+ by version 2.2 of the RISC-V ISA manual.
+
+* :c:macro:`RISCV_HWPROBE_KEY_CPUPERF_0`: A bitmask that contains performance
+ information about the selected set of processors.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNKNOWN`: The performance of misaligned
+ accesses is unknown.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_EMULATED`: Misaligned accesses are
+ emulated via software, either in or below the kernel. These accesses are
+ always extremely slow.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_SLOW`: Misaligned accesses are supported
+ in hardware, but are slower than the cooresponding aligned accesses
+ sequences.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_FAST`: Misaligned accesses are supported
+ in hardware and are faster than the cooresponding aligned accesses
+ sequences.
+
+ * :c:macro:`RISCV_HWPROBE_MISALIGNED_UNSUPPORTED`: Misaligned accesses are
+ not supported at all and will generate a misaligned address fault.
diff --git a/Documentation/riscv/index.rst b/Documentation/riscv/index.rst
index 2e5b18fbb145..175a91db0200 100644
--- a/Documentation/riscv/index.rst
+++ b/Documentation/riscv/index.rst
@@ -7,6 +7,7 @@ RISC-V architecture
boot-image-header
vm-layout
+ hwprobe
patch-acceptance
uabi
diff --git a/arch/powerpc/tools/relocs_check.sh b/arch/powerpc/tools/relocs_check.sh
index 63792af00417..6b350e75014c 100755
--- a/arch/powerpc/tools/relocs_check.sh
+++ b/arch/powerpc/tools/relocs_check.sh
@@ -15,21 +15,8 @@ if [ $# -lt 3 ]; then
exit 1
fi
-# Have Kbuild supply the path to objdump and nm so we handle cross compilation.
-objdump="$1"
-nm="$2"
-vmlinux="$3"
-
-# Remove from the bad relocations those that match an undefined weak symbol
-# which will result in an absolute relocation to 0.
-# Weak unresolved symbols are of that form in nm output:
-# " w _binary__btf_vmlinux_bin_end"
-undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }')
-
bad_relocs=$(
-$objdump -R "$vmlinux" |
- # Only look at relocation lines.
- grep -E '\<R_' |
+${srctree}/scripts/relocs_check.sh "$@" |
# These relocations are okay
# On PPC64:
# R_PPC64_RELATIVE, R_PPC64_NONE
@@ -44,8 +31,7 @@ R_PPC_ADDR16_LO
R_PPC_ADDR16_HI
R_PPC_ADDR16_HA
R_PPC_RELATIVE
-R_PPC_NONE' |
- ([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat)
+R_PPC_NONE'
)
if [ -z "$bad_relocs" ]; then
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index 6adea68a2c05..5c88ac4b52be 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -33,6 +33,7 @@ config RISCV
select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UBSAN_SANITIZE_ALL
+ select ARCH_HAS_VDSO_DATA
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
select ARCH_STACKWALK
@@ -44,7 +45,7 @@ config RISCV
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
- select ARCH_WANT_GENERAL_HUGETLB
+ select ARCH_WANT_GENERAL_HUGETLB if !RISCV_ISA_SVNAPOT
select ARCH_WANT_HUGETLB_PAGE_OPTIMIZE_VMEMMAP
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select ARCH_WANT_LD_ORPHAN_WARN if !XIP_KERNEL
@@ -60,6 +61,7 @@ config RISCV
select GENERIC_ATOMIC64 if !64BIT
select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_EARLY_IOREMAP
+ select GENERIC_ENTRY
select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
select GENERIC_IDLE_POLL_SETUP
select GENERIC_IOREMAP if MMU
@@ -121,6 +123,7 @@ config RISCV
select HAVE_SYSCALL_TRACEPOINTS
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
+ select KASAN_VMALLOC if KASAN
select MODULES_USE_ELF_RELA if MODULES
select MODULE_SECTIONS if MODULES
select OF
@@ -181,8 +184,8 @@ config MMU
config PAGE_OFFSET
hex
- default 0xC0000000 if 32BIT
- default 0x80000000 if 64BIT && !MMU
+ default 0xC0000000 if 32BIT && MMU
+ default 0x80000000 if !MMU
default 0xff60000000000000 if 64BIT
config KASAN_SHADOW_OFFSET
@@ -249,7 +252,7 @@ config AS_HAS_INSN
def_bool $(as-instr,.insn r 51$(comma) 0$(comma) 0$(comma) t0$(comma) t0$(comma) zero)
source "arch/riscv/Kconfig.socs"
-source "arch/riscv/Kconfig.erratas"
+source "arch/riscv/Kconfig.errata"
menu "Platform type"
@@ -283,7 +286,6 @@ config ARCH_RV32I
select GENERIC_LIB_ASHRDI3
select GENERIC_LIB_LSHRDI3
select GENERIC_LIB_UCMPDI2
- select MMU
config ARCH_RV64I
bool "RV64I"
@@ -324,6 +326,14 @@ config SMP
If you don't know what to do here, say N.
+config SCHED_MC
+ bool "Multi-core scheduler support"
+ depends on SMP
+ help
+ Multi-core scheduler support improves the CPU scheduler's decision
+ making when dealing with multi-core CPU chips at a cost of slightly
+ increased overhead in some places. If unsure say N here.
+
config NR_CPUS
int "Maximum number of CPUs (2-512)"
depends on SMP
@@ -382,9 +392,9 @@ config RISCV_ALTERNATIVE
depends on !XIP_KERNEL
help
This Kconfig allows the kernel to automatically patch the
- errata required by the execution platform at run time. The
- code patching is performed once in the boot stages. It means
- that the overhead from this mechanism is just taken once.
+ erratum or cpufeature required by the execution platform at run
+ time. The code patching overhead is minimal, as it's only done
+ once at boot and once on each module load.
config RISCV_ALTERNATIVE_EARLY
bool
@@ -402,13 +412,32 @@ config RISCV_ISA_C
If you don't know what to do here, say Y.
+config RISCV_ISA_SVNAPOT
+ bool "Svnapot extension support for supervisor mode NAPOT pages"
+ depends on 64BIT && MMU
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Allow kernel to detect the Svnapot ISA-extension dynamically at boot
+ time and enable its usage.
+
+ The Svnapot extension is used to mark contiguous PTEs as a range
+ of contiguous virtual-to-physical translations for a naturally
+ aligned power-of-2 (NAPOT) granularity larger than the base 4KB page
+ size. When HUGETLBFS is also selected this option unconditionally
+ allocates some memory for each NAPOT page size supported by the kernel.
+ When optimizing for low memory consumption and for platforms without
+ the Svnapot extension, it may be better to say N here.
+
+ If you don't know what to do here, say Y.
+
config RISCV_ISA_SVPBMT
- bool "SVPBMT extension support"
+ bool "Svpbmt extension support for supervisor mode page-based memory types"
depends on 64BIT && MMU
depends on RISCV_ALTERNATIVE
default y
help
- Adds support to dynamically detect the presence of the SVPBMT
+ Adds support to dynamically detect the presence of the Svpbmt
ISA-extension (Supervisor-mode: page-based memory types) and
enable its usage.
@@ -416,7 +445,7 @@ config RISCV_ISA_SVPBMT
that indicate the cacheability, idempotency, and ordering
properties for access to that page.
- The SVPBMT extension is only available on 64Bit cpus.
+ The Svpbmt extension is only available on 64-bit cpus.
If you don't know what to do here, say Y.
@@ -460,6 +489,19 @@ config RISCV_ISA_ZICBOM
If you don't know what to do here, say Y.
+config RISCV_ISA_ZICBOZ
+ bool "Zicboz extension support for faster zeroing of memory"
+ depends on MMU
+ depends on RISCV_ALTERNATIVE
+ default y
+ help
+ Enable the use of the Zicboz extension (cbo.zero instruction)
+ when available.
+
+ The Zicboz extension is used for faster zeroing of memory.
+
+ If you don't know what to do here, say Y.
+
config TOOLCHAIN_HAS_ZIHINTPAUSE
bool
default y
@@ -586,6 +628,20 @@ config COMPAT
If you want to execute 32-bit userspace applications, say Y.
+config RELOCATABLE
+ bool "Build a relocatable kernel"
+ depends on MMU && 64BIT && !XIP_KERNEL
+ help
+ This builds a kernel as a Position Independent Executable (PIE),
+ which retains all relocation metadata required to relocate the
+ kernel binary at runtime to a different virtual address than the
+ address it was linked at.
+ Since RISCV uses the RELA relocation format, this requires a
+ relocation pass at runtime even if the kernel is loaded at the
+ same address it was linked at.
+
+ If unsure, say N.
+
endmenu # "Kernel features"
menu "Boot options"
diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.errata
index 0c8f4652cd82..0c8f4652cd82 100644
--- a/arch/riscv/Kconfig.erratas
+++ b/arch/riscv/Kconfig.errata
diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile
index b05e833a022d..0fb256bf8270 100644
--- a/arch/riscv/Makefile
+++ b/arch/riscv/Makefile
@@ -7,9 +7,13 @@
#
OBJCOPYFLAGS := -O binary
-LDFLAGS_vmlinux :=
+LDFLAGS_vmlinux := -z norelro
+ifeq ($(CONFIG_RELOCATABLE),y)
+ LDFLAGS_vmlinux += -shared -Bsymbolic -z notext --emit-relocs
+ KBUILD_CFLAGS += -fPIE
+endif
ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
- LDFLAGS_vmlinux := --no-relax
+ LDFLAGS_vmlinux += --no-relax
KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
ifeq ($(CONFIG_RISCV_ISA_C),y)
CC_FLAGS_FTRACE := -fpatchable-function-entry=4
@@ -183,3 +187,7 @@ rv64_randconfig:
PHONY += rv32_defconfig
rv32_defconfig:
$(Q)$(MAKE) -f $(srctree)/Makefile defconfig 32-bit.config
+
+PHONY += rv32_nommu_virt_defconfig
+rv32_nommu_virt_defconfig:
+ $(Q)$(MAKE) -f $(srctree)/Makefile nommu_virt_defconfig 32-bit.config
diff --git a/arch/riscv/Makefile.postlink b/arch/riscv/Makefile.postlink
new file mode 100644
index 000000000000..a46fc578b30b
--- /dev/null
+++ b/arch/riscv/Makefile.postlink
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+# ===========================================================================
+# Post-link riscv pass
+# ===========================================================================
+#
+# Check that vmlinux relocations look sane
+
+PHONY := __archpost
+__archpost:
+
+-include include/config/auto.conf
+include $(srctree)/scripts/Kbuild.include
+
+quiet_cmd_relocs_check = CHKREL $@
+cmd_relocs_check = \
+ $(CONFIG_SHELL) $(srctree)/arch/riscv/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@"
+
+ifdef CONFIG_RELOCATABLE
+quiet_cmd_cp_vmlinux_relocs = CPREL vmlinux.relocs
+cmd_cp_vmlinux_relocs = cp vmlinux vmlinux.relocs
+
+quiet_cmd_relocs_strip = STRIPREL $@
+cmd_relocs_strip = $(OBJCOPY) --remove-section='.rel.*' \
+ --remove-section='.rel__*' \
+ --remove-section='.rela.*' \
+ --remove-section='.rela__*' $@
+endif
+
+# `@true` prevents complaint when there is nothing to be done
+
+vmlinux: FORCE
+ @true
+ifdef CONFIG_RELOCATABLE
+ $(call if_changed,relocs_check)
+ $(call if_changed,cp_vmlinux_relocs)
+ $(call if_changed,relocs_strip)
+endif
+
+%.ko: FORCE
+ @true
+
+clean:
+ @true
+
+PHONY += FORCE clean
+
+FORCE:
+
+.PHONY: $(PHONY)
diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile
index c72de7232abb..22b13947bd13 100644
--- a/arch/riscv/boot/Makefile
+++ b/arch/riscv/boot/Makefile
@@ -33,7 +33,14 @@ $(obj)/xipImage: vmlinux FORCE
endif
+ifdef CONFIG_RELOCATABLE
+vmlinux.relocs: vmlinux
+ @ (! [ -f vmlinux.relocs ] && echo "vmlinux.relocs can't be found, please remove vmlinux and try again") || true
+
+$(obj)/Image: vmlinux.relocs FORCE
+else
$(obj)/Image: vmlinux FORCE
+endif
$(call if_changed,objcopy)
$(obj)/Image.gz: $(obj)/Image FORCE
diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c
index 31d2ebea4286..8d8301d7b1ec 100644
--- a/arch/riscv/errata/sifive/errata.c
+++ b/arch/riscv/errata/sifive/errata.c
@@ -14,7 +14,7 @@
#include <asm/errata_list.h>
struct errata_info_t {
- char name[ERRATA_STRING_LENGTH_MAX];
+ char name[32];
bool (*check_func)(unsigned long arch_id, unsigned long impid);
};
@@ -101,12 +101,12 @@ void __init_or_module sifive_errata_patch_func(struct alt_entry *begin,
for (alt = begin; alt < end; alt++) {
if (alt->vendor_id != SIFIVE_VENDOR_ID)
continue;
- if (alt->errata_id >= ERRATA_SIFIVE_NUMBER) {
- WARN(1, "This errata id:%d is not in kernel errata list", alt->errata_id);
+ if (alt->patch_id >= ERRATA_SIFIVE_NUMBER) {
+ WARN(1, "This errata id:%d is not in kernel errata list", alt->patch_id);
continue;
}
- tmp = (1U << alt->errata_id);
+ tmp = (1U << alt->patch_id);
if (cpu_req_errata & tmp) {
mutex_lock(&text_mutex);
patch_text_nosync(ALT_OLD_PTR(alt), ALT_ALT_PTR(alt),
diff --git a/arch/riscv/errata/thead/errata.c b/arch/riscv/errata/thead/errata.c
index 3b96a06d3c54..a86c4facc2a6 100644
--- a/arch/riscv/errata/thead/errata.c
+++ b/arch/riscv/errata/thead/errata.c
@@ -11,7 +11,9 @@
#include <linux/uaccess.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
#include <asm/errata_list.h>
+#include <asm/hwprobe.h>
#include <asm/patch.h>
#include <asm/vendorid_list.h>
@@ -93,10 +95,10 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
for (alt = begin; alt < end; alt++) {
if (alt->vendor_id != THEAD_VENDOR_ID)
continue;
- if (alt->errata_id >= ERRATA_THEAD_NUMBER)
+ if (alt->patch_id >= ERRATA_THEAD_NUMBER)
continue;
- tmp = (1U << alt->errata_id);
+ tmp = (1U << alt->patch_id);
if (cpu_req_errata & tmp) {
oldptr = ALT_OLD_PTR(alt);
altptr = ALT_ALT_PTR(alt);
@@ -115,3 +117,11 @@ void __init_or_module thead_errata_patch_func(struct alt_entry *begin, struct al
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
local_flush_icache_all();
}
+
+void thead_feature_probe_func(unsigned int cpu,
+ unsigned long archid,
+ unsigned long impid)
+{
+ if ((archid == 0) && (impid == 0))
+ per_cpu(misaligned_access_speed, cpu) = RISCV_HWPROBE_MISALIGNED_FAST;
+}
diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h
index 51c6867e02f3..b8c55fb3ab2c 100644
--- a/arch/riscv/include/asm/alternative-macros.h
+++ b/arch/riscv/include/asm/alternative-macros.h
@@ -6,18 +6,18 @@
#ifdef __ASSEMBLY__
-.macro ALT_ENTRY oldptr newptr vendor_id errata_id new_len
+.macro ALT_ENTRY oldptr newptr vendor_id patch_id new_len
.4byte \oldptr - .
.4byte \newptr - .
.2byte \vendor_id
.2byte \new_len
- .4byte \errata_id
+ .4byte \patch_id
.endm
-.macro ALT_NEW_CONTENT vendor_id, errata_id, enable = 1, new_c : vararg
+.macro ALT_NEW_CONTENT vendor_id, patch_id, enable = 1, new_c
.if \enable
.pushsection .alternative, "a"
- ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f
+ ALT_ENTRY 886b, 888f, \vendor_id, \patch_id, 889f - 888f
.popsection
.subsection 1
888 :
@@ -33,7 +33,7 @@
.endif
.endm
-.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable
+.macro ALTERNATIVE_CFG old_c, new_c, vendor_id, patch_id, enable
886 :
.option push
.option norvc
@@ -41,13 +41,13 @@
\old_c
.option pop
887 :
- ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c
+ ALT_NEW_CONTENT \vendor_id, \patch_id, \enable, "\new_c"
.endm
-.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
- new_c_2, vendor_id_2, errata_id_2, enable_2
- ALTERNATIVE_CFG "\old_c", "\new_c_1", \vendor_id_1, \errata_id_1, \enable_1
- ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2
+.macro ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, patch_id_1, enable_1, \
+ new_c_2, vendor_id_2, patch_id_2, enable_2
+ ALTERNATIVE_CFG "\old_c", "\new_c_1", \vendor_id_1, \patch_id_1, \enable_1
+ ALT_NEW_CONTENT \vendor_id_2, \patch_id_2, \enable_2, "\new_c_2"
.endm
#define __ALTERNATIVE_CFG(...) ALTERNATIVE_CFG __VA_ARGS__
@@ -58,17 +58,17 @@
#include <asm/asm.h>
#include <linux/stringify.h>
-#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen) \
+#define ALT_ENTRY(oldptr, newptr, vendor_id, patch_id, newlen) \
".4byte ((" oldptr ") - .) \n" \
".4byte ((" newptr ") - .) \n" \
".2byte " vendor_id "\n" \
".2byte " newlen "\n" \
- ".4byte " errata_id "\n"
+ ".4byte " patch_id "\n"
-#define ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c) \
+#define ALT_NEW_CONTENT(vendor_id, patch_id, enable, new_c) \
".if " __stringify(enable) " == 1\n" \
".pushsection .alternative, \"a\"\n" \
- ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \
+ ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(patch_id), "889f - 888f") \
".popsection\n" \
".subsection 1\n" \
"888 :\n" \
@@ -83,7 +83,7 @@
".previous\n" \
".endif\n"
-#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \
+#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, enable) \
"886 :\n" \
".option push\n" \
".option norvc\n" \
@@ -91,22 +91,22 @@
old_c "\n" \
".option pop\n" \
"887 :\n" \
- ALT_NEW_CONTENT(vendor_id, errata_id, enable, new_c)
+ ALT_NEW_CONTENT(vendor_id, patch_id, enable, new_c)
-#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
- new_c_2, vendor_id_2, errata_id_2, enable_2) \
- __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, errata_id_1, enable_1) \
- ALT_NEW_CONTENT(vendor_id_2, errata_id_2, enable_2, new_c_2)
+#define __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, patch_id_1, enable_1, \
+ new_c_2, vendor_id_2, patch_id_2, enable_2) \
+ __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, patch_id_1, enable_1) \
+ ALT_NEW_CONTENT(vendor_id_2, patch_id_2, enable_2, new_c_2)
#endif /* __ASSEMBLY__ */
-#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
- __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k))
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, CONFIG_k) \
+ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, IS_ENABLED(CONFIG_k))
-#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
- new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \
- __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \
- new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2))
+#define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, patch_id_1, CONFIG_k_1, \
+ new_c_2, vendor_id_2, patch_id_2, CONFIG_k_2) \
+ __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, patch_id_1, IS_ENABLED(CONFIG_k_1), \
+ new_c_2, vendor_id_2, patch_id_2, IS_ENABLED(CONFIG_k_2))
#else /* CONFIG_RISCV_ALTERNATIVE */
#ifdef __ASSEMBLY__
@@ -137,19 +137,19 @@
/*
* Usage:
- * ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k)
+ * ALTERNATIVE(old_content, new_content, vendor_id, patch_id, CONFIG_k)
* in the assembly code. Otherwise,
- * asm(ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k));
+ * asm(ALTERNATIVE(old_content, new_content, vendor_id, patch_id, CONFIG_k));
*
* old_content: The old content which is probably replaced with new content.
* new_content: The new content.
* vendor_id: The CPU vendor ID.
- * errata_id: The errata ID.
- * CONFIG_k: The Kconfig of this errata. When Kconfig is disabled, the old
+ * patch_id: The patch ID (erratum ID or cpufeature ID).
+ * CONFIG_k: The Kconfig of this patch ID. When Kconfig is disabled, the old
* content will alwyas be executed.
*/
-#define ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) \
- _ALTERNATIVE_CFG(old_content, new_content, vendor_id, errata_id, CONFIG_k)
+#define ALTERNATIVE(old_content, new_content, vendor_id, patch_id, CONFIG_k) \
+ _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k)
/*
* A vendor wants to replace an old_content, but another vendor has used
@@ -158,9 +158,9 @@
* on the following sample code and then replace ALTERNATIVE() with
* ALTERNATIVE_2() to append its customized content.
*/
-#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
- new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \
- _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
- new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2)
+#define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \
+ new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2) \
+ _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \
+ new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2)
#endif
diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h
index b8648d4f2ac1..6a41537826a7 100644
--- a/arch/riscv/include/asm/alternative.h
+++ b/arch/riscv/include/asm/alternative.h
@@ -6,8 +6,6 @@
#ifndef __ASM_ALTERNATIVE_H
#define __ASM_ALTERNATIVE_H
-#define ERRATA_STRING_LENGTH_MAX 32
-
#include <asm/alternative-macros.h>
#ifndef __ASSEMBLY__
@@ -15,10 +13,14 @@
#ifdef CONFIG_RISCV_ALTERNATIVE
#include <linux/init.h>
+#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/stddef.h>
#include <asm/hwcap.h>
+#define PATCH_ID_CPUFEATURE_ID(p) lower_16_bits(p)
+#define PATCH_ID_CPUFEATURE_VALUE(p) upper_16_bits(p)
+
#define RISCV_ALTERNATIVES_BOOT 0 /* alternatives applied during regular boot */
#define RISCV_ALTERNATIVES_MODULE 1 /* alternatives applied during module-init */
#define RISCV_ALTERNATIVES_EARLY_BOOT 2 /* alternatives applied before mmu start */
@@ -28,6 +30,7 @@
#define ALT_OLD_PTR(a) __ALT_PTR(a, old_offset)
#define ALT_ALT_PTR(a) __ALT_PTR(a, alt_offset)
+void probe_vendor_features(unsigned int cpu);
void __init apply_boot_alternatives(void);
void __init apply_early_boot_alternatives(void);
void apply_module_alternatives(void *start, size_t length);
@@ -38,14 +41,9 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len,
struct alt_entry {
s32 old_offset; /* offset relative to original instruction or data */
s32 alt_offset; /* offset relative to replacement instruction or data */
- u16 vendor_id; /* cpu vendor id */
+ u16 vendor_id; /* CPU vendor ID */
u16 alt_len; /* The replacement size */
- u32 errata_id; /* The errata id */
-};
-
-struct errata_checkfunc_id {
- unsigned long vendor_id;
- bool (*func)(struct alt_entry *alt);
+ u32 patch_id; /* The patch ID (erratum ID or cpufeature ID) */
};
void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
@@ -55,11 +53,15 @@ void thead_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
unsigned long archid, unsigned long impid,
unsigned int stage);
+void thead_feature_probe_func(unsigned int cpu, unsigned long archid,
+ unsigned long impid);
+
void riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end,
unsigned int stage);
#else /* CONFIG_RISCV_ALTERNATIVE */
+static inline void probe_vendor_features(unsigned int cpu) { }
static inline void apply_boot_alternatives(void) { }
static inline void apply_early_boot_alternatives(void) { }
static inline void apply_module_alternatives(void *start, size_t length) { }
diff --git a/arch/riscv/include/asm/asm-prototypes.h b/arch/riscv/include/asm/asm-prototypes.h
index ef386fcf3939..61ba8ed43d8f 100644
--- a/arch/riscv/include/asm/asm-prototypes.h
+++ b/arch/riscv/include/asm/asm-prototypes.h
@@ -27,5 +27,7 @@ DECLARE_DO_ERROR_INFO(do_trap_break);
asmlinkage unsigned long get_overflow_stack(void);
asmlinkage void handle_bad_stack(struct pt_regs *regs);
+asmlinkage void do_page_fault(struct pt_regs *regs);
+asmlinkage void do_irq(struct pt_regs *regs);
#endif /* _ASM_RISCV_PROTOTYPES_H */
diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h
index 816e753de636..114bbadaef41 100644
--- a/arch/riscv/include/asm/asm.h
+++ b/arch/riscv/include/asm/asm.h
@@ -69,6 +69,7 @@
#endif
#ifdef __ASSEMBLY__
+#include <asm/asm-offsets.h>
/* Common assembly source macros */
@@ -81,6 +82,66 @@
.endr
.endm
+ /* save all GPs except x1 ~ x5 */
+ .macro save_from_x6_to_x31
+ REG_S x6, PT_T1(sp)
+ REG_S x7, PT_T2(sp)
+ REG_S x8, PT_S0(sp)
+ REG_S x9, PT_S1(sp)
+ REG_S x10, PT_A0(sp)
+ REG_S x11, PT_A1(sp)
+ REG_S x12, PT_A2(sp)
+ REG_S x13, PT_A3(sp)
+ REG_S x14, PT_A4(sp)
+ REG_S x15, PT_A5(sp)
+ REG_S x16, PT_A6(sp)
+ REG_S x17, PT_A7(sp)
+ REG_S x18, PT_S2(sp)
+ REG_S x19, PT_S3(sp)
+ REG_S x20, PT_S4(sp)
+ REG_S x21, PT_S5(sp)
+ REG_S x22, PT_S6(sp)
+ REG_S x23, PT_S7(sp)
+ REG_S x24, PT_S8(sp)
+ REG_S x25, PT_S9(sp)
+ REG_S x26, PT_S10(sp)
+ REG_S x27, PT_S11(sp)
+ REG_S x28, PT_T3(sp)
+ REG_S x29, PT_T4(sp)
+ REG_S x30, PT_T5(sp)
+ REG_S x31, PT_T6(sp)
+ .endm
+
+ /* restore all GPs except x1 ~ x5 */
+ .macro restore_from_x6_to_x31
+ REG_L x6, PT_T1(sp)
+ REG_L x7, PT_T2(sp)
+ REG_L x8, PT_S0(sp)
+ REG_L x9, PT_S1(sp)
+ REG_L x10, PT_A0(sp)
+ REG_L x11, PT_A1(sp)
+ REG_L x12, PT_A2(sp)
+ REG_L x13, PT_A3(sp)
+ REG_L x14, PT_A4(sp)
+ REG_L x15, PT_A5(sp)
+ REG_L x16, PT_A6(sp)
+ REG_L x17, PT_A7(sp)
+ REG_L x18, PT_S2(sp)
+ REG_L x19, PT_S3(sp)
+ REG_L x20, PT_S4(sp)
+ REG_L x21, PT_S5(sp)
+ REG_L x22, PT_S6(sp)
+ REG_L x23, PT_S7(sp)
+ REG_L x24, PT_S8(sp)
+ REG_L x25, PT_S9(sp)
+ REG_L x26, PT_S10(sp)
+ REG_L x27, PT_S11(sp)
+ REG_L x28, PT_T3(sp)
+ REG_L x29, PT_T4(sp)
+ REG_L x30, PT_T5(sp)
+ REG_L x31, PT_T6(sp)
+ .endm
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_ASM_H */
diff --git a/arch/riscv/include/asm/cacheflush.h b/arch/riscv/include/asm/cacheflush.h
index 03e3b95ae6da..8091b8bf4883 100644
--- a/arch/riscv/include/asm/cacheflush.h
+++ b/arch/riscv/include/asm/cacheflush.h
@@ -50,7 +50,8 @@ void flush_icache_mm(struct mm_struct *mm, bool local);
#endif /* CONFIG_SMP */
extern unsigned int riscv_cbom_block_size;
-void riscv_init_cbom_blocksize(void);
+extern unsigned int riscv_cboz_block_size;
+void riscv_init_cbo_blocksizes(void);
#ifdef CONFIG_RISCV_DMA_NONCOHERENT
void riscv_noncoherent_supported(void);
diff --git a/arch/riscv/include/asm/cpufeature.h b/arch/riscv/include/asm/cpufeature.h
new file mode 100644
index 000000000000..808d5403f2ac
--- /dev/null
+++ b/arch/riscv/include/asm/cpufeature.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2022-2023 Rivos, Inc
+ */
+
+#ifndef _ASM_CPUFEATURE_H
+#define _ASM_CPUFEATURE_H
+
+/*
+ * These are probed via a device_initcall(), via either the SBI or directly
+ * from the corresponding CSRs.
+ */
+struct riscv_cpuinfo {
+ unsigned long mvendorid;
+ unsigned long marchid;
+ unsigned long mimpid;
+};
+
+DECLARE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
+
+DECLARE_PER_CPU(long, misaligned_access_speed);
+
+#endif
diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h
index 0e571f6483d9..7c2b8cdb7b77 100644
--- a/arch/riscv/include/asm/csr.h
+++ b/arch/riscv/include/asm/csr.h
@@ -40,7 +40,6 @@
#define SR_UXL _AC(0x300000000, UL) /* XLEN mask for U-mode */
#define SR_UXL_32 _AC(0x100000000, UL) /* XLEN = 32 for U-mode */
#define SR_UXL_64 _AC(0x200000000, UL) /* XLEN = 64 for U-mode */
-#define SR_UXL_SHIFT 32
#endif
/* SATP flags */
diff --git a/arch/riscv/include/asm/entry-common.h b/arch/riscv/include/asm/entry-common.h
new file mode 100644
index 000000000000..6e4dee49d84b
--- /dev/null
+++ b/arch/riscv/include/asm/entry-common.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_ENTRY_COMMON_H
+#define _ASM_RISCV_ENTRY_COMMON_H
+
+#include <asm/stacktrace.h>
+
+void handle_page_fault(struct pt_regs *regs);
+void handle_break(struct pt_regs *regs);
+
+#endif /* _ASM_RISCV_ENTRY_COMMON_H */
diff --git a/arch/riscv/include/asm/hugetlb.h b/arch/riscv/include/asm/hugetlb.h
index ec19d6afc896..fe6f23006641 100644
--- a/arch/riscv/include/asm/hugetlb.h
+++ b/arch/riscv/include/asm/hugetlb.h
@@ -2,7 +2,6 @@
#ifndef _ASM_RISCV_HUGETLB_H
#define _ASM_RISCV_HUGETLB_H
-#include <asm-generic/hugetlb.h>
#include <asm/page.h>
static inline void arch_clear_hugepage_flags(struct page *page)
@@ -11,4 +10,37 @@ static inline void arch_clear_hugepage_flags(struct page *page)
}
#define arch_clear_hugepage_flags arch_clear_hugepage_flags
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+#define __HAVE_ARCH_HUGE_PTE_CLEAR
+void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep, unsigned long sz);
+
+#define __HAVE_ARCH_HUGE_SET_HUGE_PTE_AT
+void set_huge_pte_at(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep, pte_t pte);
+
+#define __HAVE_ARCH_HUGE_PTEP_GET_AND_CLEAR
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+
+#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH
+pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep);
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT
+void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr, pte_t *ptep);
+
+#define __HAVE_ARCH_HUGE_PTEP_SET_ACCESS_FLAGS
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *ptep,
+ pte_t pte, int dirty);
+
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags);
+#define arch_make_huge_pte arch_make_huge_pte
+
+#endif /*CONFIG_RISCV_ISA_SVNAPOT*/
+
+#include <asm-generic/hugetlb.h>
+
#endif /* _ASM_RISCV_HUGETLB_H */
diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h
index 6263a0de1c6a..9af793970855 100644
--- a/arch/riscv/include/asm/hwcap.h
+++ b/arch/riscv/include/asm/hwcap.h
@@ -42,6 +42,8 @@
#define RISCV_ISA_EXT_ZBB 30
#define RISCV_ISA_EXT_ZICBOM 31
#define RISCV_ISA_EXT_ZIHINTPAUSE 32
+#define RISCV_ISA_EXT_SVNAPOT 33
+#define RISCV_ISA_EXT_ZICBOZ 34
#define RISCV_ISA_EXT_MAX 64
#define RISCV_ISA_EXT_NAME_LEN_MAX 32
diff --git a/arch/riscv/include/asm/hwprobe.h b/arch/riscv/include/asm/hwprobe.h
new file mode 100644
index 000000000000..78936f4ff513
--- /dev/null
+++ b/arch/riscv/include/asm/hwprobe.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright 2023 Rivos, Inc
+ */
+
+#ifndef _ASM_HWPROBE_H
+#define _ASM_HWPROBE_H
+
+#include <uapi/asm/hwprobe.h>
+
+#define RISCV_HWPROBE_MAX_KEY 5
+
+#endif
diff --git a/arch/riscv/include/asm/insn-def.h b/arch/riscv/include/asm/insn-def.h
index e01ab51f50d2..6960beb75f32 100644
--- a/arch/riscv/include/asm/insn-def.h
+++ b/arch/riscv/include/asm/insn-def.h
@@ -192,4 +192,8 @@
INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
RS1(base), SIMM12(2))
+#define CBO_zero(base) \
+ INSN_I(OPCODE_MISC_MEM, FUNC3(2), __RD(0), \
+ RS1(base), SIMM12(4))
+
#endif /* __ASM_INSN_DEF_H */
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h
index 7fed7c431928..b55ba20903ec 100644
--- a/arch/riscv/include/asm/page.h
+++ b/arch/riscv/include/asm/page.h
@@ -16,11 +16,6 @@
#define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE - 1))
-#ifdef CONFIG_64BIT
-#define HUGE_MAX_HSTATE 2
-#else
-#define HUGE_MAX_HSTATE 1
-#endif
#define HPAGE_SHIFT PMD_SHIFT
#define HPAGE_SIZE (_AC(1, UL) << HPAGE_SHIFT)
#define HPAGE_MASK (~(HPAGE_SIZE - 1))
@@ -49,10 +44,14 @@
#ifndef __ASSEMBLY__
+#ifdef CONFIG_RISCV_ISA_ZICBOZ
+void clear_page(void *page);
+#else
#define clear_page(pgaddr) memset((pgaddr), 0, PAGE_SIZE)
+#endif
#define copy_page(to, from) memcpy((to), (from), PAGE_SIZE)
-#define clear_user_page(pgaddr, vaddr, page) memset((pgaddr), 0, PAGE_SIZE)
+#define clear_user_page(pgaddr, vaddr, page) clear_page(pgaddr)
#define copy_user_page(vto, vfrom, vaddr, topg) \
memcpy((vto), (vfrom), PAGE_SIZE)
@@ -90,9 +89,16 @@ typedef struct page *pgtable_t;
#define PTE_FMT "%08lx"
#endif
+#ifdef CONFIG_64BIT
+/*
+ * We override this value as its generic definition uses __pa too early in
+ * the boot process (before kernel_map.va_pa_offset is set).
+ */
+#define MIN_MEMBLOCK_ADDR 0
+#endif
+
#ifdef CONFIG_MMU
-extern unsigned long riscv_pfn_base;
-#define ARCH_PFN_OFFSET (riscv_pfn_base)
+#define ARCH_PFN_OFFSET (PFN_DOWN((unsigned long)phys_ram_base))
#else
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
@@ -122,7 +128,11 @@ extern phys_addr_t phys_ram_base;
#define is_linear_mapping(x) \
((x) >= PAGE_OFFSET && (!IS_ENABLED(CONFIG_64BIT) || (x) < PAGE_OFFSET + KERN_VIRT_SIZE))
+#ifndef CONFIG_DEBUG_VIRTUAL
#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + kernel_map.va_pa_offset))
+#else
+void *linear_mapping_pa_to_va(unsigned long x);
+#endif
#define kernel_mapping_pa_to_va(y) ({ \
unsigned long _y = (unsigned long)(y); \
(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \
@@ -131,7 +141,11 @@ extern phys_addr_t phys_ram_base;
})
#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)
+#ifndef CONFIG_DEBUG_VIRTUAL
#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - kernel_map.va_pa_offset)
+#else
+phys_addr_t linear_mapping_va_to_pa(unsigned long x);
+#endif
#define kernel_mapping_va_to_pa(y) ({ \
unsigned long _y = (unsigned long)(y); \
(IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \
diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h
index 42a042c0e13e..7a5097202e15 100644
--- a/arch/riscv/include/asm/pgtable-64.h
+++ b/arch/riscv/include/asm/pgtable-64.h
@@ -79,6 +79,40 @@ typedef struct {
#define _PAGE_PFN_MASK GENMASK(53, 10)
/*
+ * [63] Svnapot definitions:
+ * 0 Svnapot disabled
+ * 1 Svnapot enabled
+ */
+#define _PAGE_NAPOT_SHIFT 63
+#define _PAGE_NAPOT BIT(_PAGE_NAPOT_SHIFT)
+/*
+ * Only 64KB (order 4) napot ptes supported.
+ */
+#define NAPOT_CONT_ORDER_BASE 4
+enum napot_cont_order {
+ NAPOT_CONT64KB_ORDER = NAPOT_CONT_ORDER_BASE,
+ NAPOT_ORDER_MAX,
+};
+
+#define for_each_napot_order(order) \
+ for (order = NAPOT_CONT_ORDER_BASE; order < NAPOT_ORDER_MAX; order++)
+#define for_each_napot_order_rev(order) \
+ for (order = NAPOT_ORDER_MAX - 1; \
+ order >= NAPOT_CONT_ORDER_BASE; order--)
+#define napot_cont_order(val) (__builtin_ctzl((val.pte >> _PAGE_PFN_SHIFT) << 1))
+
+#define napot_cont_shift(order) ((order) + PAGE_SHIFT)
+#define napot_cont_size(order) BIT(napot_cont_shift(order))
+#define napot_cont_mask(order) (~(napot_cont_size(order) - 1UL))
+#define napot_pte_num(order) BIT(order)
+
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+#define HUGE_MAX_HSTATE (2 + (NAPOT_ORDER_MAX - NAPOT_CONT_ORDER_BASE))
+#else
+#define HUGE_MAX_HSTATE 2
+#endif
+
+/*
* [62:61] Svpbmt Memory Type definitions:
*
* 00 - PMA Normal Cacheable, No change to implied PMA memory type
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index f641837ccf31..2258b27173b0 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -268,10 +268,47 @@ static inline pte_t pud_pte(pud_t pud)
return __pte(pud_val(pud));
}
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+
+static __always_inline bool has_svnapot(void)
+{
+ return riscv_has_extension_likely(RISCV_ISA_EXT_SVNAPOT);
+}
+
+static inline unsigned long pte_napot(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_NAPOT;
+}
+
+static inline pte_t pte_mknapot(pte_t pte, unsigned int order)
+{
+ int pos = order - 1 + _PAGE_PFN_SHIFT;
+ unsigned long napot_bit = BIT(pos);
+ unsigned long napot_mask = ~GENMASK(pos, _PAGE_PFN_SHIFT);
+
+ return __pte((pte_val(pte) & napot_mask) | napot_bit | _PAGE_NAPOT);
+}
+
+#else
+
+static __always_inline bool has_svnapot(void) { return false; }
+
+static inline unsigned long pte_napot(pte_t pte)
+{
+ return 0;
+}
+
+#endif /* CONFIG_RISCV_ISA_SVNAPOT */
+
/* Yields the page frame number (PFN) of a page table entry */
static inline unsigned long pte_pfn(pte_t pte)
{
- return __page_val_to_pfn(pte_val(pte));
+ unsigned long res = __page_val_to_pfn(pte_val(pte));
+
+ if (has_svnapot() && pte_napot(pte))
+ res = res & (res - 1UL);
+
+ return res;
}
#define pte_page(x) pfn_to_page(pte_pfn(x))
diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h
index 6ecd461129d2..b5b0adcc85c1 100644
--- a/arch/riscv/include/asm/ptrace.h
+++ b/arch/riscv/include/asm/ptrace.h
@@ -53,6 +53,9 @@ struct pt_regs {
unsigned long orig_a0;
};
+#define PTRACE_SYSEMU 0x1f
+#define PTRACE_SYSEMU_SINGLESTEP 0x20
+
#ifdef CONFIG_64BIT
#define REG_FMT "%016lx"
#else
@@ -121,8 +124,6 @@ extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
unsigned long frame_pointer);
-int do_syscall_trace_enter(struct pt_regs *regs);
-void do_syscall_trace_exit(struct pt_regs *regs);
/**
* regs_get_register() - get register value from its offset
@@ -172,6 +173,11 @@ static inline unsigned long regs_get_kernel_argument(struct pt_regs *regs,
return 0;
}
+static inline int regs_irqs_disabled(struct pt_regs *regs)
+{
+ return !(regs->status & SR_PIE);
+}
+
#endif /* __ASSEMBLY__ */
#endif /* _ASM_RISCV_PTRACE_H */
diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h
index a2c14d4b3993..ec11001c3fe0 100644
--- a/arch/riscv/include/asm/set_memory.h
+++ b/arch/riscv/include/asm/set_memory.h
@@ -56,4 +56,7 @@ bool kernel_page_present(struct page *page);
#define SECTION_ALIGN L1_CACHE_BYTES
#endif /* CONFIG_STRICT_KERNEL_RWX */
+#define PECOFF_SECTION_ALIGNMENT 0x1000
+#define PECOFF_FILE_ALIGNMENT 0x200
+
#endif /* _ASM_RISCV_SET_MEMORY_H */
diff --git a/arch/riscv/include/asm/stacktrace.h b/arch/riscv/include/asm/stacktrace.h
index 3450c1912afd..f7e8ef2418b9 100644
--- a/arch/riscv/include/asm/stacktrace.h
+++ b/arch/riscv/include/asm/stacktrace.h
@@ -16,4 +16,9 @@ extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *re
extern void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
const char *loglvl);
+static inline bool on_thread_stack(void)
+{
+ return !(((unsigned long)(current->stack) ^ current_stack_pointer) & ~(THREAD_SIZE - 1));
+}
+
#endif /* _ASM_RISCV_STACKTRACE_H */
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 384a63b86420..0148c6bd9675 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -10,6 +10,7 @@
#ifndef _ASM_RISCV_SYSCALL_H
#define _ASM_RISCV_SYSCALL_H
+#include <asm/hwprobe.h>
#include <uapi/linux/audit.h>
#include <linux/sched.h>
#include <linux/err.h>
@@ -74,5 +75,29 @@ static inline int syscall_get_arch(struct task_struct *task)
#endif
}
+typedef long (*syscall_t)(ulong, ulong, ulong, ulong, ulong, ulong, ulong);
+static inline void syscall_handler(struct pt_regs *regs, ulong syscall)
+{
+ syscall_t fn;
+
+#ifdef CONFIG_COMPAT
+ if ((regs->status & SR_UXL) == SR_UXL_32)
+ fn = compat_sys_call_table[syscall];
+ else
+#endif
+ fn = sys_call_table[syscall];
+
+ regs->a0 = fn(regs->orig_a0, regs->a1, regs->a2,
+ regs->a3, regs->a4, regs->a5, regs->a6);
+}
+
+static inline bool arch_syscall_is_vdso_sigreturn(struct pt_regs *regs)
+{
+ return false;
+}
+
asmlinkage long sys_riscv_flush_icache(uintptr_t, uintptr_t, uintptr_t);
+
+asmlinkage long sys_riscv_hwprobe(struct riscv_hwprobe *, size_t, size_t,
+ unsigned long *, unsigned int);
#endif /* _ASM_RISCV_SYSCALL_H */
diff --git a/arch/riscv/include/asm/thread_info.h b/arch/riscv/include/asm/thread_info.h
index f704c8dd57e0..e0d202134b44 100644
--- a/arch/riscv/include/asm/thread_info.h
+++ b/arch/riscv/include/asm/thread_info.h
@@ -67,6 +67,7 @@ struct thread_info {
long kernel_sp; /* Kernel stack pointer */
long user_sp; /* User stack pointer */
int cpu;
+ unsigned long syscall_work; /* SYSCALL_WORK_ flags */
};
/*
@@ -89,26 +90,18 @@ struct thread_info {
* - pending work-to-be-done flags are in lowest half-word
* - other flags in upper half-word(s)
*/
-#define TIF_SYSCALL_TRACE 0 /* syscall trace active */
#define TIF_NOTIFY_RESUME 1 /* callback before returning to user */
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_RESTORE_SIGMASK 4 /* restore signal mask in do_signal() */
#define TIF_MEMDIE 5 /* is terminating due to OOM killer */
-#define TIF_SYSCALL_TRACEPOINT 6 /* syscall tracepoint instrumentation */
-#define TIF_SYSCALL_AUDIT 7 /* syscall auditing */
-#define TIF_SECCOMP 8 /* syscall secure computing */
#define TIF_NOTIFY_SIGNAL 9 /* signal notifications exist */
#define TIF_UPROBE 10 /* uprobe breakpoint or singlestep */
#define TIF_32BIT 11 /* compat-mode 32bit process */
-#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
-#define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT)
-#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
-#define _TIF_SECCOMP (1 << TIF_SECCOMP)
#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL)
#define _TIF_UPROBE (1 << TIF_UPROBE)
@@ -116,8 +109,4 @@ struct thread_info {
(_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_SIGNAL | _TIF_UPROBE)
-#define _TIF_SYSCALL_WORK \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
- _TIF_SECCOMP)
-
#endif /* _ASM_RISCV_THREAD_INFO_H */
diff --git a/arch/riscv/include/asm/topology.h b/arch/riscv/include/asm/topology.h
new file mode 100644
index 000000000000..e316ab3b77f3
--- /dev/null
+++ b/arch/riscv/include/asm/topology.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_RISCV_TOPOLOGY_H
+#define _ASM_RISCV_TOPOLOGY_H
+
+#include <linux/arch_topology.h>
+
+/* Replace task scheduler's default frequency-invariant accounting */
+#define arch_scale_freq_tick topology_scale_freq_tick
+#define arch_set_freq_scale topology_set_freq_scale
+#define arch_scale_freq_capacity topology_get_freq_scale
+#define arch_scale_freq_invariant topology_scale_freq_invariant
+
+/* Replace task scheduler's default cpu-invariant accounting */
+#define arch_scale_cpu_capacity topology_get_cpu_scale
+
+/* Enable topology flag updates */
+#define arch_update_cpu_topology topology_update_cpu_topology
+
+#include <asm-generic/topology.h>
+
+#endif /* _ASM_RISCV_TOPOLOGY_H */
diff --git a/arch/riscv/include/asm/vdso/data.h b/arch/riscv/include/asm/vdso/data.h
new file mode 100644
index 000000000000..dc2f76f58b76
--- /dev/null
+++ b/arch/riscv/include/asm/vdso/data.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __RISCV_ASM_VDSO_DATA_H
+#define __RISCV_ASM_VDSO_DATA_H
+
+#include <linux/types.h>
+#include <vdso/datapage.h>
+#include <asm/hwprobe.h>
+
+struct arch_vdso_data {
+ /* Stash static answers to the hwprobe queries when all CPUs are selected. */
+ __u64 all_cpu_hwprobe_values[RISCV_HWPROBE_MAX_KEY + 1];
+
+ /* Boolean indicating all CPUs have the same static hwprobe values. */
+ __u8 homogeneous_cpus;
+};
+
+#endif /* __RISCV_ASM_VDSO_DATA_H */
diff --git a/arch/riscv/include/asm/vdso/gettimeofday.h b/arch/riscv/include/asm/vdso/gettimeofday.h
index 77d9c2f721c4..ba3283cf7acc 100644
--- a/arch/riscv/include/asm/vdso/gettimeofday.h
+++ b/arch/riscv/include/asm/vdso/gettimeofday.h
@@ -9,6 +9,12 @@
#include <asm/csr.h>
#include <uapi/linux/time.h>
+/*
+ * 32-bit land is lacking generic time vsyscalls as well as the legacy 32-bit
+ * time syscalls like gettimeofday. Skip these definitions since on 32-bit.
+ */
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL
+
#define VDSO_HAS_CLOCK_GETRES 1
static __always_inline
@@ -60,6 +66,8 @@ int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
return ret;
}
+#endif /* CONFIG_GENERIC_TIME_VSYSCALL */
+
static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
const struct vdso_data *vd)
{
diff --git a/arch/riscv/include/asm/vmalloc.h b/arch/riscv/include/asm/vmalloc.h
index 48da5371f1e9..58d3e447f191 100644
--- a/arch/riscv/include/asm/vmalloc.h
+++ b/arch/riscv/include/asm/vmalloc.h
@@ -17,6 +17,65 @@ static inline bool arch_vmap_pmd_supported(pgprot_t prot)
return true;
}
-#endif
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+#include <linux/pgtable.h>
+#define arch_vmap_pte_range_map_size arch_vmap_pte_range_map_size
+static inline unsigned long arch_vmap_pte_range_map_size(unsigned long addr, unsigned long end,
+ u64 pfn, unsigned int max_page_shift)
+{
+ unsigned long map_size = PAGE_SIZE;
+ unsigned long size, order;
+
+ if (!has_svnapot())
+ return map_size;
+
+ for_each_napot_order_rev(order) {
+ if (napot_cont_shift(order) > max_page_shift)
+ continue;
+
+ size = napot_cont_size(order);
+ if (end - addr < size)
+ continue;
+
+ if (!IS_ALIGNED(addr, size))
+ continue;
+
+ if (!IS_ALIGNED(PFN_PHYS(pfn), size))
+ continue;
+
+ map_size = size;
+ break;
+ }
+
+ return map_size;
+}
+
+#define arch_vmap_pte_supported_shift arch_vmap_pte_supported_shift
+static inline int arch_vmap_pte_supported_shift(unsigned long size)
+{
+ int shift = PAGE_SHIFT;
+ unsigned long order;
+
+ if (!has_svnapot())
+ return shift;
+
+ WARN_ON_ONCE(size >= PMD_SIZE);
+
+ for_each_napot_order_rev(order) {
+ if (napot_cont_size(order) > size)
+ continue;
+
+ if (!IS_ALIGNED(size, napot_cont_size(order)))
+ continue;
+
+ shift = napot_cont_shift(order);
+ break;
+ }
+
+ return shift;
+}
+
+#endif /* CONFIG_RISCV_ISA_SVNAPOT */
+#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */
#endif /* _ASM_RISCV_VMALLOC_H */
diff --git a/arch/riscv/include/uapi/asm/hwprobe.h b/arch/riscv/include/uapi/asm/hwprobe.h
new file mode 100644
index 000000000000..8d745a4ad8a2
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/hwprobe.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright 2023 Rivos, Inc
+ */
+
+#ifndef _UAPI_ASM_HWPROBE_H
+#define _UAPI_ASM_HWPROBE_H
+
+#include <linux/types.h>
+
+/*
+ * Interface for probing hardware capabilities from userspace, see
+ * Documentation/riscv/hwprobe.rst for more information.
+ */
+struct riscv_hwprobe {
+ __s64 key;
+ __u64 value;
+};
+
+#define RISCV_HWPROBE_KEY_MVENDORID 0
+#define RISCV_HWPROBE_KEY_MARCHID 1
+#define RISCV_HWPROBE_KEY_MIMPID 2
+#define RISCV_HWPROBE_KEY_BASE_BEHAVIOR 3
+#define RISCV_HWPROBE_BASE_BEHAVIOR_IMA (1 << 0)
+#define RISCV_HWPROBE_KEY_IMA_EXT_0 4
+#define RISCV_HWPROBE_IMA_FD (1 << 0)
+#define RISCV_HWPROBE_IMA_C (1 << 1)
+#define RISCV_HWPROBE_KEY_CPUPERF_0 5
+#define RISCV_HWPROBE_MISALIGNED_UNKNOWN (0 << 0)
+#define RISCV_HWPROBE_MISALIGNED_EMULATED (1 << 0)
+#define RISCV_HWPROBE_MISALIGNED_SLOW (2 << 0)
+#define RISCV_HWPROBE_MISALIGNED_FAST (3 << 0)
+#define RISCV_HWPROBE_MISALIGNED_UNSUPPORTED (4 << 0)
+#define RISCV_HWPROBE_MISALIGNED_MASK (7 << 0)
+/* Increase RISCV_HWPROBE_MAX_KEY when adding items. */
+
+#endif
diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h
index 92af6f3f057c..e44c1e90eaa7 100644
--- a/arch/riscv/include/uapi/asm/kvm.h
+++ b/arch/riscv/include/uapi/asm/kvm.h
@@ -52,6 +52,7 @@ struct kvm_riscv_config {
unsigned long mvendorid;
unsigned long marchid;
unsigned long mimpid;
+ unsigned long zicboz_block_size;
};
/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
@@ -105,6 +106,7 @@ enum KVM_RISCV_ISA_EXT_ID {
KVM_RISCV_ISA_EXT_SVINVAL,
KVM_RISCV_ISA_EXT_ZIHINTPAUSE,
KVM_RISCV_ISA_EXT_ZICBOM,
+ KVM_RISCV_ISA_EXT_ZICBOZ,
KVM_RISCV_ISA_EXT_MAX,
};
diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h
index 73d7cdd2ec49..950ab3fd4409 100644
--- a/arch/riscv/include/uapi/asm/unistd.h
+++ b/arch/riscv/include/uapi/asm/unistd.h
@@ -43,3 +43,12 @@
#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15)
#endif
__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache)
+
+/*
+ * Allows userspace to query the kernel for CPU architecture and
+ * microarchitecture details across a given set of CPUs.
+ */
+#ifndef __NR_riscv_hwprobe
+#define __NR_riscv_hwprobe (__NR_arch_specific_syscall + 14)
+#endif
+__SYSCALL(__NR_riscv_hwprobe, sys_riscv_hwprobe)
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile
index 67f542be1bea..571f05958b41 100644
--- a/arch/riscv/kernel/Makefile
+++ b/arch/riscv/kernel/Makefile
@@ -68,8 +68,6 @@ obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o
obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o
obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o
-obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o
-
obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o
obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o
obj-$(CONFIG_RISCV_SBI) += sbi.o
@@ -90,3 +88,5 @@ obj-$(CONFIG_EFI) += efi.o
obj-$(CONFIG_COMPAT) += compat_syscall_table.o
obj-$(CONFIG_COMPAT) += compat_signal.o
obj-$(CONFIG_COMPAT) += compat_vdso/
+
+obj-$(CONFIG_64BIT) += pi/
diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c
index 2354c69dc7d1..6b75788c18e6 100644
--- a/arch/riscv/kernel/alternative.c
+++ b/arch/riscv/kernel/alternative.c
@@ -27,9 +27,11 @@ struct cpu_manufacturer_info_t {
void (*patch_func)(struct alt_entry *begin, struct alt_entry *end,
unsigned long archid, unsigned long impid,
unsigned int stage);
+ void (*feature_probe_func)(unsigned int cpu, unsigned long archid,
+ unsigned long impid);
};
-static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
+static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info)
{
#ifdef CONFIG_RISCV_M_MODE
cpu_mfr_info->vendor_id = csr_read(CSR_MVENDORID);
@@ -41,6 +43,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
cpu_mfr_info->imp_id = sbi_get_mimpid();
#endif
+ cpu_mfr_info->feature_probe_func = NULL;
switch (cpu_mfr_info->vendor_id) {
#ifdef CONFIG_ERRATA_SIFIVE
case SIFIVE_VENDOR_ID:
@@ -50,6 +53,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf
#ifdef CONFIG_ERRATA_THEAD
case THEAD_VENDOR_ID:
cpu_mfr_info->patch_func = thead_errata_patch_func;
+ cpu_mfr_info->feature_probe_func = thead_feature_probe_func;
break;
#endif
default:
@@ -139,6 +143,20 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len,
}
}
+/* Called on each CPU as it starts */
+void probe_vendor_features(unsigned int cpu)
+{
+ struct cpu_manufacturer_info_t cpu_mfr_info;
+
+ riscv_fill_cpu_mfr_info(&cpu_mfr_info);
+ if (!cpu_mfr_info.feature_probe_func)
+ return;
+
+ cpu_mfr_info.feature_probe_func(cpu,
+ cpu_mfr_info.arch_id,
+ cpu_mfr_info.imp_id);
+}
+
/*
* This is called very early in the boot process (directly after we run
* a feature detect on the boot CPU). No need to worry about other CPUs
@@ -193,6 +211,7 @@ void __init apply_boot_alternatives(void)
/* If called on non-boot cpu things could go wrong */
WARN_ON(smp_processor_id() != 0);
+ probe_vendor_features(0);
_apply_alternatives((struct alt_entry *)__alt_start,
(struct alt_entry *)__alt_end,
RISCV_ALTERNATIVES_BOOT);
diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
index e3829d2de5d9..09e9b88110d1 100644
--- a/arch/riscv/kernel/cacheinfo.c
+++ b/arch/riscv/kernel/cacheinfo.c
@@ -63,53 +63,12 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type)
0;
}
-static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type,
- unsigned int level, unsigned int size,
- unsigned int sets, unsigned int line_size)
+static void ci_leaf_init(struct cacheinfo *this_leaf,
+ struct device_node *node,
+ enum cache_type type, unsigned int level)
{
this_leaf->level = level;
this_leaf->type = type;
- this_leaf->size = size;
- this_leaf->number_of_sets = sets;
- this_leaf->coherency_line_size = line_size;
-
- /*
- * If the cache is fully associative, there is no need to
- * check the other properties.
- */
- if (sets == 1)
- return;
-
- /*
- * Set the ways number for n-ways associative, make sure
- * all properties are big than zero.
- */
- if (sets > 0 && size > 0 && line_size > 0)
- this_leaf->ways_of_associativity = (size / sets) / line_size;
-}
-
-static void fill_cacheinfo(struct cacheinfo **this_leaf,
- struct device_node *node, unsigned int level)
-{
- unsigned int size, sets, line_size;
-
- if (!of_property_read_u32(node, "cache-size", &size) &&
- !of_property_read_u32(node, "cache-block-size", &line_size) &&
- !of_property_read_u32(node, "cache-sets", &sets)) {
- ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size);
- }
-
- if (!of_property_read_u32(node, "i-cache-size", &size) &&
- !of_property_read_u32(node, "i-cache-sets", &sets) &&
- !of_property_read_u32(node, "i-cache-block-size", &line_size)) {
- ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size);
- }
-
- if (!of_property_read_u32(node, "d-cache-size", &size) &&
- !of_property_read_u32(node, "d-cache-sets", &sets) &&
- !of_property_read_u32(node, "d-cache-block-size", &line_size)) {
- ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size);
- }
}
int populate_cache_leaves(unsigned int cpu)
@@ -120,24 +79,29 @@ int populate_cache_leaves(unsigned int cpu)
struct device_node *prev = NULL;
int levels = 1, level = 1;
- /* Level 1 caches in cpu node */
- fill_cacheinfo(&this_leaf, np, level);
+ if (of_property_read_bool(np, "cache-size"))
+ ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
+ if (of_property_read_bool(np, "i-cache-size"))
+ ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
+ if (of_property_read_bool(np, "d-cache-size"))
+ ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
- /* Next level caches in cache nodes */
prev = np;
while ((np = of_find_next_cache_node(np))) {
of_node_put(prev);
prev = np;
-
if (!of_device_is_compatible(np, "cache"))
break;
if (of_property_read_u32(np, "cache-level", &level))
break;
if (level <= levels)
break;
-
- fill_cacheinfo(&this_leaf, np, level);
-
+ if (of_property_read_bool(np, "cache-size"))
+ ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level);
+ if (of_property_read_bool(np, "i-cache-size"))
+ ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level);
+ if (of_property_read_bool(np, "d-cache-size"))
+ ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level);
levels = level;
}
of_node_put(np);
diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile
index 7f34f3c7c882..189345773e7e 100644
--- a/arch/riscv/kernel/compat_vdso/Makefile
+++ b/arch/riscv/kernel/compat_vdso/Makefile
@@ -26,7 +26,7 @@ targets := $(obj-compat_vdso) compat_vdso.so compat_vdso.so.dbg compat_vdso.lds
obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso))
obj-y += compat_vdso.o
-CPPFLAGS_compat_vdso.lds += -P -C -U$(ARCH)
+CPPFLAGS_compat_vdso.lds += -P -C -DCOMPAT_VDSO -U$(ARCH)
# Disable profiling and instrumentation for VDSO code
GCOV_PROFILE := n
diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c
index 8400f0cc9704..3df38052dcbd 100644
--- a/arch/riscv/kernel/cpu.c
+++ b/arch/riscv/kernel/cpu.c
@@ -7,6 +7,7 @@
#include <linux/init.h>
#include <linux/seq_file.h>
#include <linux/of.h>
+#include <asm/cpufeature.h>
#include <asm/csr.h>
#include <asm/hwcap.h>
#include <asm/sbi.h>
@@ -70,12 +71,7 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid)
return -1;
}
-struct riscv_cpuinfo {
- unsigned long mvendorid;
- unsigned long marchid;
- unsigned long mimpid;
-};
-static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
+DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo);
unsigned long riscv_cached_mvendorid(unsigned int cpu_id)
{
@@ -186,11 +182,13 @@ arch_initcall(riscv_cpuinfo_init);
*/
static struct riscv_isa_ext_data isa_ext_arr[] = {
__RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM),
+ __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ),
__RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE),
__RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB),
__RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF),
__RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC),
__RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL),
+ __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT),
__RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT),
__RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX),
};
diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c
index 59d58ee0f68d..52585e088873 100644
--- a/arch/riscv/kernel/cpufeature.c
+++ b/arch/riscv/kernel/cpufeature.c
@@ -8,20 +8,15 @@
#include <linux/bitmap.h>
#include <linux/ctype.h>
-#include <linux/libfdt.h>
#include <linux/log2.h>
#include <linux/memory.h>
#include <linux/module.h>
#include <linux/of.h>
#include <asm/alternative.h>
#include <asm/cacheflush.h>
-#include <asm/errata_list.h>
#include <asm/hwcap.h>
#include <asm/patch.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
-#include <asm/smp.h>
-#include <asm/switch_to.h>
#define NUM_ALPHA_EXTS ('z' - 'a' + 1)
@@ -30,6 +25,9 @@ unsigned long elf_hwcap __read_mostly;
/* Host ISA bitmap */
static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly;
+/* Performance information */
+DEFINE_PER_CPU(long, misaligned_access_speed);
+
/**
* riscv_isa_extension_base() - Get base extension word
*
@@ -79,6 +77,15 @@ static bool riscv_isa_extension_check(int id)
return false;
}
return true;
+ case RISCV_ISA_EXT_ZICBOZ:
+ if (!riscv_cboz_block_size) {
+ pr_err("Zicboz detected in ISA string, but no cboz-block-size found\n");
+ return false;
+ } else if (!is_power_of_2(riscv_cboz_block_size)) {
+ pr_err("cboz-block-size present, but is not a power-of-2\n");
+ return false;
+ }
+ return true;
}
return true;
@@ -224,9 +231,11 @@ void __init riscv_fill_hwcap(void)
SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF);
SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC);
SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL);
+ SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT);
SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT);
SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB);
SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM);
+ SET_ISA_EXT_MAP("zicboz", RISCV_ISA_EXT_ZICBOZ);
SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE);
}
#undef SET_ISA_EXT_MAP
@@ -269,12 +278,46 @@ void __init riscv_fill_hwcap(void)
}
#ifdef CONFIG_RISCV_ALTERNATIVE
+/*
+ * Alternative patch sites consider 48 bits when determining when to patch
+ * the old instruction sequence with the new. These bits are broken into a
+ * 16-bit vendor ID and a 32-bit patch ID. A non-zero vendor ID means the
+ * patch site is for an erratum, identified by the 32-bit patch ID. When
+ * the vendor ID is zero, the patch site is for a cpufeature. cpufeatures
+ * further break down patch ID into two 16-bit numbers. The lower 16 bits
+ * are the cpufeature ID and the upper 16 bits are used for a value specific
+ * to the cpufeature and patch site. If the upper 16 bits are zero, then it
+ * implies no specific value is specified. cpufeatures that want to control
+ * patching on a per-site basis will provide non-zero values and implement
+ * checks here. The checks return true when patching should be done, and
+ * false otherwise.
+ */
+static bool riscv_cpufeature_patch_check(u16 id, u16 value)
+{
+ if (!value)
+ return true;
+
+ switch (id) {
+ case RISCV_ISA_EXT_ZICBOZ:
+ /*
+ * Zicboz alternative applications provide the maximum
+ * supported block size order, or zero when it doesn't
+ * matter. If the current block size exceeds the maximum,
+ * then the alternative cannot be applied.
+ */
+ return riscv_cboz_block_size <= (1U << value);
+ }
+
+ return false;
+}
+
void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
struct alt_entry *end,
unsigned int stage)
{
struct alt_entry *alt;
void *oldptr, *altptr;
+ u16 id, value;
if (stage == RISCV_ALTERNATIVES_EARLY_BOOT)
return;
@@ -282,13 +325,19 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin,
for (alt = begin; alt < end; alt++) {
if (alt->vendor_id != 0)
continue;
- if (alt->errata_id >= RISCV_ISA_EXT_MAX) {
- WARN(1, "This extension id:%d is not in ISA extension list",
- alt->errata_id);
+
+ id = PATCH_ID_CPUFEATURE_ID(alt->patch_id);
+
+ if (id >= RISCV_ISA_EXT_MAX) {
+ WARN(1, "This extension id:%d is not in ISA extension list", id);
continue;
}
- if (!__riscv_isa_extension_available(NULL, alt->errata_id))
+ if (!__riscv_isa_extension_available(NULL, id))
+ continue;
+
+ value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id);
+ if (!riscv_cpufeature_patch_check(id, value))
continue;
oldptr = ALT_OLD_PTR(alt);
diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
index 8e733aa48ba6..515b2dfbca75 100644
--- a/arch/riscv/kernel/efi-header.S
+++ b/arch/riscv/kernel/efi-header.S
@@ -6,6 +6,7 @@
#include <linux/pe.h>
#include <linux/sizes.h>
+#include <asm/set_memory.h>
.macro __EFI_PE_HEADER
.long PE_MAGIC
@@ -33,7 +34,11 @@ optional_header:
.byte 0x02 // MajorLinkerVersion
.byte 0x14 // MinorLinkerVersion
.long __pecoff_text_end - efi_header_end // SizeOfCode
- .long __pecoff_data_virt_size // SizeOfInitializedData
+#ifdef __clang__
+ .long __pecoff_data_virt_size // SizeOfInitializedData
+#else
+ .long __pecoff_data_virt_end - __pecoff_text_end // SizeOfInitializedData
+#endif
.long 0 // SizeOfUninitializedData
.long __efistub_efi_pe_entry - _start // AddressOfEntryPoint
.long efi_header_end - _start // BaseOfCode
@@ -91,9 +96,17 @@ section_table:
IMAGE_SCN_MEM_EXECUTE // Characteristics
.ascii ".data\0\0\0"
- .long __pecoff_data_virt_size // VirtualSize
+#ifdef __clang__
+ .long __pecoff_data_virt_size // VirtualSize
+#else
+ .long __pecoff_data_virt_end - __pecoff_text_end // VirtualSize
+#endif
.long __pecoff_text_end - _start // VirtualAddress
- .long __pecoff_data_raw_size // SizeOfRawData
+#ifdef __clang__
+ .long __pecoff_data_raw_size // SizeOfRawData
+#else
+ .long __pecoff_data_raw_end - __pecoff_text_end // SizeOfRawData
+#endif
.long __pecoff_text_end - _start // PointerToRawData
.long 0 // PointerToRelocations
diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S
index 99d38fdf8b18..3fbb100bc9e4 100644
--- a/arch/riscv/kernel/entry.S
+++ b/arch/riscv/kernel/entry.S
@@ -14,11 +14,7 @@
#include <asm/asm-offsets.h>
#include <asm/errata_list.h>
-#if !IS_ENABLED(CONFIG_PREEMPTION)
-.set resume_kernel, restore_all
-#endif
-
-ENTRY(handle_exception)
+SYM_CODE_START(handle_exception)
/*
* If coming from userspace, preserve the user thread pointer and load
* the kernel thread pointer. If we came from the kernel, the scratch
@@ -46,32 +42,7 @@ _save_context:
REG_S x1, PT_RA(sp)
REG_S x3, PT_GP(sp)
REG_S x5, PT_T0(sp)
- REG_S x6, PT_T1(sp)
- REG_S x7, PT_T2(sp)
- REG_S x8, PT_S0(sp)
- REG_S x9, PT_S1(sp)
- REG_S x10, PT_A0(sp)
- REG_S x11, PT_A1(sp)
- REG_S x12, PT_A2(sp)
- REG_S x13, PT_A3(sp)
- REG_S x14, PT_A4(sp)
- REG_S x15, PT_A5(sp)
- REG_S x16, PT_A6(sp)
- REG_S x17, PT_A7(sp)
- REG_S x18, PT_S2(sp)
- REG_S x19, PT_S3(sp)
- REG_S x20, PT_S4(sp)
- REG_S x21, PT_S5(sp)
- REG_S x22, PT_S6(sp)
- REG_S x23, PT_S7(sp)
- REG_S x24, PT_S8(sp)
- REG_S x25, PT_S9(sp)
- REG_S x26, PT_S10(sp)
- REG_S x27, PT_S11(sp)
- REG_S x28, PT_T3(sp)
- REG_S x29, PT_T4(sp)
- REG_S x30, PT_T5(sp)
- REG_S x31, PT_T6(sp)
+ save_from_x6_to_x31
/*
* Disable user-mode memory access as it should only be set in the
@@ -106,19 +77,8 @@ _save_context:
.option norelax
la gp, __global_pointer$
.option pop
-
-#ifdef CONFIG_TRACE_IRQFLAGS
- call __trace_hardirqs_off
-#endif
-
-#ifdef CONFIG_CONTEXT_TRACKING_USER
- /* If previous state is in user mode, call user_exit_callable(). */
- li a0, SR_PP
- and a0, s1, a0
- bnez a0, skip_context_tracking
- call user_exit_callable
-skip_context_tracking:
-#endif
+ move a0, sp /* pt_regs */
+ la ra, ret_from_exception
/*
* MSB of cause differentiates between
@@ -126,38 +86,13 @@ skip_context_tracking:
*/
bge s4, zero, 1f
- la ra, ret_from_exception
-
/* Handle interrupts */
- move a0, sp /* pt_regs */
- la a1, generic_handle_arch_irq
- jr a1
+ tail do_irq
1:
- /*
- * Exceptions run with interrupts enabled or disabled depending on the
- * state of SR_PIE in m/sstatus.
- */
- andi t0, s1, SR_PIE
- beqz t0, 1f
- /* kprobes, entered via ebreak, must have interrupts disabled. */
- li t0, EXC_BREAKPOINT
- beq s4, t0, 1f
-#ifdef CONFIG_TRACE_IRQFLAGS
- call __trace_hardirqs_on
-#endif
- csrs CSR_STATUS, SR_IE
-
-1:
- la ra, ret_from_exception
- /* Handle syscalls */
- li t0, EXC_SYSCALL
- beq s4, t0, handle_syscall
-
/* Handle other exceptions */
slli t0, s4, RISCV_LGPTR
la t1, excp_vect_table
la t2, excp_vect_table_end
- move a0, sp /* pt_regs */
add t0, t1, t0
/* Check if exception code lies within bounds */
bgeu t0, t2, 1f
@@ -165,95 +100,16 @@ skip_context_tracking:
jr t0
1:
tail do_trap_unknown
+SYM_CODE_END(handle_exception)
-handle_syscall:
-#ifdef CONFIG_RISCV_M_MODE
- /*
- * When running is M-Mode (no MMU config), MPIE does not get set.
- * As a result, we need to force enable interrupts here because
- * handle_exception did not do set SR_IE as it always sees SR_PIE
- * being cleared.
- */
- csrs CSR_STATUS, SR_IE
-#endif
-#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER)
- /* Recover a0 - a7 for system calls */
- REG_L a0, PT_A0(sp)
- REG_L a1, PT_A1(sp)
- REG_L a2, PT_A2(sp)
- REG_L a3, PT_A3(sp)
- REG_L a4, PT_A4(sp)
- REG_L a5, PT_A5(sp)
- REG_L a6, PT_A6(sp)
- REG_L a7, PT_A7(sp)
-#endif
- /* save the initial A0 value (needed in signal handlers) */
- REG_S a0, PT_ORIG_A0(sp)
- /*
- * Advance SEPC to avoid executing the original
- * scall instruction on sret
- */
- addi s2, s2, 0x4
- REG_S s2, PT_EPC(sp)
- /* Trace syscalls, but only if requested by the user. */
- REG_L t0, TASK_TI_FLAGS(tp)
- andi t0, t0, _TIF_SYSCALL_WORK
- bnez t0, handle_syscall_trace_enter
-check_syscall_nr:
- /* Check to make sure we don't jump to a bogus syscall number. */
- li t0, __NR_syscalls
- la s0, sys_ni_syscall
- /*
- * Syscall number held in a7.
- * If syscall number is above allowed value, redirect to ni_syscall.
- */
- bgeu a7, t0, 3f
-#ifdef CONFIG_COMPAT
- REG_L s0, PT_STATUS(sp)
- srli s0, s0, SR_UXL_SHIFT
- andi s0, s0, (SR_UXL >> SR_UXL_SHIFT)
- li t0, (SR_UXL_32 >> SR_UXL_SHIFT)
- sub t0, s0, t0
- bnez t0, 1f
-
- /* Call compat_syscall */
- la s0, compat_sys_call_table
- j 2f
-1:
-#endif
- /* Call syscall */
- la s0, sys_call_table
-2:
- slli t0, a7, RISCV_LGPTR
- add s0, s0, t0
- REG_L s0, 0(s0)
-3:
- jalr s0
-
-ret_from_syscall:
- /* Set user a0 to kernel a0 */
- REG_S a0, PT_A0(sp)
- /*
- * We didn't execute the actual syscall.
- * Seccomp already set return value for the current task pt_regs.
- * (If it was configured with SECCOMP_RET_ERRNO/TRACE)
- */
-ret_from_syscall_rejected:
-#ifdef CONFIG_DEBUG_RSEQ
- move a0, sp
- call rseq_syscall
-#endif
- /* Trace syscalls, but only if requested by the user. */
- REG_L t0, TASK_TI_FLAGS(tp)
- andi t0, t0, _TIF_SYSCALL_WORK
- bnez t0, handle_syscall_trace_exit
-
+/*
+ * The ret_from_exception must be called with interrupt disabled. Here is the
+ * caller list:
+ * - handle_exception
+ * - ret_from_fork
+ */
SYM_CODE_START_NOALIGN(ret_from_exception)
REG_L s0, PT_STATUS(sp)
- csrc CSR_STATUS, SR_IE
-#ifdef CONFIG_TRACE_IRQFLAGS
- call __trace_hardirqs_off
-#endif
#ifdef CONFIG_RISCV_M_MODE
/* the MPP value is too large to be used as an immediate arg for addi */
li t0, SR_MPP
@@ -261,17 +117,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception)
#else
andi s0, s0, SR_SPP
#endif
- bnez s0, resume_kernel
-SYM_CODE_END(ret_from_exception)
-
- /* Interrupts must be disabled here so flags are checked atomically */
- REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */
- andi s1, s0, _TIF_WORK_MASK
- bnez s1, resume_userspace_slow
-resume_userspace:
-#ifdef CONFIG_CONTEXT_TRACKING_USER
- call user_enter_callable
-#endif
+ bnez s0, 1f
/* Save unwound kernel stack pointer in thread_info */
addi s0, sp, PT_SIZE_ON_STACK
@@ -282,18 +128,7 @@ resume_userspace:
* structures again.
*/
csrw CSR_SCRATCH, tp
-
-restore_all:
-#ifdef CONFIG_TRACE_IRQFLAGS
- REG_L s1, PT_STATUS(sp)
- andi t0, s1, SR_PIE
- beqz t0, 1f
- call __trace_hardirqs_on
- j 2f
1:
- call __trace_hardirqs_off
-2:
-#endif
REG_L a0, PT_STATUS(sp)
/*
* The current load reservation is effectively part of the processor's
@@ -322,32 +157,7 @@ restore_all:
REG_L x3, PT_GP(sp)
REG_L x4, PT_TP(sp)
REG_L x5, PT_T0(sp)
- REG_L x6, PT_T1(sp)
- REG_L x7, PT_T2(sp)
- REG_L x8, PT_S0(sp)
- REG_L x9, PT_S1(sp)
- REG_L x10, PT_A0(sp)
- REG_L x11, PT_A1(sp)
- REG_L x12, PT_A2(sp)
- REG_L x13, PT_A3(sp)
- REG_L x14, PT_A4(sp)
- REG_L x15, PT_A5(sp)
- REG_L x16, PT_A6(sp)
- REG_L x17, PT_A7(sp)
- REG_L x18, PT_S2(sp)
- REG_L x19, PT_S3(sp)
- REG_L x20, PT_S4(sp)
- REG_L x21, PT_S5(sp)
- REG_L x22, PT_S6(sp)
- REG_L x23, PT_S7(sp)
- REG_L x24, PT_S8(sp)
- REG_L x25, PT_S9(sp)
- REG_L x26, PT_S10(sp)
- REG_L x27, PT_S11(sp)
- REG_L x28, PT_T3(sp)
- REG_L x29, PT_T4(sp)
- REG_L x30, PT_T5(sp)
- REG_L x31, PT_T6(sp)
+ restore_from_x6_to_x31
REG_L x2, PT_SP(sp)
@@ -356,47 +166,10 @@ restore_all:
#else
sret
#endif
-
-#if IS_ENABLED(CONFIG_PREEMPTION)
-resume_kernel:
- REG_L s0, TASK_TI_PREEMPT_COUNT(tp)
- bnez s0, restore_all
- REG_L s0, TASK_TI_FLAGS(tp)
- andi s0, s0, _TIF_NEED_RESCHED
- beqz s0, restore_all
- call preempt_schedule_irq
- j restore_all
-#endif
-
-resume_userspace_slow:
- /* Enter slow path for supplementary processing */
- move a0, sp /* pt_regs */
- move a1, s0 /* current_thread_info->flags */
- call do_work_pending
- j resume_userspace
-
-/* Slow paths for ptrace. */
-handle_syscall_trace_enter:
- move a0, sp
- call do_syscall_trace_enter
- move t0, a0
- REG_L a0, PT_A0(sp)
- REG_L a1, PT_A1(sp)
- REG_L a2, PT_A2(sp)
- REG_L a3, PT_A3(sp)
- REG_L a4, PT_A4(sp)
- REG_L a5, PT_A5(sp)
- REG_L a6, PT_A6(sp)
- REG_L a7, PT_A7(sp)
- bnez t0, ret_from_syscall_rejected
- j check_syscall_nr
-handle_syscall_trace_exit:
- move a0, sp
- call do_syscall_trace_exit
- j ret_from_exception
+SYM_CODE_END(ret_from_exception)
#ifdef CONFIG_VMAP_STACK
-handle_kernel_stack_overflow:
+SYM_CODE_START_LOCAL(handle_kernel_stack_overflow)
/*
* Takes the psuedo-spinlock for the shadow stack, in case multiple
* harts are concurrently overflowing their kernel stacks. We could
@@ -464,32 +237,7 @@ restore_caller_reg:
REG_S x1, PT_RA(sp)
REG_S x3, PT_GP(sp)
REG_S x5, PT_T0(sp)
- REG_S x6, PT_T1(sp)
- REG_S x7, PT_T2(sp)
- REG_S x8, PT_S0(sp)
- REG_S x9, PT_S1(sp)
- REG_S x10, PT_A0(sp)
- REG_S x11, PT_A1(sp)
- REG_S x12, PT_A2(sp)
- REG_S x13, PT_A3(sp)
- REG_S x14, PT_A4(sp)
- REG_S x15, PT_A5(sp)
- REG_S x16, PT_A6(sp)
- REG_S x17, PT_A7(sp)
- REG_S x18, PT_S2(sp)
- REG_S x19, PT_S3(sp)
- REG_S x20, PT_S4(sp)
- REG_S x21, PT_S5(sp)
- REG_S x22, PT_S6(sp)
- REG_S x23, PT_S7(sp)
- REG_S x24, PT_S8(sp)
- REG_S x25, PT_S9(sp)
- REG_S x26, PT_S10(sp)
- REG_S x27, PT_S11(sp)
- REG_S x28, PT_T3(sp)
- REG_S x29, PT_T4(sp)
- REG_S x30, PT_T5(sp)
- REG_S x31, PT_T6(sp)
+ save_from_x6_to_x31
REG_L s0, TASK_TI_KERNEL_SP(tp)
csrr s1, CSR_STATUS
@@ -505,23 +253,20 @@ restore_caller_reg:
REG_S s5, PT_TP(sp)
move a0, sp
tail handle_bad_stack
+SYM_CODE_END(handle_kernel_stack_overflow)
#endif
-END(handle_exception)
-
-ENTRY(ret_from_fork)
- la ra, ret_from_exception
- tail schedule_tail
-ENDPROC(ret_from_fork)
-
-ENTRY(ret_from_kernel_thread)
+SYM_CODE_START(ret_from_fork)
call schedule_tail
+ beqz s0, 1f /* not from kernel thread */
/* Call fn(arg) */
- la ra, ret_from_exception
move a0, s1
- jr s0
-ENDPROC(ret_from_kernel_thread)
-
+ jalr s0
+1:
+ move a0, sp /* pt_regs */
+ la ra, ret_from_exception
+ tail syscall_exit_to_user_mode
+SYM_CODE_END(ret_from_fork)
/*
* Integer register context switch
@@ -533,7 +278,7 @@ ENDPROC(ret_from_kernel_thread)
* The value of a0 and a1 must be preserved by this function, as that's how
* arguments are passed to schedule_tail.
*/
-ENTRY(__switch_to)
+SYM_FUNC_START(__switch_to)
/* Save context into prev->thread */
li a4, TASK_THREAD_RA
add a3, a0, a4
@@ -570,7 +315,7 @@ ENTRY(__switch_to)
/* The offset of thread_info in task_struct is zero. */
move tp, a1
ret
-ENDPROC(__switch_to)
+SYM_FUNC_END(__switch_to)
#ifndef CONFIG_MMU
#define do_page_fault do_trap_unknown
@@ -579,7 +324,7 @@ ENDPROC(__switch_to)
.section ".rodata"
.align LGREG
/* Exception vector table */
-ENTRY(excp_vect_table)
+SYM_CODE_START(excp_vect_table)
RISCV_PTR do_trap_insn_misaligned
ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault)
RISCV_PTR do_trap_insn_illegal
@@ -588,7 +333,7 @@ ENTRY(excp_vect_table)
RISCV_PTR do_trap_load_fault
RISCV_PTR do_trap_store_misaligned
RISCV_PTR do_trap_store_fault
- RISCV_PTR do_trap_ecall_u /* system call, gets intercepted */
+ RISCV_PTR do_trap_ecall_u /* system call */
RISCV_PTR do_trap_ecall_s
RISCV_PTR do_trap_unknown
RISCV_PTR do_trap_ecall_m
@@ -598,11 +343,11 @@ ENTRY(excp_vect_table)
RISCV_PTR do_trap_unknown
RISCV_PTR do_page_fault /* store page fault */
excp_vect_table_end:
-END(excp_vect_table)
+SYM_CODE_END(excp_vect_table)
#ifndef CONFIG_MMU
-ENTRY(__user_rt_sigreturn)
+SYM_CODE_START(__user_rt_sigreturn)
li a7, __NR_rt_sigreturn
scall
-END(__user_rt_sigreturn)
+SYM_CODE_END(__user_rt_sigreturn)
#endif
diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h
index 726731ada534..a556fdaafed9 100644
--- a/arch/riscv/kernel/head.h
+++ b/arch/riscv/kernel/head.h
@@ -10,7 +10,6 @@
extern atomic_t hart_lottery;
-asmlinkage void do_page_fault(struct pt_regs *regs);
asmlinkage void __init setup_vm(uintptr_t dtb_pa);
#ifdef CONFIG_XIP_KERNEL
asmlinkage void __init __copy_data(void);
diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h
index 7e2962ef73f9..15616155008c 100644
--- a/arch/riscv/kernel/image-vars.h
+++ b/arch/riscv/kernel/image-vars.h
@@ -23,8 +23,6 @@
* linked at. The routines below are all implemented in assembler in a
* position independent manner
*/
-__efistub_strcmp = strcmp;
-
__efistub__start = _start;
__efistub__start_kernel = _start_kernel;
__efistub__end = _end;
diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S
index 125de818d1ba..669b8697aa38 100644
--- a/arch/riscv/kernel/mcount-dyn.S
+++ b/arch/riscv/kernel/mcount-dyn.S
@@ -66,66 +66,17 @@
REG_S x3, PT_GP(sp)
REG_S x4, PT_TP(sp)
REG_S x5, PT_T0(sp)
- REG_S x6, PT_T1(sp)
- REG_S x7, PT_T2(sp)
- REG_S x8, PT_S0(sp)
- REG_S x9, PT_S1(sp)
- REG_S x10, PT_A0(sp)
- REG_S x11, PT_A1(sp)
- REG_S x12, PT_A2(sp)
- REG_S x13, PT_A3(sp)
- REG_S x14, PT_A4(sp)
- REG_S x15, PT_A5(sp)
- REG_S x16, PT_A6(sp)
- REG_S x17, PT_A7(sp)
- REG_S x18, PT_S2(sp)
- REG_S x19, PT_S3(sp)
- REG_S x20, PT_S4(sp)
- REG_S x21, PT_S5(sp)
- REG_S x22, PT_S6(sp)
- REG_S x23, PT_S7(sp)
- REG_S x24, PT_S8(sp)
- REG_S x25, PT_S9(sp)
- REG_S x26, PT_S10(sp)
- REG_S x27, PT_S11(sp)
- REG_S x28, PT_T3(sp)
- REG_S x29, PT_T4(sp)
- REG_S x30, PT_T5(sp)
- REG_S x31, PT_T6(sp)
+ save_from_x6_to_x31
.endm
.macro RESTORE_ALL
- REG_L t0, PT_EPC(sp)
REG_L x1, PT_RA(sp)
REG_L x2, PT_SP(sp)
REG_L x3, PT_GP(sp)
REG_L x4, PT_TP(sp)
- REG_L x6, PT_T1(sp)
- REG_L x7, PT_T2(sp)
- REG_L x8, PT_S0(sp)
- REG_L x9, PT_S1(sp)
- REG_L x10, PT_A0(sp)
- REG_L x11, PT_A1(sp)
- REG_L x12, PT_A2(sp)
- REG_L x13, PT_A3(sp)
- REG_L x14, PT_A4(sp)
- REG_L x15, PT_A5(sp)
- REG_L x16, PT_A6(sp)
- REG_L x17, PT_A7(sp)
- REG_L x18, PT_S2(sp)
- REG_L x19, PT_S3(sp)
- REG_L x20, PT_S4(sp)
- REG_L x21, PT_S5(sp)
- REG_L x22, PT_S6(sp)
- REG_L x23, PT_S7(sp)
- REG_L x24, PT_S8(sp)
- REG_L x25, PT_S9(sp)
- REG_L x26, PT_S10(sp)
- REG_L x27, PT_S11(sp)
- REG_L x28, PT_T3(sp)
- REG_L x29, PT_T4(sp)
- REG_L x30, PT_T5(sp)
- REG_L x31, PT_T6(sp)
+ /* Restore t0 with PT_EPC */
+ REG_L x5, PT_EPC(sp)
+ restore_from_x6_to_x31
addi sp, sp, PT_SIZE_ON_STACK
.endm
diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile
new file mode 100644
index 000000000000..5d7cb991f2b8
--- /dev/null
+++ b/arch/riscv/kernel/pi/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+# This file was copied from arm64/kernel/pi/Makefile.
+
+KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \
+ -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \
+ $(call cc-option,-mbranch-protection=none) \
+ -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \
+ -D__DISABLE_EXPORTS -ffreestanding \
+ -fno-asynchronous-unwind-tables -fno-unwind-tables \
+ $(call cc-option,-fno-addrsig)
+
+KBUILD_CFLAGS += -mcmodel=medany
+
+CFLAGS_cmdline_early.o += -D__NO_FORTIFY
+CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY
+
+GCOV_PROFILE := n
+KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
+UBSAN_SANITIZE := n
+KCOV_INSTRUMENT := n
+
+$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \
+ --remove-section=.note.gnu.property \
+ --prefix-alloc-sections=.init
+$(obj)/%.pi.o: $(obj)/%.o FORCE
+ $(call if_changed,objcopy)
+
+$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+$(obj)/string.o: $(srctree)/lib/string.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+$(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE
+ $(call if_changed_rule,cc_o_c)
+
+obj-y := cmdline_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o
+extra-y := $(patsubst %.pi.o,%.o,$(obj-y))
diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c
new file mode 100644
index 000000000000..05652d13c746
--- /dev/null
+++ b/arch/riscv/kernel/pi/cmdline_early.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/libfdt.h>
+#include <linux/string.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+
+static char early_cmdline[COMMAND_LINE_SIZE];
+
+/*
+ * Declare the functions that are exported (but prefixed) here so that LLVM
+ * does not complain it lacks the 'static' keyword (which, if added, makes
+ * LLVM complain because the function is actually unused in this file).
+ */
+u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa);
+
+static char *get_early_cmdline(uintptr_t dtb_pa)
+{
+ const char *fdt_cmdline = NULL;
+ unsigned int fdt_cmdline_size = 0;
+ int chosen_node;
+
+ if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
+ chosen_node = fdt_path_offset((void *)dtb_pa, "/chosen");
+ if (chosen_node >= 0) {
+ fdt_cmdline = fdt_getprop((void *)dtb_pa, chosen_node,
+ "bootargs", NULL);
+ if (fdt_cmdline) {
+ fdt_cmdline_size = strlen(fdt_cmdline);
+ strscpy(early_cmdline, fdt_cmdline,
+ COMMAND_LINE_SIZE);
+ }
+ }
+ }
+
+ if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) ||
+ IS_ENABLED(CONFIG_CMDLINE_FORCE) ||
+ fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) {
+ strncat(early_cmdline, CONFIG_CMDLINE,
+ COMMAND_LINE_SIZE - fdt_cmdline_size);
+ }
+
+ return early_cmdline;
+}
+
+static u64 match_noXlvl(char *cmdline)
+{
+ if (strstr(cmdline, "no4lvl"))
+ return SATP_MODE_48;
+ else if (strstr(cmdline, "no5lvl"))
+ return SATP_MODE_57;
+
+ return 0;
+}
+
+u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa)
+{
+ char *cmdline = get_early_cmdline(dtb_pa);
+
+ return match_noXlvl(cmdline);
+}
diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
index 774ffde386ab..e2a060066730 100644
--- a/arch/riscv/kernel/process.c
+++ b/arch/riscv/kernel/process.c
@@ -34,7 +34,6 @@ EXPORT_SYMBOL(__stack_chk_guard);
#endif
extern asmlinkage void ret_from_fork(void);
-extern asmlinkage void ret_from_kernel_thread(void);
void arch_cpu_idle(void)
{
@@ -173,7 +172,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
/* Supervisor/Machine, irqs on: */
childregs->status = SR_PP | SR_PIE;
- p->thread.ra = (unsigned long)ret_from_kernel_thread;
p->thread.s[0] = (unsigned long)args->fn;
p->thread.s[1] = (unsigned long)args->fn_arg;
} else {
@@ -183,8 +181,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
if (clone_flags & CLONE_SETTLS)
childregs->tp = tls;
childregs->a0 = 0; /* Return value of fork() */
- p->thread.ra = (unsigned long)ret_from_fork;
+ p->thread.s[0] = 0;
}
+ p->thread.ra = (unsigned long)ret_from_fork;
p->thread.sp = (unsigned long)childregs; /* kernel sp */
return 0;
}
diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c
index 2ae8280ae475..23c48b14a0e7 100644
--- a/arch/riscv/kernel/ptrace.c
+++ b/arch/riscv/kernel/ptrace.c
@@ -19,9 +19,6 @@
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
-#define CREATE_TRACE_POINTS
-#include <trace/events/syscalls.h>
-
enum riscv_regset {
REGSET_X,
#ifdef CONFIG_FPU
@@ -212,7 +209,6 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
void ptrace_disable(struct task_struct *child)
{
- clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
}
long arch_ptrace(struct task_struct *child, long request,
@@ -229,46 +225,6 @@ long arch_ptrace(struct task_struct *child, long request,
return ret;
}
-/*
- * Allows PTRACE_SYSCALL to work. These are called from entry.S in
- * {handle,ret_from}_syscall.
- */
-__visible int do_syscall_trace_enter(struct pt_regs *regs)
-{
- if (test_thread_flag(TIF_SYSCALL_TRACE))
- if (ptrace_report_syscall_entry(regs))
- return -1;
-
- /*
- * Do the secure computing after ptrace; failures should be fast.
- * If this fails we might have return value in a0 from seccomp
- * (via SECCOMP_RET_ERRNO/TRACE).
- */
- if (secure_computing() == -1)
- return -1;
-
-#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
- if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
- trace_sys_enter(regs, syscall_get_nr(current, regs));
-#endif
-
- audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3);
- return 0;
-}
-
-__visible void do_syscall_trace_exit(struct pt_regs *regs)
-{
- audit_syscall_exit(regs);
-
- if (test_thread_flag(TIF_SYSCALL_TRACE))
- ptrace_report_syscall_exit(regs, 0);
-
-#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS
- if (test_thread_flag(TIF_SYSCALL_TRACEPOINT))
- trace_sys_exit(regs, regs_return_value(regs));
-#endif
-}
-
#ifdef CONFIG_COMPAT
static int compat_riscv_gpr_get(struct task_struct *target,
const struct user_regset *regset,
diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c
index 3020f44dcf58..36b026057503 100644
--- a/arch/riscv/kernel/setup.c
+++ b/arch/riscv/kernel/setup.c
@@ -293,7 +293,7 @@ void __init setup_arch(char **cmdline_p)
setup_smp();
#endif
- riscv_init_cbom_blocksize();
+ riscv_init_cbo_blocksizes();
riscv_fill_hwcap();
apply_boot_alternatives();
if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) &&
diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c
index dee66c9290cc..9aff9d720590 100644
--- a/arch/riscv/kernel/signal.c
+++ b/arch/riscv/kernel/signal.c
@@ -12,6 +12,7 @@
#include <linux/syscalls.h>
#include <linux/resume_user_mode.h>
#include <linux/linkage.h>
+#include <linux/entry-common.h>
#include <asm/ucontext.h>
#include <asm/vdso.h>
@@ -281,7 +282,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs)
signal_setup_done(ret, ksig, 0);
}
-static void do_signal(struct pt_regs *regs)
+void arch_do_signal_or_restart(struct pt_regs *regs)
{
struct ksignal ksig;
@@ -318,29 +319,3 @@ static void do_signal(struct pt_regs *regs)
*/
restore_saved_sigmask();
}
-
-/*
- * Handle any pending work on the resume-to-userspace path, as indicated by
- * _TIF_WORK_MASK. Entered from assembly with IRQs off.
- */
-asmlinkage __visible void do_work_pending(struct pt_regs *regs,
- unsigned long thread_info_flags)
-{
- do {
- if (thread_info_flags & _TIF_NEED_RESCHED) {
- schedule();
- } else {
- local_irq_enable();
- if (thread_info_flags & _TIF_UPROBE)
- uprobe_notify_resume(regs);
- /* Handle pending signal delivery */
- if (thread_info_flags & (_TIF_SIGPENDING |
- _TIF_NOTIFY_SIGNAL))
- do_signal(regs);
- if (thread_info_flags & _TIF_NOTIFY_RESUME)
- resume_user_mode_work(regs);
- }
- local_irq_disable();
- thread_info_flags = read_thread_flags();
- } while (thread_info_flags & _TIF_WORK_MASK);
-}
diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c
index 00b53913d4c6..445a4efee267 100644
--- a/arch/riscv/kernel/smpboot.c
+++ b/arch/riscv/kernel/smpboot.c
@@ -167,6 +167,7 @@ asmlinkage __visible void smp_callin(void)
notify_cpu_starting(curr_cpuid);
numa_add_cpu(curr_cpuid);
set_cpu_online(curr_cpuid, 1);
+ probe_vendor_features(curr_cpuid);
/*
* Remote TLB flushes are ignored while the CPU is offline, so emit
diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c
index 5d3f2fbeb33c..5db29683ebee 100644
--- a/arch/riscv/kernel/sys_riscv.c
+++ b/arch/riscv/kernel/sys_riscv.c
@@ -6,9 +6,15 @@
*/
#include <linux/syscalls.h>
-#include <asm/unistd.h>
#include <asm/cacheflush.h>
+#include <asm/cpufeature.h>
+#include <asm/hwprobe.h>
+#include <asm/sbi.h>
+#include <asm/switch_to.h>
+#include <asm/uaccess.h>
+#include <asm/unistd.h>
#include <asm-generic/mman-common.h>
+#include <vdso/vsyscall.h>
static long riscv_sys_mmap(unsigned long addr, unsigned long len,
unsigned long prot, unsigned long flags,
@@ -69,3 +75,225 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end,
return 0;
}
+
+/*
+ * The hwprobe interface, for allowing userspace to probe to see which features
+ * are supported by the hardware. See Documentation/riscv/hwprobe.rst for more
+ * details.
+ */
+static void hwprobe_arch_id(struct riscv_hwprobe *pair,
+ const struct cpumask *cpus)
+{
+ u64 id = -1ULL;
+ bool first = true;
+ int cpu;
+
+ for_each_cpu(cpu, cpus) {
+ u64 cpu_id;
+
+ switch (pair->key) {
+ case RISCV_HWPROBE_KEY_MVENDORID:
+ cpu_id = riscv_cached_mvendorid(cpu);
+ break;
+ case RISCV_HWPROBE_KEY_MIMPID:
+ cpu_id = riscv_cached_mimpid(cpu);
+ break;
+ case RISCV_HWPROBE_KEY_MARCHID:
+ cpu_id = riscv_cached_marchid(cpu);
+ break;
+ }
+
+ if (first) {
+ id = cpu_id;
+ first = false;
+ }
+
+ /*
+ * If there's a mismatch for the given set, return -1 in the
+ * value.
+ */
+ if (id != cpu_id) {
+ id = -1ULL;
+ break;
+ }
+ }
+
+ pair->value = id;
+}
+
+static u64 hwprobe_misaligned(const struct cpumask *cpus)
+{
+ int cpu;
+ u64 perf = -1ULL;
+
+ for_each_cpu(cpu, cpus) {
+ int this_perf = per_cpu(misaligned_access_speed, cpu);
+
+ if (perf == -1ULL)
+ perf = this_perf;
+
+ if (perf != this_perf) {
+ perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+ break;
+ }
+ }
+
+ if (perf == -1ULL)
+ return RISCV_HWPROBE_MISALIGNED_UNKNOWN;
+
+ return perf;
+}
+
+static void hwprobe_one_pair(struct riscv_hwprobe *pair,
+ const struct cpumask *cpus)
+{
+ switch (pair->key) {
+ case RISCV_HWPROBE_KEY_MVENDORID:
+ case RISCV_HWPROBE_KEY_MARCHID:
+ case RISCV_HWPROBE_KEY_MIMPID:
+ hwprobe_arch_id(pair, cpus);
+ break;
+ /*
+ * The kernel already assumes that the base single-letter ISA
+ * extensions are supported on all harts, and only supports the
+ * IMA base, so just cheat a bit here and tell that to
+ * userspace.
+ */
+ case RISCV_HWPROBE_KEY_BASE_BEHAVIOR:
+ pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA;
+ break;
+
+ case RISCV_HWPROBE_KEY_IMA_EXT_0:
+ pair->value = 0;
+ if (has_fpu())
+ pair->value |= RISCV_HWPROBE_IMA_FD;
+
+ if (riscv_isa_extension_available(NULL, c))
+ pair->value |= RISCV_HWPROBE_IMA_C;
+
+ break;
+
+ case RISCV_HWPROBE_KEY_CPUPERF_0:
+ pair->value = hwprobe_misaligned(cpus);
+ break;
+
+ /*
+ * For forward compatibility, unknown keys don't fail the whole
+ * call, but get their element key set to -1 and value set to 0
+ * indicating they're unrecognized.
+ */
+ default:
+ pair->key = -1;
+ pair->value = 0;
+ break;
+ }
+}
+
+static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs,
+ size_t pair_count, size_t cpu_count,
+ unsigned long __user *cpus_user,
+ unsigned int flags)
+{
+ size_t out;
+ int ret;
+ cpumask_t cpus;
+
+ /* Check the reserved flags. */
+ if (flags != 0)
+ return -EINVAL;
+
+ /*
+ * The interface supports taking in a CPU mask, and returns values that
+ * are consistent across that mask. Allow userspace to specify NULL and
+ * 0 as a shortcut to all online CPUs.
+ */
+ cpumask_clear(&cpus);
+ if (!cpu_count && !cpus_user) {
+ cpumask_copy(&cpus, cpu_online_mask);
+ } else {
+ if (cpu_count > cpumask_size())
+ cpu_count = cpumask_size();
+
+ ret = copy_from_user(&cpus, cpus_user, cpu_count);
+ if (ret)
+ return -EFAULT;
+
+ /*
+ * Userspace must provide at least one online CPU, without that
+ * there's no way to define what is supported.
+ */
+ cpumask_and(&cpus, &cpus, cpu_online_mask);
+ if (cpumask_empty(&cpus))
+ return -EINVAL;
+ }
+
+ for (out = 0; out < pair_count; out++, pairs++) {
+ struct riscv_hwprobe pair;
+
+ if (get_user(pair.key, &pairs->key))
+ return -EFAULT;
+
+ pair.value = 0;
+ hwprobe_one_pair(&pair, &cpus);
+ ret = put_user(pair.key, &pairs->key);
+ if (ret == 0)
+ ret = put_user(pair.value, &pairs->value);
+
+ if (ret)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_MMU
+
+static int __init init_hwprobe_vdso_data(void)
+{
+ struct vdso_data *vd = __arch_get_k_vdso_data();
+ struct arch_vdso_data *avd = &vd->arch_data;
+ u64 id_bitsmash = 0;
+ struct riscv_hwprobe pair;
+ int key;
+
+ /*
+ * Initialize vDSO data with the answers for the "all CPUs" case, to
+ * save a syscall in the common case.
+ */
+ for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) {
+ pair.key = key;
+ hwprobe_one_pair(&pair, cpu_online_mask);
+
+ WARN_ON_ONCE(pair.key < 0);
+
+ avd->all_cpu_hwprobe_values[key] = pair.value;
+ /*
+ * Smash together the vendor, arch, and impl IDs to see if
+ * they're all 0 or any negative.
+ */
+ if (key <= RISCV_HWPROBE_KEY_MIMPID)
+ id_bitsmash |= pair.value;
+ }
+
+ /*
+ * If the arch, vendor, and implementation ID are all the same across
+ * all harts, then assume all CPUs are the same, and allow the vDSO to
+ * answer queries for arbitrary masks. However if all values are 0 (not
+ * populated) or any value returns -1 (varies across CPUs), then the
+ * vDSO should defer to the kernel for exotic cpu masks.
+ */
+ avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1;
+ return 0;
+}
+
+arch_initcall_sync(init_hwprobe_vdso_data);
+
+#endif /* CONFIG_MMU */
+
+SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs,
+ size_t, pair_count, size_t, cpu_count, unsigned long __user *,
+ cpus, unsigned int, flags)
+{
+ return do_riscv_hwprobe(pairs, pair_count, cpu_count,
+ cpus, flags);
+}
diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c
deleted file mode 100644
index 095ac976d7da..000000000000
--- a/arch/riscv/kernel/trace_irq.c
+++ /dev/null
@@ -1,27 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
- */
-
-#include <linux/irqflags.h>
-#include <linux/kprobes.h>
-#include "trace_irq.h"
-
-/*
- * trace_hardirqs_on/off require the caller to setup frame pointer properly.
- * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel.
- * Here we add one extra level so they can be safely called by low
- * level entry code which $fp is used for other purpose.
- */
-
-void __trace_hardirqs_on(void)
-{
- trace_hardirqs_on();
-}
-NOKPROBE_SYMBOL(__trace_hardirqs_on);
-
-void __trace_hardirqs_off(void)
-{
- trace_hardirqs_off();
-}
-NOKPROBE_SYMBOL(__trace_hardirqs_off);
diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h
deleted file mode 100644
index 99fe67377e5e..000000000000
--- a/arch/riscv/kernel/trace_irq.h
+++ /dev/null
@@ -1,11 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com>
- */
-#ifndef __TRACE_IRQ_H
-#define __TRACE_IRQ_H
-
-void __trace_hardirqs_on(void);
-void __trace_hardirqs_off(void);
-
-#endif /* __TRACE_IRQ_H */
diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c
index f6fda94e8e59..8c258b78c925 100644
--- a/arch/riscv/kernel/traps.c
+++ b/arch/riscv/kernel/traps.c
@@ -17,12 +17,14 @@
#include <linux/module.h>
#include <linux/irq.h>
#include <linux/kexec.h>
+#include <linux/entry-common.h>
#include <asm/asm-prototypes.h>
#include <asm/bug.h>
#include <asm/csr.h>
#include <asm/processor.h>
#include <asm/ptrace.h>
+#include <asm/syscall.h>
#include <asm/thread_info.h>
int show_unhandled_signals = 1;
@@ -119,14 +121,22 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code,
}
#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_RISCV_ALTERNATIVE)
-#define __trap_section __section(".xip.traps")
+#define __trap_section __noinstr_section(".xip.traps")
#else
-#define __trap_section
+#define __trap_section noinstr
#endif
-#define DO_ERROR_INFO(name, signo, code, str) \
-asmlinkage __visible __trap_section void name(struct pt_regs *regs) \
-{ \
- do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
+#define DO_ERROR_INFO(name, signo, code, str) \
+asmlinkage __visible __trap_section void name(struct pt_regs *regs) \
+{ \
+ if (user_mode(regs)) { \
+ irqentry_enter_from_user_mode(regs); \
+ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
+ irqentry_exit_to_user_mode(regs); \
+ } else { \
+ irqentry_state_t state = irqentry_nmi_enter(regs); \
+ do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \
+ irqentry_nmi_exit(regs, state); \
+ } \
}
DO_ERROR_INFO(do_trap_unknown,
@@ -148,26 +158,50 @@ DO_ERROR_INFO(do_trap_store_misaligned,
int handle_misaligned_load(struct pt_regs *regs);
int handle_misaligned_store(struct pt_regs *regs);
-asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs)
+asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs)
{
- if (!handle_misaligned_load(regs))
- return;
- do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
- "Oops - load address misaligned");
+ if (user_mode(regs)) {
+ irqentry_enter_from_user_mode(regs);
+
+ if (handle_misaligned_load(regs))
+ do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+ "Oops - load address misaligned");
+
+ irqentry_exit_to_user_mode(regs);
+ } else {
+ irqentry_state_t state = irqentry_nmi_enter(regs);
+
+ if (handle_misaligned_load(regs))
+ do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+ "Oops - load address misaligned");
+
+ irqentry_nmi_exit(regs, state);
+ }
}
-asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs)
+asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs)
{
- if (!handle_misaligned_store(regs))
- return;
- do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
- "Oops - store (or AMO) address misaligned");
+ if (user_mode(regs)) {
+ irqentry_enter_from_user_mode(regs);
+
+ if (handle_misaligned_store(regs))
+ do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+ "Oops - store (or AMO) address misaligned");
+
+ irqentry_exit_to_user_mode(regs);
+ } else {
+ irqentry_state_t state = irqentry_nmi_enter(regs);
+
+ if (handle_misaligned_store(regs))
+ do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc,
+ "Oops - store (or AMO) address misaligned");
+
+ irqentry_nmi_exit(regs, state);
+ }
}
#endif
DO_ERROR_INFO(do_trap_store_fault,
SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault");
-DO_ERROR_INFO(do_trap_ecall_u,
- SIGILL, ILL_ILLTRP, "environment call from U-mode");
DO_ERROR_INFO(do_trap_ecall_s,
SIGILL, ILL_ILLTRP, "environment call from S-mode");
DO_ERROR_INFO(do_trap_ecall_m,
@@ -183,7 +217,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
return GET_INSN_LENGTH(insn);
}
-asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
+void handle_break(struct pt_regs *regs)
{
#ifdef CONFIG_KPROBES
if (kprobe_single_step_handler(regs))
@@ -213,7 +247,77 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
else
die(regs, "Kernel BUG");
}
-NOKPROBE_SYMBOL(do_trap_break);
+
+asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs)
+{
+ if (user_mode(regs)) {
+ irqentry_enter_from_user_mode(regs);
+
+ handle_break(regs);
+
+ irqentry_exit_to_user_mode(regs);
+ } else {
+ irqentry_state_t state = irqentry_nmi_enter(regs);
+
+ handle_break(regs);
+
+ irqentry_nmi_exit(regs, state);
+ }
+}
+
+asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs)
+{
+ if (user_mode(regs)) {
+ ulong syscall = regs->a7;
+
+ regs->epc += 4;
+ regs->orig_a0 = regs->a0;
+
+ syscall = syscall_enter_from_user_mode(regs, syscall);
+
+ if (syscall < NR_syscalls)
+ syscall_handler(regs, syscall);
+ else
+ regs->a0 = -ENOSYS;
+
+ syscall_exit_to_user_mode(regs);
+ } else {
+ irqentry_state_t state = irqentry_nmi_enter(regs);
+
+ do_trap_error(regs, SIGILL, ILL_ILLTRP, regs->epc,
+ "Oops - environment call from U-mode");
+
+ irqentry_nmi_exit(regs, state);
+ }
+
+}
+
+#ifdef CONFIG_MMU
+asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs)
+{
+ irqentry_state_t state = irqentry_enter(regs);
+
+ handle_page_fault(regs);
+
+ local_irq_disable();
+
+ irqentry_exit(regs, state);
+}
+#endif
+
+asmlinkage __visible noinstr void do_irq(struct pt_regs *regs)
+{
+ struct pt_regs *old_regs;
+ irqentry_state_t state = irqentry_enter(regs);
+
+ irq_enter_rcu();
+ old_regs = set_irq_regs(regs);
+ handle_arch_irq(regs);
+ set_irq_regs(old_regs);
+ irq_exit_rcu();
+
+ irqentry_exit(regs, state);
+}
#ifdef CONFIG_GENERIC_BUG
int is_valid_bugaddr(unsigned long pc)
diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c
index cc2d1e8c8736..9a68e7eaae4d 100644
--- a/arch/riscv/kernel/vdso.c
+++ b/arch/riscv/kernel/vdso.c
@@ -14,13 +14,7 @@
#include <asm/page.h>
#include <asm/vdso.h>
#include <linux/time_namespace.h>
-
-#ifdef CONFIG_GENERIC_TIME_VSYSCALL
#include <vdso/datapage.h>
-#else
-struct vdso_data {
-};
-#endif
enum vvar_pages {
VVAR_DATA_PAGE_OFFSET,
diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
index a04b3bc35ca2..6b1dba11bf6d 100644
--- a/arch/riscv/kernel/vdso/Makefile
+++ b/arch/riscv/kernel/vdso/Makefile
@@ -10,6 +10,8 @@ vdso-syms += vgettimeofday
endif
vdso-syms += getcpu
vdso-syms += flush_icache
+vdso-syms += hwprobe
+vdso-syms += sys_hwprobe
# Files to link into the vdso
obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
@@ -21,6 +23,8 @@ ifneq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
endif
+CFLAGS_hwprobe.o += -fPIC
+
# Build rules
targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c
new file mode 100644
index 000000000000..d40bec6ac078
--- /dev/null
+++ b/arch/riscv/kernel/vdso/hwprobe.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2023 Rivos, Inc
+ */
+
+#include <linux/types.h>
+#include <vdso/datapage.h>
+#include <vdso/helpers.h>
+
+extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, unsigned long *cpus,
+ unsigned int flags);
+
+/* Add a prototype to avoid -Wmissing-prototypes warning. */
+int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, unsigned long *cpus,
+ unsigned int flags);
+
+int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, unsigned long *cpus,
+ unsigned int flags)
+{
+ const struct vdso_data *vd = __arch_get_vdso_data();
+ const struct arch_vdso_data *avd = &vd->arch_data;
+ bool all_cpus = !cpu_count && !cpus;
+ struct riscv_hwprobe *p = pairs;
+ struct riscv_hwprobe *end = pairs + pair_count;
+
+ /*
+ * Defer to the syscall for exotic requests. The vdso has answers
+ * stashed away only for the "all cpus" case. If all CPUs are
+ * homogeneous, then this function can handle requests for arbitrary
+ * masks.
+ */
+ if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus))
+ return riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags);
+
+ /* This is something we can handle, fill out the pairs. */
+ while (p < end) {
+ if (p->key <= RISCV_HWPROBE_MAX_KEY) {
+ p->value = avd->all_cpu_hwprobe_values[p->key];
+
+ } else {
+ p->key = -1;
+ p->value = 0;
+ }
+
+ p++;
+ }
+
+ return 0;
+}
diff --git a/arch/riscv/kernel/vdso/sys_hwprobe.S b/arch/riscv/kernel/vdso/sys_hwprobe.S
new file mode 100644
index 000000000000..4e704146c77a
--- /dev/null
+++ b/arch/riscv/kernel/vdso/sys_hwprobe.S
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023 Rivos, Inc */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+
+.text
+ENTRY(riscv_hwprobe)
+ .cfi_startproc
+ li a7, __NR_riscv_hwprobe
+ ecall
+ ret
+
+ .cfi_endproc
+ENDPROC(riscv_hwprobe)
diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S
index 4a0606633290..82ce64900f3d 100644
--- a/arch/riscv/kernel/vdso/vdso.lds.S
+++ b/arch/riscv/kernel/vdso/vdso.lds.S
@@ -82,6 +82,9 @@ VERSION
#endif
__vdso_getcpu;
__vdso_flush_icache;
+#ifndef COMPAT_VDSO
+ __vdso_riscv_hwprobe;
+#endif
local: *;
};
}
diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S
index 53a8ad65b255..305877d85e96 100644
--- a/arch/riscv/kernel/vmlinux.lds.S
+++ b/arch/riscv/kernel/vmlinux.lds.S
@@ -27,9 +27,6 @@ ENTRY(_start)
jiffies = jiffies_64;
-PECOFF_SECTION_ALIGNMENT = 0x1000;
-PECOFF_FILE_ALIGNMENT = 0x200;
-
SECTIONS
{
/* Beginning of code and text segment */
@@ -86,6 +83,14 @@ SECTIONS
/* Start of init data section */
__init_data_begin = .;
INIT_DATA_SECTION(16)
+
+ /* Those sections result from the compilation of kernel/pi/string.c */
+ .init.pidata : {
+ *(.init.srodata.cst8*)
+ *(.init__bug_table*)
+ *(.init.sdata*)
+ }
+
.init.bss : {
*(.init.bss) /* from the EFI stub */
}
@@ -99,10 +104,6 @@ SECTIONS
*(.rel.dyn*)
}
- .rela.dyn : {
- *(.rela*)
- }
-
__init_data_end = .;
. = ALIGN(8);
@@ -129,9 +130,28 @@ SECTIONS
*(.sdata*)
}
+ .rela.dyn : ALIGN(8) {
+ __rela_dyn_start = .;
+ *(.rela .rela*)
+ __rela_dyn_end = .;
+ }
+
+ .got : { *(.got*) }
+
+#ifdef CONFIG_RELOCATABLE
+ .data.rel : { *(.data.rel*) }
+ .plt : { *(.plt) }
+ .dynamic : { *(.dynamic) }
+ .dynsym : { *(.dynsym) }
+ .dynstr : { *(.dynstr) }
+ .hash : { *(.hash) }
+ .gnu.hash : { *(.gnu.hash) }
+#endif
+
#ifdef CONFIG_EFI
.pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); }
__pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end);
+ __pecoff_data_raw_end = ABSOLUTE(.);
#endif
/* End of data section */
@@ -142,6 +162,7 @@ SECTIONS
#ifdef CONFIG_EFI
. = ALIGN(PECOFF_SECTION_ALIGNMENT);
__pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end);
+ __pecoff_data_virt_end = ABSOLUTE(.);
#endif
_end = .;
diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c
index 7d010b0be54e..6adb1b6112a1 100644
--- a/arch/riscv/kvm/vcpu.c
+++ b/arch/riscv/kvm/vcpu.c
@@ -63,6 +63,7 @@ static const unsigned long kvm_isa_ext_arr[] = {
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOZ),
};
static unsigned long kvm_riscv_vcpu_base2isa_ext(unsigned long base_ext)
@@ -283,6 +284,11 @@ static int kvm_riscv_vcpu_get_reg_config(struct kvm_vcpu *vcpu,
return -EINVAL;
reg_val = riscv_cbom_block_size;
break;
+ case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+ if (!riscv_isa_extension_available(vcpu->arch.isa, ZICBOZ))
+ return -EINVAL;
+ reg_val = riscv_cboz_block_size;
+ break;
case KVM_REG_RISCV_CONFIG_REG(mvendorid):
reg_val = vcpu->arch.mvendorid;
break;
@@ -354,6 +360,8 @@ static int kvm_riscv_vcpu_set_reg_config(struct kvm_vcpu *vcpu,
break;
case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
return -EOPNOTSUPP;
+ case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+ return -EOPNOTSUPP;
case KVM_REG_RISCV_CONFIG_REG(mvendorid):
if (!vcpu->arch.ran_atleast_once)
vcpu->arch.mvendorid = reg_val;
@@ -865,6 +873,9 @@ static void kvm_riscv_vcpu_update_config(const unsigned long *isa)
if (riscv_isa_extension_available(isa, ZICBOM))
henvcfg |= (ENVCFG_CBIE | ENVCFG_CBCFE);
+ if (riscv_isa_extension_available(isa, ZICBOZ))
+ henvcfg |= ENVCFG_CBZE;
+
csr_write(CSR_HENVCFG, henvcfg);
#ifdef CONFIG_32BIT
csr_write(CSR_HENVCFGH, henvcfg >> 32);
diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile
index 6c74b0bedd60..26cb2502ecf8 100644
--- a/arch/riscv/lib/Makefile
+++ b/arch/riscv/lib/Makefile
@@ -8,5 +8,6 @@ lib-y += strlen.o
lib-y += strncmp.o
lib-$(CONFIG_MMU) += uaccess.o
lib-$(CONFIG_64BIT) += tishift.o
+lib-$(CONFIG_RISCV_ISA_ZICBOZ) += clear_page.o
obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/riscv/lib/clear_page.S b/arch/riscv/lib/clear_page.S
new file mode 100644
index 000000000000..d7a256eb53f4
--- /dev/null
+++ b/arch/riscv/lib/clear_page.S
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/alternative-macros.h>
+#include <asm-generic/export.h>
+#include <asm/hwcap.h>
+#include <asm/insn-def.h>
+#include <asm/page.h>
+
+#define CBOZ_ALT(order, old, new) \
+ ALTERNATIVE(old, new, 0, \
+ ((order) << 16) | RISCV_ISA_EXT_ZICBOZ, \
+ CONFIG_RISCV_ISA_ZICBOZ)
+
+/* void clear_page(void *page) */
+SYM_FUNC_START(clear_page)
+ li a2, PAGE_SIZE
+
+ /*
+ * If Zicboz isn't present, or somehow has a block
+ * size larger than 4K, then fallback to memset.
+ */
+ CBOZ_ALT(12, "j .Lno_zicboz", "nop")
+
+ lw a1, riscv_cboz_block_size
+ add a2, a0, a2
+.Lzero_loop:
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBOZ_ALT(11, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBOZ_ALT(10, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBOZ_ALT(9, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBOZ_ALT(8, "bltu a0, a2, .Lzero_loop; ret", "nop; nop")
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ CBO_zero(a0)
+ add a0, a0, a1
+ bltu a0, a2, .Lzero_loop
+ ret
+.Lno_zicboz:
+ li a1, 0
+ tail __memset
+SYM_FUNC_END(clear_page)
+EXPORT_SYMBOL(clear_page)
diff --git a/arch/riscv/lib/memcpy.S b/arch/riscv/lib/memcpy.S
index 51ab716253fa..1a40d01a9543 100644
--- a/arch/riscv/lib/memcpy.S
+++ b/arch/riscv/lib/memcpy.S
@@ -106,3 +106,5 @@ WEAK(memcpy)
6:
ret
END(__memcpy)
+SYM_FUNC_ALIAS(__pi_memcpy, __memcpy)
+SYM_FUNC_ALIAS(__pi___memcpy, __memcpy)
diff --git a/arch/riscv/lib/memmove.S b/arch/riscv/lib/memmove.S
index e0609e1f0864..838ff2022fe3 100644
--- a/arch/riscv/lib/memmove.S
+++ b/arch/riscv/lib/memmove.S
@@ -314,3 +314,5 @@ return_from_memmove:
SYM_FUNC_END(memmove)
SYM_FUNC_END(__memmove)
+SYM_FUNC_ALIAS(__pi_memmove, __memmove)
+SYM_FUNC_ALIAS(__pi___memmove, __memmove)
diff --git a/arch/riscv/lib/strcmp.S b/arch/riscv/lib/strcmp.S
index c42a8412547f..687b2bea5c43 100644
--- a/arch/riscv/lib/strcmp.S
+++ b/arch/riscv/lib/strcmp.S
@@ -2,9 +2,8 @@
#include <linux/linkage.h>
#include <asm/asm.h>
-#include <asm-generic/export.h>
#include <asm/alternative-macros.h>
-#include <asm/errata_list.h>
+#include <asm/hwcap.h>
/* int strcmp(const char *cs, const char *ct) */
SYM_FUNC_START(strcmp)
diff --git a/arch/riscv/lib/strlen.S b/arch/riscv/lib/strlen.S
index 15bb8f3aa959..8ae3064e45ff 100644
--- a/arch/riscv/lib/strlen.S
+++ b/arch/riscv/lib/strlen.S
@@ -2,9 +2,8 @@
#include <linux/linkage.h>
#include <asm/asm.h>
-#include <asm-generic/export.h>
#include <asm/alternative-macros.h>
-#include <asm/errata_list.h>
+#include <asm/hwcap.h>
/* int strlen(const char *s) */
SYM_FUNC_START(strlen)
@@ -131,3 +130,4 @@ strlen_zbb:
.option pop
#endif
SYM_FUNC_END(strlen)
+SYM_FUNC_ALIAS(__pi_strlen, strlen)
diff --git a/arch/riscv/lib/strncmp.S b/arch/riscv/lib/strncmp.S
index 7ac2f667285a..aba5b3148621 100644
--- a/arch/riscv/lib/strncmp.S
+++ b/arch/riscv/lib/strncmp.S
@@ -2,9 +2,8 @@
#include <linux/linkage.h>
#include <asm/asm.h>
-#include <asm-generic/export.h>
#include <asm/alternative-macros.h>
-#include <asm/errata_list.h>
+#include <asm/hwcap.h>
/* int strncmp(const char *cs, const char *ct, size_t count) */
SYM_FUNC_START(strncmp)
diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile
index 2ac177c05352..b85e9e82f082 100644
--- a/arch/riscv/mm/Makefile
+++ b/arch/riscv/mm/Makefile
@@ -1,6 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS_init.o := -mcmodel=medany
+ifdef CONFIG_RELOCATABLE
+CFLAGS_init.o += -fno-pie
+endif
+
ifdef CONFIG_FTRACE
CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/riscv/mm/cacheflush.c b/arch/riscv/mm/cacheflush.c
index 20cec5e7cdbf..fca532ddf3ec 100644
--- a/arch/riscv/mm/cacheflush.c
+++ b/arch/riscv/mm/cacheflush.c
@@ -101,36 +101,48 @@ void flush_icache_pte(pte_t pte)
unsigned int riscv_cbom_block_size;
EXPORT_SYMBOL_GPL(riscv_cbom_block_size);
-void riscv_init_cbom_blocksize(void)
+unsigned int riscv_cboz_block_size;
+EXPORT_SYMBOL_GPL(riscv_cboz_block_size);
+
+static void cbo_get_block_size(struct device_node *node,
+ const char *name, u32 *block_size,
+ unsigned long *first_hartid)
{
+ unsigned long hartid;
+ u32 val;
+
+ if (riscv_of_processor_hartid(node, &hartid))
+ return;
+
+ if (of_property_read_u32(node, name, &val))
+ return;
+
+ if (!*block_size) {
+ *block_size = val;
+ *first_hartid = hartid;
+ } else if (*block_size != val) {
+ pr_warn("%s mismatched between harts %lu and %lu\n",
+ name, *first_hartid, hartid);
+ }
+}
+
+void riscv_init_cbo_blocksizes(void)
+{
+ unsigned long cbom_hartid, cboz_hartid;
+ u32 cbom_block_size = 0, cboz_block_size = 0;
struct device_node *node;
- unsigned long cbom_hartid;
- u32 val, probed_block_size;
- int ret;
- probed_block_size = 0;
for_each_of_cpu_node(node) {
- unsigned long hartid;
-
- ret = riscv_of_processor_hartid(node, &hartid);
- if (ret)
- continue;
-
- /* set block-size for cbom extension if available */
- ret = of_property_read_u32(node, "riscv,cbom-block-size", &val);
- if (ret)
- continue;
-
- if (!probed_block_size) {
- probed_block_size = val;
- cbom_hartid = hartid;
- } else {
- if (probed_block_size != val)
- pr_warn("cbom-block-size mismatched between harts %lu and %lu\n",
- cbom_hartid, hartid);
- }
+ /* set block-size for cbom and/or cboz extension if available */
+ cbo_get_block_size(node, "riscv,cbom-block-size",
+ &cbom_block_size, &cbom_hartid);
+ cbo_get_block_size(node, "riscv,cboz-block-size",
+ &cboz_block_size, &cboz_hartid);
}
- if (probed_block_size)
- riscv_cbom_block_size = probed_block_size;
+ if (cbom_block_size)
+ riscv_cbom_block_size = cbom_block_size;
+
+ if (cboz_block_size)
+ riscv_cboz_block_size = cboz_block_size;
}
diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c
index d5f3e501dffb..8685f85a7474 100644
--- a/arch/riscv/mm/fault.c
+++ b/arch/riscv/mm/fault.c
@@ -15,6 +15,7 @@
#include <linux/uaccess.h>
#include <linux/kprobes.h>
#include <linux/kfence.h>
+#include <linux/entry-common.h>
#include <asm/ptrace.h>
#include <asm/tlbflush.h>
@@ -209,7 +210,7 @@ static inline bool access_error(unsigned long cause, struct vm_area_struct *vma)
* This routine handles page faults. It determines the address and the
* problem, and then passes it off to one of the appropriate routines.
*/
-asmlinkage void do_page_fault(struct pt_regs *regs)
+void handle_page_fault(struct pt_regs *regs)
{
struct task_struct *tsk;
struct vm_area_struct *vma;
@@ -256,7 +257,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
}
#endif
/* Enable interrupts if they were enabled in the parent context. */
- if (likely(regs->status & SR_PIE))
+ if (!regs_irqs_disabled(regs))
local_irq_enable();
/*
@@ -361,4 +362,3 @@ good_area:
}
return;
}
-NOKPROBE_SYMBOL(do_page_fault);
diff --git a/arch/riscv/mm/hugetlbpage.c b/arch/riscv/mm/hugetlbpage.c
index 932dadfdca54..a163a3e0f0d4 100644
--- a/arch/riscv/mm/hugetlbpage.c
+++ b/arch/riscv/mm/hugetlbpage.c
@@ -2,6 +2,305 @@
#include <linux/hugetlb.h>
#include <linux/err.h>
+#ifdef CONFIG_RISCV_ISA_SVNAPOT
+pte_t *huge_pte_alloc(struct mm_struct *mm,
+ struct vm_area_struct *vma,
+ unsigned long addr,
+ unsigned long sz)
+{
+ unsigned long order;
+ pte_t *pte = NULL;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset(mm, addr);
+ p4d = p4d_alloc(mm, pgd, addr);
+ if (!p4d)
+ return NULL;
+
+ pud = pud_alloc(mm, p4d, addr);
+ if (!pud)
+ return NULL;
+
+ if (sz == PUD_SIZE) {
+ pte = (pte_t *)pud;
+ goto out;
+ }
+
+ if (sz == PMD_SIZE) {
+ if (want_pmd_share(vma, addr) && pud_none(*pud))
+ pte = huge_pmd_share(mm, vma, addr, pud);
+ else
+ pte = (pte_t *)pmd_alloc(mm, pud, addr);
+ goto out;
+ }
+
+ pmd = pmd_alloc(mm, pud, addr);
+ if (!pmd)
+ return NULL;
+
+ for_each_napot_order(order) {
+ if (napot_cont_size(order) == sz) {
+ pte = pte_alloc_map(mm, pmd, addr & napot_cont_mask(order));
+ break;
+ }
+ }
+
+out:
+ WARN_ON_ONCE(pte && pte_present(*pte) && !pte_huge(*pte));
+ return pte;
+}
+
+pte_t *huge_pte_offset(struct mm_struct *mm,
+ unsigned long addr,
+ unsigned long sz)
+{
+ unsigned long order;
+ pte_t *pte = NULL;
+ pgd_t *pgd;
+ p4d_t *p4d;
+ pud_t *pud;
+ pmd_t *pmd;
+
+ pgd = pgd_offset(mm, addr);
+ if (!pgd_present(*pgd))
+ return NULL;
+
+ p4d = p4d_offset(pgd, addr);
+ if (!p4d_present(*p4d))
+ return NULL;
+
+ pud = pud_offset(p4d, addr);
+ if (sz == PUD_SIZE)
+ /* must be pud huge, non-present or none */
+ return (pte_t *)pud;
+
+ if (!pud_present(*pud))
+ return NULL;
+
+ pmd = pmd_offset(pud, addr);
+ if (sz == PMD_SIZE)
+ /* must be pmd huge, non-present or none */
+ return (pte_t *)pmd;
+
+ if (!pmd_present(*pmd))
+ return NULL;
+
+ for_each_napot_order(order) {
+ if (napot_cont_size(order) == sz) {
+ pte = pte_offset_kernel(pmd, addr & napot_cont_mask(order));
+ break;
+ }
+ }
+ return pte;
+}
+
+static pte_t get_clear_contig(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long pte_num)
+{
+ pte_t orig_pte = ptep_get(ptep);
+ unsigned long i;
+
+ for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++) {
+ pte_t pte = ptep_get_and_clear(mm, addr, ptep);
+
+ if (pte_dirty(pte))
+ orig_pte = pte_mkdirty(orig_pte);
+
+ if (pte_young(pte))
+ orig_pte = pte_mkyoung(orig_pte);
+ }
+
+ return orig_pte;
+}
+
+static pte_t get_clear_contig_flush(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long pte_num)
+{
+ pte_t orig_pte = get_clear_contig(mm, addr, ptep, pte_num);
+ struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+ bool valid = !pte_none(orig_pte);
+
+ if (valid)
+ flush_tlb_range(&vma, addr, addr + (PAGE_SIZE * pte_num));
+
+ return orig_pte;
+}
+
+pte_t arch_make_huge_pte(pte_t entry, unsigned int shift, vm_flags_t flags)
+{
+ unsigned long order;
+
+ for_each_napot_order(order) {
+ if (shift == napot_cont_shift(order)) {
+ entry = pte_mknapot(entry, order);
+ break;
+ }
+ }
+ if (order == NAPOT_ORDER_MAX)
+ entry = pte_mkhuge(entry);
+
+ return entry;
+}
+
+void set_huge_pte_at(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ pte_t pte)
+{
+ int i, pte_num;
+
+ if (!pte_napot(pte)) {
+ set_pte_at(mm, addr, ptep, pte);
+ return;
+ }
+
+ pte_num = napot_pte_num(napot_cont_order(pte));
+ for (i = 0; i < pte_num; i++, ptep++, addr += PAGE_SIZE)
+ set_pte_at(mm, addr, ptep, pte);
+}
+
+int huge_ptep_set_access_flags(struct vm_area_struct *vma,
+ unsigned long addr,
+ pte_t *ptep,
+ pte_t pte,
+ int dirty)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long order;
+ pte_t orig_pte;
+ int i, pte_num;
+
+ if (!pte_napot(pte))
+ return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
+
+ order = napot_cont_order(pte);
+ pte_num = napot_pte_num(order);
+ ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
+ orig_pte = get_clear_contig_flush(mm, addr, ptep, pte_num);
+
+ if (pte_dirty(orig_pte))
+ pte = pte_mkdirty(pte);
+
+ if (pte_young(orig_pte))
+ pte = pte_mkyoung(pte);
+
+ for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+ set_pte_at(mm, addr, ptep, pte);
+
+ return true;
+}
+
+pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep)
+{
+ pte_t orig_pte = ptep_get(ptep);
+ int pte_num;
+
+ if (!pte_napot(orig_pte))
+ return ptep_get_and_clear(mm, addr, ptep);
+
+ pte_num = napot_pte_num(napot_cont_order(orig_pte));
+
+ return get_clear_contig(mm, addr, ptep, pte_num);
+}
+
+void huge_ptep_set_wrprotect(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep)
+{
+ pte_t pte = ptep_get(ptep);
+ unsigned long order;
+ int i, pte_num;
+
+ if (!pte_napot(pte)) {
+ ptep_set_wrprotect(mm, addr, ptep);
+ return;
+ }
+
+ order = napot_cont_order(pte);
+ pte_num = napot_pte_num(order);
+ ptep = huge_pte_offset(mm, addr, napot_cont_size(order));
+
+ for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+ ptep_set_wrprotect(mm, addr, ptep);
+}
+
+pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
+ unsigned long addr,
+ pte_t *ptep)
+{
+ pte_t pte = ptep_get(ptep);
+ int pte_num;
+
+ if (!pte_napot(pte))
+ return ptep_clear_flush(vma, addr, ptep);
+
+ pte_num = napot_pte_num(napot_cont_order(pte));
+
+ return get_clear_contig_flush(vma->vm_mm, addr, ptep, pte_num);
+}
+
+void huge_pte_clear(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long sz)
+{
+ pte_t pte = READ_ONCE(*ptep);
+ int i, pte_num;
+
+ if (!pte_napot(pte)) {
+ pte_clear(mm, addr, ptep);
+ return;
+ }
+
+ pte_num = napot_pte_num(napot_cont_order(pte));
+ for (i = 0; i < pte_num; i++, addr += PAGE_SIZE, ptep++)
+ pte_clear(mm, addr, ptep);
+}
+
+static __init bool is_napot_size(unsigned long size)
+{
+ unsigned long order;
+
+ if (!has_svnapot())
+ return false;
+
+ for_each_napot_order(order) {
+ if (size == napot_cont_size(order))
+ return true;
+ }
+ return false;
+}
+
+static __init int napot_hugetlbpages_init(void)
+{
+ if (has_svnapot()) {
+ unsigned long order;
+
+ for_each_napot_order(order)
+ hugetlb_add_hstate(order);
+ }
+ return 0;
+}
+arch_initcall(napot_hugetlbpages_init);
+
+#else
+
+static __init bool is_napot_size(unsigned long size)
+{
+ return false;
+}
+
+#endif /*CONFIG_RISCV_ISA_SVNAPOT*/
+
int pud_huge(pud_t pud)
{
return pud_leaf(pud);
@@ -18,6 +317,8 @@ bool __init arch_hugetlb_valid_size(unsigned long size)
return true;
else if (IS_ENABLED(CONFIG_64BIT) && size == PUD_SIZE)
return true;
+ else if (is_napot_size(size))
+ return true;
else
return false;
}
diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 0f14f4a8d179..a39fe42baf55 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -20,6 +20,9 @@
#include <linux/dma-map-ops.h>
#include <linux/crash_dump.h>
#include <linux/hugetlb.h>
+#ifdef CONFIG_RELOCATABLE
+#include <linux/elf.h>
+#endif
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
@@ -145,7 +148,7 @@ static void __init print_vm_layout(void)
print_ml("kasan", KASAN_SHADOW_START, KASAN_SHADOW_END);
#endif
- print_ml("kernel", (unsigned long)KERNEL_LINK_ADDR,
+ print_ml("kernel", (unsigned long)kernel_map.virt_addr,
(unsigned long)ADDRESS_SPACE_END);
}
}
@@ -212,6 +215,14 @@ static void __init setup_bootmem(void)
phys_ram_end = memblock_end_of_DRAM();
if (!IS_ENABLED(CONFIG_XIP_KERNEL))
phys_ram_base = memblock_start_of_DRAM();
+
+ /*
+ * In 64-bit, any use of __va/__pa before this point is wrong as we
+ * did not know the start of DRAM before.
+ */
+ if (IS_ENABLED(CONFIG_64BIT))
+ kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base;
+
/*
* memblock allocator is not aware of the fact that last 4K bytes of
* the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -261,9 +272,6 @@ static void __init setup_bootmem(void)
#ifdef CONFIG_MMU
struct pt_alloc_ops pt_ops __initdata;
-unsigned long riscv_pfn_base __ro_after_init;
-EXPORT_SYMBOL(riscv_pfn_base);
-
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
static pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
@@ -272,7 +280,6 @@ pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
#ifdef CONFIG_XIP_KERNEL
#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&pt_ops))
-#define riscv_pfn_base (*(unsigned long *)XIP_FIXUP(&riscv_pfn_base))
#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
@@ -654,9 +661,16 @@ void __init create_pgd_mapping(pgd_t *pgdp,
static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size)
{
- /* Upgrade to PMD_SIZE mappings whenever possible */
- base &= PMD_SIZE - 1;
- if (!base && size >= PMD_SIZE)
+ if (!(base & (PGDIR_SIZE - 1)) && size >= PGDIR_SIZE)
+ return PGDIR_SIZE;
+
+ if (!(base & (P4D_SIZE - 1)) && size >= P4D_SIZE)
+ return P4D_SIZE;
+
+ if (!(base & (PUD_SIZE - 1)) && size >= PUD_SIZE)
+ return PUD_SIZE;
+
+ if (!(base & (PMD_SIZE - 1)) && size >= PMD_SIZE)
return PMD_SIZE;
return PAGE_SIZE;
@@ -715,6 +729,8 @@ static __init pgprot_t pgprot_from_va(uintptr_t va)
#endif /* CONFIG_STRICT_KERNEL_RWX */
#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
+u64 __pi_set_satp_mode_from_cmdline(uintptr_t dtb_pa);
+
static void __init disable_pgtable_l5(void)
{
pgtable_l5_enabled = false;
@@ -729,17 +745,39 @@ static void __init disable_pgtable_l4(void)
satp_mode = SATP_MODE_39;
}
+static int __init print_no4lvl(char *p)
+{
+ pr_info("Disabled 4-level and 5-level paging");
+ return 0;
+}
+early_param("no4lvl", print_no4lvl);
+
+static int __init print_no5lvl(char *p)
+{
+ pr_info("Disabled 5-level paging");
+ return 0;
+}
+early_param("no5lvl", print_no5lvl);
+
/*
* There is a simple way to determine if 4-level is supported by the
* underlying hardware: establish 1:1 mapping in 4-level page table mode
* then read SATP to see if the configuration was taken into account
* meaning sv48 is supported.
*/
-static __init void set_satp_mode(void)
+static __init void set_satp_mode(uintptr_t dtb_pa)
{
u64 identity_satp, hw_satp;
uintptr_t set_satp_mode_pmd = ((unsigned long)set_satp_mode) & PMD_MASK;
- bool check_l4 = false;
+ u64 satp_mode_cmdline = __pi_set_satp_mode_from_cmdline(dtb_pa);
+
+ if (satp_mode_cmdline == SATP_MODE_57) {
+ disable_pgtable_l5();
+ } else if (satp_mode_cmdline == SATP_MODE_48) {
+ disable_pgtable_l5();
+ disable_pgtable_l4();
+ return;
+ }
create_p4d_mapping(early_p4d,
set_satp_mode_pmd, (uintptr_t)early_pud,
@@ -758,7 +796,8 @@ static __init void set_satp_mode(void)
retry:
create_pgd_mapping(early_pg_dir,
set_satp_mode_pmd,
- check_l4 ? (uintptr_t)early_pud : (uintptr_t)early_p4d,
+ pgtable_l5_enabled ?
+ (uintptr_t)early_p4d : (uintptr_t)early_pud,
PGDIR_SIZE, PAGE_TABLE);
identity_satp = PFN_DOWN((uintptr_t)&early_pg_dir) | satp_mode;
@@ -769,9 +808,8 @@ retry:
local_flush_tlb_all();
if (hw_satp != identity_satp) {
- if (!check_l4) {
+ if (pgtable_l5_enabled) {
disable_pgtable_l5();
- check_l4 = true;
memset(early_pg_dir, 0, PAGE_SIZE);
goto retry;
}
@@ -803,6 +841,44 @@ retry:
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif
+#ifdef CONFIG_RELOCATABLE
+extern unsigned long __rela_dyn_start, __rela_dyn_end;
+
+static void __init relocate_kernel(void)
+{
+ Elf64_Rela *rela = (Elf64_Rela *)&__rela_dyn_start;
+ /*
+ * This holds the offset between the linked virtual address and the
+ * relocated virtual address.
+ */
+ uintptr_t reloc_offset = kernel_map.virt_addr - KERNEL_LINK_ADDR;
+ /*
+ * This holds the offset between kernel linked virtual address and
+ * physical address.
+ */
+ uintptr_t va_kernel_link_pa_offset = KERNEL_LINK_ADDR - kernel_map.phys_addr;
+
+ for ( ; rela < (Elf64_Rela *)&__rela_dyn_end; rela++) {
+ Elf64_Addr addr = (rela->r_offset - va_kernel_link_pa_offset);
+ Elf64_Addr relocated_addr = rela->r_addend;
+
+ if (rela->r_info != R_RISCV_RELATIVE)
+ continue;
+
+ /*
+ * Make sure to not relocate vdso symbols like rt_sigreturn
+ * which are linked from the address 0 in vmlinux since
+ * vdso symbol addresses are actually used as an offset from
+ * mm->context.vdso in VDSO_OFFSET macro.
+ */
+ if (relocated_addr >= KERNEL_LINK_ADDR)
+ relocated_addr += reloc_offset;
+
+ *(Elf64_Addr *)addr = relocated_addr;
+ }
+}
+#endif /* CONFIG_RELOCATABLE */
+
#ifdef CONFIG_XIP_KERNEL
static void __init create_kernel_page_table(pgd_t *pgdir,
__always_unused bool early)
@@ -958,14 +1034,25 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
#endif
#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL)
- set_satp_mode();
+ set_satp_mode(dtb_pa);
#endif
- kernel_map.va_pa_offset = PAGE_OFFSET - kernel_map.phys_addr;
+ /*
+ * In 64-bit, we defer the setup of va_pa_offset to setup_bootmem,
+ * where we have the system memory layout: this allows us to align
+ * the physical and virtual mappings and then make use of PUD/P4D/PGD
+ * for the linear mapping. This is only possible because the kernel
+ * mapping lies outside the linear mapping.
+ * In 32-bit however, as the kernel resides in the linear mapping,
+ * setup_vm_final can not change the mapping established here,
+ * otherwise the same kernel addresses would get mapped to different
+ * physical addresses (if the start of dram is different from the
+ * kernel physical address start).
+ */
+ kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ?
+ 0UL : PAGE_OFFSET - kernel_map.phys_addr;
kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr;
- riscv_pfn_base = PFN_DOWN(kernel_map.phys_addr);
-
/*
* The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit
* kernel, whereas for 64-bit kernel, the end of the virtual address
@@ -986,6 +1073,17 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
BUG_ON((kernel_map.virt_addr + kernel_map.size) > ADDRESS_SPACE_END - SZ_4K);
#endif
+#ifdef CONFIG_RELOCATABLE
+ /*
+ * Early page table uses only one PUD, which makes it possible
+ * to map PUD_SIZE aligned on PUD_SIZE: if the relocation offset
+ * makes the kernel cross over a PUD_SIZE boundary, raise a bug
+ * since a part of the kernel would not get mapped.
+ */
+ BUG_ON(PUD_SIZE - (kernel_map.virt_addr & (PUD_SIZE - 1)) < kernel_map.size);
+ relocate_kernel();
+#endif
+
apply_early_boot_alternatives();
pt_ops_set_early();
@@ -1070,12 +1168,62 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
pt_ops_set_fixmap();
}
-static void __init setup_vm_final(void)
+static void __init create_linear_mapping_range(phys_addr_t start,
+ phys_addr_t end)
{
+ phys_addr_t pa;
uintptr_t va, map_size;
- phys_addr_t pa, start, end;
+
+ for (pa = start; pa < end; pa += map_size) {
+ va = (uintptr_t)__va(pa);
+ map_size = best_map_size(pa, end - pa);
+
+ create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
+ pgprot_from_va(va));
+ }
+}
+
+static void __init create_linear_mapping_page_table(void)
+{
+ phys_addr_t start, end;
u64 i;
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ phys_addr_t ktext_start = __pa_symbol(_start);
+ phys_addr_t ktext_size = __init_data_begin - _start;
+ phys_addr_t krodata_start = __pa_symbol(__start_rodata);
+ phys_addr_t krodata_size = _data - __start_rodata;
+
+ /* Isolate kernel text and rodata so they don't get mapped with a PUD */
+ memblock_mark_nomap(ktext_start, ktext_size);
+ memblock_mark_nomap(krodata_start, krodata_size);
+#endif
+
+ /* Map all memory banks in the linear mapping */
+ for_each_mem_range(i, &start, &end) {
+ if (start >= end)
+ break;
+ if (start <= __pa(PAGE_OFFSET) &&
+ __pa(PAGE_OFFSET) < end)
+ start = __pa(PAGE_OFFSET);
+ if (end >= __pa(PAGE_OFFSET) + memory_limit)
+ end = __pa(PAGE_OFFSET) + memory_limit;
+
+ create_linear_mapping_range(start, end);
+ }
+
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ create_linear_mapping_range(ktext_start, ktext_start + ktext_size);
+ create_linear_mapping_range(krodata_start,
+ krodata_start + krodata_size);
+
+ memblock_clear_nomap(ktext_start, ktext_size);
+ memblock_clear_nomap(krodata_start, krodata_size);
+#endif
+}
+
+static void __init setup_vm_final(void)
+{
/* Setup swapper PGD for fixmap */
#if !defined(CONFIG_64BIT)
/*
@@ -1091,24 +1239,8 @@ static void __init setup_vm_final(void)
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
- /* Map all memory banks in the linear mapping */
- for_each_mem_range(i, &start, &end) {
- if (start >= end)
- break;
- if (start <= __pa(PAGE_OFFSET) &&
- __pa(PAGE_OFFSET) < end)
- start = __pa(PAGE_OFFSET);
- if (end >= __pa(PAGE_OFFSET) + memory_limit)
- end = __pa(PAGE_OFFSET) + memory_limit;
-
- for (pa = start; pa < end; pa += map_size) {
- va = (uintptr_t)__va(pa);
- map_size = best_map_size(pa, end - pa);
-
- create_pgd_mapping(swapper_pg_dir, va, pa, map_size,
- pgprot_from_va(va));
- }
- }
+ /* Map the linear mapping */
+ create_linear_mapping_page_table();
/* Map the kernel */
if (IS_ENABLED(CONFIG_64BIT))
diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c
index e1226709490f..8fc0efcf905c 100644
--- a/arch/riscv/mm/kasan_init.c
+++ b/arch/riscv/mm/kasan_init.c
@@ -18,58 +18,48 @@
* For sv39, the region is aligned on PGDIR_SIZE so we only need to populate
* the page global directory with kasan_early_shadow_pmd.
*
- * For sv48 and sv57, the region is not aligned on PGDIR_SIZE so the mapping
- * must be divided as follows:
- * - the first PGD entry, although incomplete, is populated with
- * kasan_early_shadow_pud/p4d
- * - the PGD entries in the middle are populated with kasan_early_shadow_pud/p4d
- * - the last PGD entry is shared with the kernel mapping so populated at the
- * lower levels pud/p4d
- *
- * In addition, when shallow populating a kasan region (for example vmalloc),
- * this region may also not be aligned on PGDIR size, so we must go down to the
- * pud level too.
+ * For sv48 and sv57, the region start is aligned on PGDIR_SIZE whereas the end
+ * region is not and then we have to go down to the PUD level.
*/
extern pgd_t early_pg_dir[PTRS_PER_PGD];
+pgd_t tmp_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
+p4d_t tmp_p4d[PTRS_PER_P4D] __page_aligned_bss;
+pud_t tmp_pud[PTRS_PER_PUD] __page_aligned_bss;
static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
- pte_t *ptep, *base_pte;
+ pte_t *ptep, *p;
- if (pmd_none(*pmd))
- base_pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
- else
- base_pte = (pte_t *)pmd_page_vaddr(*pmd);
+ if (pmd_none(*pmd)) {
+ p = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
+ set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(p)), PAGE_TABLE));
+ }
- ptep = base_pte + pte_index(vaddr);
+ ptep = pte_offset_kernel(pmd, vaddr);
do {
if (pte_none(*ptep)) {
phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ memset(__va(phys_addr), KASAN_SHADOW_INIT, PAGE_SIZE);
}
} while (ptep++, vaddr += PAGE_SIZE, vaddr != end);
-
- set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
}
static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
- pmd_t *pmdp, *base_pmd;
+ pmd_t *pmdp, *p;
unsigned long next;
if (pud_none(*pud)) {
- base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
- } else {
- base_pmd = (pmd_t *)pud_pgtable(*pud);
- if (base_pmd == lm_alias(kasan_early_shadow_pmd))
- base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ p = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+ set_pud(pud, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
- pmdp = base_pmd + pmd_index(vaddr);
+ pmdp = pmd_offset(pud, vaddr);
do {
next = pmd_addr_end(vaddr, end);
@@ -78,157 +68,77 @@ static void __init kasan_populate_pmd(pud_t *pud, unsigned long vaddr, unsigned
phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE);
if (phys_addr) {
set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ memset(__va(phys_addr), KASAN_SHADOW_INIT, PMD_SIZE);
continue;
}
}
kasan_populate_pte(pmdp, vaddr, next);
} while (pmdp++, vaddr = next, vaddr != end);
-
- /*
- * Wait for the whole PGD to be populated before setting the PGD in
- * the page table, otherwise, if we did set the PGD before populating
- * it entirely, memblock could allocate a page at a physical address
- * where KASAN is not populated yet and then we'd get a page fault.
- */
- set_pud(pud, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
}
-static void __init kasan_populate_pud(pgd_t *pgd,
- unsigned long vaddr, unsigned long end,
- bool early)
+static void __init kasan_populate_pud(p4d_t *p4d,
+ unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
- pud_t *pudp, *base_pud;
+ pud_t *pudp, *p;
unsigned long next;
- if (early) {
- /*
- * We can't use pgd_page_vaddr here as it would return a linear
- * mapping address but it is not mapped yet, but when populating
- * early_pg_dir, we need the physical address and when populating
- * swapper_pg_dir, we need the kernel virtual address so use
- * pt_ops facility.
- */
- base_pud = pt_ops.get_pud_virt(pfn_to_phys(_pgd_pfn(*pgd)));
- } else if (pgd_none(*pgd)) {
- base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
- memcpy(base_pud, (void *)kasan_early_shadow_pud,
- sizeof(pud_t) * PTRS_PER_PUD);
- } else {
- base_pud = (pud_t *)pgd_page_vaddr(*pgd);
- if (base_pud == lm_alias(kasan_early_shadow_pud)) {
- base_pud = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
- memcpy(base_pud, (void *)kasan_early_shadow_pud,
- sizeof(pud_t) * PTRS_PER_PUD);
- }
+ if (p4d_none(*p4d)) {
+ p = memblock_alloc(PTRS_PER_PUD * sizeof(pud_t), PAGE_SIZE);
+ set_p4d(p4d, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
- pudp = base_pud + pud_index(vaddr);
+ pudp = pud_offset(p4d, vaddr);
do {
next = pud_addr_end(vaddr, end);
if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
- if (early) {
- phys_addr = __pa(((uintptr_t)kasan_early_shadow_pmd));
- set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
+ phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
+ if (phys_addr) {
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ memset(__va(phys_addr), KASAN_SHADOW_INIT, PUD_SIZE);
continue;
- } else {
- phys_addr = memblock_phys_alloc(PUD_SIZE, PUD_SIZE);
- if (phys_addr) {
- set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_KERNEL));
- continue;
- }
}
}
kasan_populate_pmd(pudp, vaddr, next);
} while (pudp++, vaddr = next, vaddr != end);
-
- /*
- * Wait for the whole PGD to be populated before setting the PGD in
- * the page table, otherwise, if we did set the PGD before populating
- * it entirely, memblock could allocate a page at a physical address
- * where KASAN is not populated yet and then we'd get a page fault.
- */
- if (!early)
- set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
}
static void __init kasan_populate_p4d(pgd_t *pgd,
- unsigned long vaddr, unsigned long end,
- bool early)
+ unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
- p4d_t *p4dp, *base_p4d;
+ p4d_t *p4dp, *p;
unsigned long next;
- if (early) {
- /*
- * We can't use pgd_page_vaddr here as it would return a linear
- * mapping address but it is not mapped yet, but when populating
- * early_pg_dir, we need the physical address and when populating
- * swapper_pg_dir, we need the kernel virtual address so use
- * pt_ops facility.
- */
- base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgd)));
- } else {
- base_p4d = (p4d_t *)pgd_page_vaddr(*pgd);
- if (base_p4d == lm_alias(kasan_early_shadow_p4d)) {
- base_p4d = memblock_alloc(PTRS_PER_PUD * sizeof(p4d_t), PAGE_SIZE);
- memcpy(base_p4d, (void *)kasan_early_shadow_p4d,
- sizeof(p4d_t) * PTRS_PER_P4D);
- }
+ if (pgd_none(*pgd)) {
+ p = memblock_alloc(PTRS_PER_P4D * sizeof(p4d_t), PAGE_SIZE);
+ set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
}
- p4dp = base_p4d + p4d_index(vaddr);
+ p4dp = p4d_offset(pgd, vaddr);
do {
next = p4d_addr_end(vaddr, end);
if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE) {
- if (early) {
- phys_addr = __pa(((uintptr_t)kasan_early_shadow_pud));
- set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
+ phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
+ if (phys_addr) {
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ memset(__va(phys_addr), KASAN_SHADOW_INIT, P4D_SIZE);
continue;
- } else {
- phys_addr = memblock_phys_alloc(P4D_SIZE, P4D_SIZE);
- if (phys_addr) {
- set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_KERNEL));
- continue;
- }
}
}
- kasan_populate_pud((pgd_t *)p4dp, vaddr, next, early);
+ kasan_populate_pud(p4dp, vaddr, next);
} while (p4dp++, vaddr = next, vaddr != end);
-
- /*
- * Wait for the whole P4D to be populated before setting the P4D in
- * the page table, otherwise, if we did set the P4D before populating
- * it entirely, memblock could allocate a page at a physical address
- * where KASAN is not populated yet and then we'd get a page fault.
- */
- if (!early)
- set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_p4d)), PAGE_TABLE));
}
-#define kasan_early_shadow_pgd_next (pgtable_l5_enabled ? \
- (uintptr_t)kasan_early_shadow_p4d : \
- (pgtable_l4_enabled ? \
- (uintptr_t)kasan_early_shadow_pud : \
- (uintptr_t)kasan_early_shadow_pmd))
-#define kasan_populate_pgd_next(pgdp, vaddr, next, early) \
- (pgtable_l5_enabled ? \
- kasan_populate_p4d(pgdp, vaddr, next, early) : \
- (pgtable_l4_enabled ? \
- kasan_populate_pud(pgdp, vaddr, next, early) : \
- kasan_populate_pmd((pud_t *)pgdp, vaddr, next)))
-
static void __init kasan_populate_pgd(pgd_t *pgdp,
- unsigned long vaddr, unsigned long end,
- bool early)
+ unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
unsigned long next;
@@ -236,29 +146,174 @@ static void __init kasan_populate_pgd(pgd_t *pgdp,
do {
next = pgd_addr_end(vaddr, end);
- if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
- if (early) {
- phys_addr = __pa((uintptr_t)kasan_early_shadow_pgd_next);
- set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
+ if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
+ (next - vaddr) >= PGDIR_SIZE) {
+ phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+ if (phys_addr) {
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+ memset(__va(phys_addr), KASAN_SHADOW_INIT, PGDIR_SIZE);
continue;
- } else if (pgd_page_vaddr(*pgdp) ==
- (unsigned long)lm_alias(kasan_early_shadow_pgd_next)) {
- /*
- * pgdp can't be none since kasan_early_init
- * initialized all KASAN shadow region with
- * kasan_early_shadow_pud: if this is still the
- * case, that means we can try to allocate a
- * hugepage as a replacement.
- */
- phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
- if (phys_addr) {
- set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
- continue;
- }
}
}
- kasan_populate_pgd_next(pgdp, vaddr, next, early);
+ kasan_populate_p4d(pgdp, vaddr, next);
+ } while (pgdp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_early_clear_pud(p4d_t *p4dp,
+ unsigned long vaddr, unsigned long end)
+{
+ pud_t *pudp, *base_pud;
+ unsigned long next;
+
+ if (!pgtable_l4_enabled) {
+ pudp = (pud_t *)p4dp;
+ } else {
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp)));
+ pudp = base_pud + pud_index(vaddr);
+ }
+
+ do {
+ next = pud_addr_end(vaddr, end);
+
+ if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE) {
+ pud_clear(pudp);
+ continue;
+ }
+
+ BUG();
+ } while (pudp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_early_clear_p4d(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ p4d_t *p4dp, *base_p4d;
+ unsigned long next;
+
+ if (!pgtable_l5_enabled) {
+ p4dp = (p4d_t *)pgdp;
+ } else {
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp)));
+ p4dp = base_p4d + p4d_index(vaddr);
+ }
+
+ do {
+ next = p4d_addr_end(vaddr, end);
+
+ if (pgtable_l4_enabled && IS_ALIGNED(vaddr, P4D_SIZE) &&
+ (next - vaddr) >= P4D_SIZE) {
+ p4d_clear(p4dp);
+ continue;
+ }
+
+ kasan_early_clear_pud(p4dp, vaddr, next);
+ } while (p4dp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_early_clear_pgd(pgd_t *pgdp,
+ unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+
+ do {
+ next = pgd_addr_end(vaddr, end);
+
+ if (pgtable_l5_enabled && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
+ (next - vaddr) >= PGDIR_SIZE) {
+ pgd_clear(pgdp);
+ continue;
+ }
+
+ kasan_early_clear_p4d(pgdp, vaddr, next);
+ } while (pgdp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_early_populate_pud(p4d_t *p4dp,
+ unsigned long vaddr,
+ unsigned long end)
+{
+ pud_t *pudp, *base_pud;
+ phys_addr_t phys_addr;
+ unsigned long next;
+
+ if (!pgtable_l4_enabled) {
+ pudp = (pud_t *)p4dp;
+ } else {
+ base_pud = pt_ops.get_pud_virt(pfn_to_phys(_p4d_pfn(*p4dp)));
+ pudp = base_pud + pud_index(vaddr);
+ }
+
+ do {
+ next = pud_addr_end(vaddr, end);
+
+ if (pud_none(*pudp) && IS_ALIGNED(vaddr, PUD_SIZE) &&
+ (next - vaddr) >= PUD_SIZE) {
+ phys_addr = __pa((uintptr_t)kasan_early_shadow_pmd);
+ set_pud(pudp, pfn_pud(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ }
+
+ BUG();
+ } while (pudp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_early_populate_p4d(pgd_t *pgdp,
+ unsigned long vaddr,
+ unsigned long end)
+{
+ p4d_t *p4dp, *base_p4d;
+ phys_addr_t phys_addr;
+ unsigned long next;
+
+ /*
+ * We can't use pgd_page_vaddr here as it would return a linear
+ * mapping address but it is not mapped yet, but when populating
+ * early_pg_dir, we need the physical address and when populating
+ * swapper_pg_dir, we need the kernel virtual address so use
+ * pt_ops facility.
+ * Note that this test is then completely equivalent to
+ * p4dp = p4d_offset(pgdp, vaddr)
+ */
+ if (!pgtable_l5_enabled) {
+ p4dp = (p4d_t *)pgdp;
+ } else {
+ base_p4d = pt_ops.get_p4d_virt(pfn_to_phys(_pgd_pfn(*pgdp)));
+ p4dp = base_p4d + p4d_index(vaddr);
+ }
+
+ do {
+ next = p4d_addr_end(vaddr, end);
+
+ if (p4d_none(*p4dp) && IS_ALIGNED(vaddr, P4D_SIZE) &&
+ (next - vaddr) >= P4D_SIZE) {
+ phys_addr = __pa((uintptr_t)kasan_early_shadow_pud);
+ set_p4d(p4dp, pfn_p4d(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ }
+
+ kasan_early_populate_pud(p4dp, vaddr, next);
+ } while (p4dp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_early_populate_pgd(pgd_t *pgdp,
+ unsigned long vaddr,
+ unsigned long end)
+{
+ phys_addr_t phys_addr;
+ unsigned long next;
+
+ do {
+ next = pgd_addr_end(vaddr, end);
+
+ if (pgd_none(*pgdp) && IS_ALIGNED(vaddr, PGDIR_SIZE) &&
+ (next - vaddr) >= PGDIR_SIZE) {
+ phys_addr = __pa((uintptr_t)kasan_early_shadow_p4d);
+ set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_TABLE));
+ continue;
+ }
+
+ kasan_early_populate_p4d(pgdp, vaddr, next);
} while (pgdp++, vaddr = next, vaddr != end);
}
@@ -295,16 +350,16 @@ asmlinkage void __init kasan_early_init(void)
PAGE_TABLE));
}
- kasan_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
- KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+ kasan_early_populate_pgd(early_pg_dir + pgd_index(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END);
local_flush_tlb_all();
}
void __init kasan_swapper_init(void)
{
- kasan_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
- KASAN_SHADOW_START, KASAN_SHADOW_END, true);
+ kasan_early_populate_pgd(pgd_offset_k(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END);
local_flush_tlb_all();
}
@@ -314,118 +369,65 @@ static void __init kasan_populate(void *start, void *end)
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
- kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend, false);
-
- local_flush_tlb_all();
- memset(start, KASAN_SHADOW_INIT, end - start);
+ kasan_populate_pgd(pgd_offset_k(vaddr), vaddr, vend);
}
-static void __init kasan_shallow_populate_pmd(pgd_t *pgdp,
+static void __init kasan_shallow_populate_pud(p4d_t *p4d,
unsigned long vaddr, unsigned long end)
{
unsigned long next;
- pmd_t *pmdp, *base_pmd;
- bool is_kasan_pte;
-
- base_pmd = (pmd_t *)pgd_page_vaddr(*pgdp);
- pmdp = base_pmd + pmd_index(vaddr);
-
- do {
- next = pmd_addr_end(vaddr, end);
- is_kasan_pte = (pmd_pgtable(*pmdp) == lm_alias(kasan_early_shadow_pte));
-
- if (is_kasan_pte)
- pmd_clear(pmdp);
- } while (pmdp++, vaddr = next, vaddr != end);
-}
-
-static void __init kasan_shallow_populate_pud(pgd_t *pgdp,
- unsigned long vaddr, unsigned long end)
-{
- unsigned long next;
- pud_t *pudp, *base_pud;
- pmd_t *base_pmd;
- bool is_kasan_pmd;
-
- base_pud = (pud_t *)pgd_page_vaddr(*pgdp);
- pudp = base_pud + pud_index(vaddr);
+ void *p;
+ pud_t *pud_k = pud_offset(p4d, vaddr);
do {
next = pud_addr_end(vaddr, end);
- is_kasan_pmd = (pud_pgtable(*pudp) == lm_alias(kasan_early_shadow_pmd));
-
- if (!is_kasan_pmd)
- continue;
-
- base_pmd = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- set_pud(pudp, pfn_pud(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
- if (IS_ALIGNED(vaddr, PUD_SIZE) && (next - vaddr) >= PUD_SIZE)
+ if (pud_none(*pud_k)) {
+ p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pud(pud_k, pfn_pud(PFN_DOWN(__pa(p)), PAGE_TABLE));
continue;
+ }
- memcpy(base_pmd, (void *)kasan_early_shadow_pmd, PAGE_SIZE);
- kasan_shallow_populate_pmd((pgd_t *)pudp, vaddr, next);
- } while (pudp++, vaddr = next, vaddr != end);
+ BUG();
+ } while (pud_k++, vaddr = next, vaddr != end);
}
-static void __init kasan_shallow_populate_p4d(pgd_t *pgdp,
+static void __init kasan_shallow_populate_p4d(pgd_t *pgd,
unsigned long vaddr, unsigned long end)
{
unsigned long next;
- p4d_t *p4dp, *base_p4d;
- pud_t *base_pud;
- bool is_kasan_pud;
-
- base_p4d = (p4d_t *)pgd_page_vaddr(*pgdp);
- p4dp = base_p4d + p4d_index(vaddr);
+ void *p;
+ p4d_t *p4d_k = p4d_offset(pgd, vaddr);
do {
next = p4d_addr_end(vaddr, end);
- is_kasan_pud = (p4d_pgtable(*p4dp) == lm_alias(kasan_early_shadow_pud));
- if (!is_kasan_pud)
- continue;
-
- base_pud = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
- set_p4d(p4dp, pfn_p4d(PFN_DOWN(__pa(base_pud)), PAGE_TABLE));
-
- if (IS_ALIGNED(vaddr, P4D_SIZE) && (next - vaddr) >= P4D_SIZE)
+ if (p4d_none(*p4d_k)) {
+ p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_p4d(p4d_k, pfn_p4d(PFN_DOWN(__pa(p)), PAGE_TABLE));
continue;
+ }
- memcpy(base_pud, (void *)kasan_early_shadow_pud, PAGE_SIZE);
- kasan_shallow_populate_pud((pgd_t *)p4dp, vaddr, next);
- } while (p4dp++, vaddr = next, vaddr != end);
+ kasan_shallow_populate_pud(p4d_k, vaddr, end);
+ } while (p4d_k++, vaddr = next, vaddr != end);
}
-#define kasan_shallow_populate_pgd_next(pgdp, vaddr, next) \
- (pgtable_l5_enabled ? \
- kasan_shallow_populate_p4d(pgdp, vaddr, next) : \
- (pgtable_l4_enabled ? \
- kasan_shallow_populate_pud(pgdp, vaddr, next) : \
- kasan_shallow_populate_pmd(pgdp, vaddr, next)))
-
static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
{
unsigned long next;
void *p;
pgd_t *pgd_k = pgd_offset_k(vaddr);
- bool is_kasan_pgd_next;
do {
next = pgd_addr_end(vaddr, end);
- is_kasan_pgd_next = (pgd_page_vaddr(*pgd_k) ==
- (unsigned long)lm_alias(kasan_early_shadow_pgd_next));
- if (is_kasan_pgd_next) {
+ if (pgd_none(*pgd_k)) {
p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
- }
-
- if (IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE)
continue;
+ }
- memcpy(p, (void *)kasan_early_shadow_pgd_next, PAGE_SIZE);
- kasan_shallow_populate_pgd_next(pgd_k, vaddr, next);
+ kasan_shallow_populate_p4d(pgd_k, vaddr, next);
} while (pgd_k++, vaddr = next, vaddr != end);
}
@@ -435,7 +437,37 @@ static void __init kasan_shallow_populate(void *start, void *end)
unsigned long vend = PAGE_ALIGN((unsigned long)end);
kasan_shallow_populate_pgd(vaddr, vend);
- local_flush_tlb_all();
+}
+
+static void create_tmp_mapping(void)
+{
+ void *ptr;
+ p4d_t *base_p4d;
+
+ /*
+ * We need to clean the early mapping: this is hard to achieve "in-place",
+ * so install a temporary mapping like arm64 and x86 do.
+ */
+ memcpy(tmp_pg_dir, swapper_pg_dir, sizeof(pgd_t) * PTRS_PER_PGD);
+
+ /* Copy the last p4d since it is shared with the kernel mapping. */
+ if (pgtable_l5_enabled) {
+ ptr = (p4d_t *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+ memcpy(tmp_p4d, ptr, sizeof(p4d_t) * PTRS_PER_P4D);
+ set_pgd(&tmp_pg_dir[pgd_index(KASAN_SHADOW_END)],
+ pfn_pgd(PFN_DOWN(__pa(tmp_p4d)), PAGE_TABLE));
+ base_p4d = tmp_p4d;
+ } else {
+ base_p4d = (p4d_t *)tmp_pg_dir;
+ }
+
+ /* Copy the last pud since it is shared with the kernel mapping. */
+ if (pgtable_l4_enabled) {
+ ptr = (pud_t *)p4d_page_vaddr(*(base_p4d + p4d_index(KASAN_SHADOW_END)));
+ memcpy(tmp_pud, ptr, sizeof(pud_t) * PTRS_PER_PUD);
+ set_p4d(&base_p4d[p4d_index(KASAN_SHADOW_END)],
+ pfn_p4d(PFN_DOWN(__pa(tmp_pud)), PAGE_TABLE));
+ }
}
void __init kasan_init(void)
@@ -443,10 +475,27 @@ void __init kasan_init(void)
phys_addr_t p_start, p_end;
u64 i;
- if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+ create_tmp_mapping();
+ csr_write(CSR_SATP, PFN_DOWN(__pa(tmp_pg_dir)) | satp_mode);
+
+ kasan_early_clear_pgd(pgd_offset_k(KASAN_SHADOW_START),
+ KASAN_SHADOW_START, KASAN_SHADOW_END);
+
+ kasan_populate_early_shadow((void *)kasan_mem_to_shadow((void *)FIXADDR_START),
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_START));
+
+ if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
kasan_shallow_populate(
(void *)kasan_mem_to_shadow((void *)VMALLOC_START),
(void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+ /* Shallow populate modules and BPF which are vmalloc-allocated */
+ kasan_shallow_populate(
+ (void *)kasan_mem_to_shadow((void *)MODULES_VADDR),
+ (void *)kasan_mem_to_shadow((void *)MODULES_END));
+ } else {
+ kasan_populate_early_shadow((void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+ (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+ }
/* Populate the linear mapping */
for_each_mem_range(i, &p_start, &p_end) {
@@ -459,8 +508,8 @@ void __init kasan_init(void)
kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
}
- /* Populate kernel, BPF, modules mapping */
- kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR),
+ /* Populate kernel */
+ kasan_populate(kasan_mem_to_shadow((const void *)MODULES_END),
kasan_mem_to_shadow((const void *)MODULES_VADDR + SZ_2G));
for (i = 0; i < PTRS_PER_PTE; i++)
@@ -471,4 +520,7 @@ void __init kasan_init(void)
memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
init_task.kasan_depth = 0;
+
+ csr_write(CSR_SATP, PFN_DOWN(__pa(swapper_pg_dir)) | satp_mode);
+ local_flush_tlb_all();
}
diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c
index 9b18bda74154..18706f457da7 100644
--- a/arch/riscv/mm/physaddr.c
+++ b/arch/riscv/mm/physaddr.c
@@ -33,3 +33,19 @@ phys_addr_t __phys_addr_symbol(unsigned long x)
return __va_to_pa_nodebug(x);
}
EXPORT_SYMBOL(__phys_addr_symbol);
+
+phys_addr_t linear_mapping_va_to_pa(unsigned long x)
+{
+ BUG_ON(!kernel_map.va_pa_offset);
+
+ return ((unsigned long)(x) - kernel_map.va_pa_offset);
+}
+EXPORT_SYMBOL(linear_mapping_va_to_pa);
+
+void *linear_mapping_pa_to_va(unsigned long x)
+{
+ BUG_ON(!kernel_map.va_pa_offset);
+
+ return ((void *)((unsigned long)(x) + kernel_map.va_pa_offset));
+}
+EXPORT_SYMBOL(linear_mapping_pa_to_va);
diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c
index 830e7de65e3a..20a9f991a6d7 100644
--- a/arch/riscv/mm/ptdump.c
+++ b/arch/riscv/mm/ptdump.c
@@ -59,10 +59,6 @@ struct ptd_mm_info {
};
enum address_markers_idx {
-#ifdef CONFIG_KASAN
- KASAN_SHADOW_START_NR,
- KASAN_SHADOW_END_NR,
-#endif
FIXMAP_START_NR,
FIXMAP_END_NR,
PCI_IO_START_NR,
@@ -74,6 +70,10 @@ enum address_markers_idx {
VMALLOC_START_NR,
VMALLOC_END_NR,
PAGE_OFFSET_NR,
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
#ifdef CONFIG_64BIT
MODULES_MAPPING_NR,
KERNEL_MAPPING_NR,
@@ -82,10 +82,6 @@ enum address_markers_idx {
};
static struct addr_marker address_markers[] = {
-#ifdef CONFIG_KASAN
- {0, "Kasan shadow start"},
- {0, "Kasan shadow end"},
-#endif
{0, "Fixmap start"},
{0, "Fixmap end"},
{0, "PCI I/O start"},
@@ -97,6 +93,10 @@ static struct addr_marker address_markers[] = {
{0, "vmalloc() area"},
{0, "vmalloc() end"},
{0, "Linear mapping"},
+#ifdef CONFIG_KASAN
+ {0, "Kasan shadow start"},
+ {0, "Kasan shadow end"},
+#endif
#ifdef CONFIG_64BIT
{0, "Modules/BPF mapping"},
{0, "Kernel mapping"},
@@ -362,10 +362,6 @@ static int __init ptdump_init(void)
{
unsigned int i, j;
-#ifdef CONFIG_KASAN
- address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
- address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
-#endif
address_markers[FIXMAP_START_NR].start_address = FIXADDR_START;
address_markers[FIXMAP_END_NR].start_address = FIXADDR_TOP;
address_markers[PCI_IO_START_NR].start_address = PCI_IO_START;
@@ -377,6 +373,10 @@ static int __init ptdump_init(void)
address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET;
+#ifdef CONFIG_KASAN
+ address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
+ address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
+#endif
#ifdef CONFIG_64BIT
address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR;
address_markers[KERNEL_MAPPING_NR].start_address = kernel_map.virt_addr;
diff --git a/arch/riscv/tools/relocs_check.sh b/arch/riscv/tools/relocs_check.sh
new file mode 100755
index 000000000000..baeb2e7b2290
--- /dev/null
+++ b/arch/riscv/tools/relocs_check.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Based on powerpc relocs_check.sh
+
+# This script checks the relocations of a vmlinux for "suspicious"
+# relocations.
+
+if [ $# -lt 3 ]; then
+ echo "$0 [path to objdump] [path to nm] [path to vmlinux]" 1>&2
+ exit 1
+fi
+
+bad_relocs=$(
+${srctree}/scripts/relocs_check.sh "$@" |
+ # These relocations are okay
+ # R_RISCV_RELATIVE
+ grep -F -w -v 'R_RISCV_RELATIVE'
+)
+
+if [ -z "$bad_relocs" ]; then
+ exit 0
+fi
+
+num_bad=$(echo "$bad_relocs" | wc -l)
+echo "WARNING: $num_bad bad relocations"
+echo "$bad_relocs"
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index d1a68b6d03b3..d14735a81301 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -887,12 +887,13 @@ const void * __init of_flat_dt_match_machine(const void *default_match,
static void __early_init_dt_declare_initrd(unsigned long start,
unsigned long end)
{
- /* ARM64 would cause a BUG to occur here when CONFIG_DEBUG_VM is
- * enabled since __va() is called too early. ARM64 does make use
- * of phys_initrd_start/phys_initrd_size so we can skip this
- * conversion.
+ /*
+ * __va() is not yet available this early on some platforms. In that
+ * case, the platform uses phys_initrd_start/phys_initrd_size instead
+ * and does the VA conversion itself.
*/
- if (!IS_ENABLED(CONFIG_ARM64)) {
+ if (!IS_ENABLED(CONFIG_ARM64) &&
+ !(IS_ENABLED(CONFIG_RISCV) && IS_ENABLED(CONFIG_64BIT))) {
initrd_start = (unsigned long)__va(start);
initrd_end = (unsigned long)__va(end);
initrd_below_start_ok = 1;
diff --git a/scripts/relocs_check.sh b/scripts/relocs_check.sh
new file mode 100755
index 000000000000..137c660499f3
--- /dev/null
+++ b/scripts/relocs_check.sh
@@ -0,0 +1,20 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+# Get a list of all the relocations, remove from it the relocations
+# that are known to be legitimate and return this list to arch specific
+# script that will look for suspicious relocations.
+
+objdump="$1"
+nm="$2"
+vmlinux="$3"
+
+# Remove from the possible bad relocations those that match an undefined
+# weak symbol which will result in an absolute relocation to 0.
+# Weak unresolved symbols are of that form in nm output:
+# " w _binary__btf_vmlinux_bin_end"
+undef_weak_symbols=$($nm "$vmlinux" | awk '$1 ~ /w/ { print $2 }')
+
+$objdump -R "$vmlinux" |
+ grep -E '\<R_' |
+ ([ "$undef_weak_symbols" ] && grep -F -w -v "$undef_weak_symbols" || cat)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 97dcdaa656f6..90a62cf75008 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -64,6 +64,7 @@ TARGETS += pstore
TARGETS += ptrace
TARGETS += openat2
TARGETS += resctrl
+TARGETS += riscv
TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
new file mode 100644
index 000000000000..32a72902d045
--- /dev/null
+++ b/tools/testing/selftests/riscv/Makefile
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+# Originally tools/testing/arm64/Makefile
+
+# When ARCH not overridden for crosscompiling, lookup machine
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+
+ifneq (,$(filter $(ARCH),riscv))
+RISCV_SUBTARGETS ?= hwprobe
+else
+RISCV_SUBTARGETS :=
+endif
+
+CFLAGS := -Wall -O2 -g
+
+# A proper top_srcdir is needed by KSFT(lib.mk)
+top_srcdir = $(realpath ../../../../)
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
+
+CFLAGS += $(KHDR_INCLUDES)
+
+export CFLAGS
+export top_srcdir
+
+all:
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir -p $$BUILD_TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+install: all
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+run_tests: all
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+# Avoid any output on non riscv on emit_tests
+emit_tests: all
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+clean:
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+.PHONY: all clean install run_tests emit_tests
diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile
new file mode 100644
index 000000000000..ebdbb3c22e54
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+# Originally tools/testing/arm64/abi/Makefile
+
+TEST_GEN_PROGS := hwprobe
+
+include ../../lib.mk
+
+$(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S
+ $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
new file mode 100644
index 000000000000..09f290a67420
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <asm/hwprobe.h>
+
+/*
+ * Rather than relying on having a new enough libc to define this, just do it
+ * ourselves. This way we don't need to be coupled to a new-enough libc to
+ * contain the call.
+ */
+long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpu_count, unsigned long *cpus, unsigned int flags);
+
+int main(int argc, char **argv)
+{
+ struct riscv_hwprobe pairs[8];
+ unsigned long cpus;
+ long out;
+
+ /* Fake the CPU_SET ops. */
+ cpus = -1;
+
+ /*
+ * Just run a basic test: pass enough pairs to get up to the base
+ * behavior, and then check to make sure it's sane.
+ */
+ for (long i = 0; i < 8; i++)
+ pairs[i].key = i;
+ out = riscv_hwprobe(pairs, 8, 1, &cpus, 0);
+ if (out != 0)
+ return -1;
+ for (long i = 0; i < 4; ++i) {
+ /* Fail if the kernel claims not to recognize a base key. */
+ if ((i < 4) && (pairs[i].key != i))
+ return -2;
+
+ if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
+ continue;
+
+ if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
+ continue;
+
+ return -3;
+ }
+
+ /*
+ * This should also work with a NULL CPU set, but should not work
+ * with an improperly supplied CPU set.
+ */
+ out = riscv_hwprobe(pairs, 8, 0, 0, 0);
+ if (out != 0)
+ return -4;
+
+ out = riscv_hwprobe(pairs, 8, 0, &cpus, 0);
+ if (out == 0)
+ return -5;
+
+ out = riscv_hwprobe(pairs, 8, 1, 0, 0);
+ if (out == 0)
+ return -6;
+
+ /*
+ * Check that keys work by providing one that we know exists, and
+ * checking to make sure the resultig pair is what we asked for.
+ */
+ pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR;
+ out = riscv_hwprobe(pairs, 1, 1, &cpus, 0);
+ if (out != 0)
+ return -7;
+ if (pairs[0].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
+ return -8;
+
+ /*
+ * Check that an unknown key gets overwritten with -1,
+ * but doesn't block elements after it.
+ */
+ pairs[0].key = 0x5555;
+ pairs[1].key = 1;
+ pairs[1].value = 0xAAAA;
+ out = riscv_hwprobe(pairs, 2, 0, 0, 0);
+ if (out != 0)
+ return -9;
+
+ if (pairs[0].key != -1)
+ return -10;
+
+ if ((pairs[1].key != 1) || (pairs[1].value == 0xAAAA))
+ return -11;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S b/tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S
new file mode 100644
index 000000000000..a4773c88d267
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023 Rivos, Inc */
+
+.text
+.global riscv_hwprobe
+riscv_hwprobe:
+ # Put __NR_riscv_hwprobe in the syscall number register, then just shim
+ # back the kernel's return. This doesn't do any sort of errno
+ # handling, the caller can deal with it.
+ li a7, 258
+ ecall
+ ret