diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-29 01:55:39 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2023-04-29 01:55:39 +0200 |
commit | 89d77f71f493a3663b10fa812d17f472935d24be (patch) | |
tree | 03a7e0b87dc1e922ba028e8e0c4a5816c3c0e40a /arch/riscv/kernel | |
parent | Merge tag 'm68knommu-for-v6.4' of git://git.kernel.org/pub/scm/linux/kernel/g... (diff) | |
parent | RISC-V: hwprobe: Explicity check for -1 in vdso init (diff) | |
download | linux-89d77f71f493a3663b10fa812d17f472935d24be.tar.xz linux-89d77f71f493a3663b10fa812d17f472935d24be.zip |
Merge tag 'riscv-for-linus-6.4-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux
Pull RISC-V updates from Palmer Dabbelt:
- Support for runtime detection of the Svnapot extension
- Support for Zicboz when clearing pages
- We've moved to GENERIC_ENTRY
- Support for !MMU on rv32 systems
- The linear region is now mapped via huge pages
- Support for building relocatable kernels
- Support for the hwprobe interface
- Various fixes and cleanups throughout the tree
* tag 'riscv-for-linus-6.4-mw1' of git://git.kernel.org/pub/scm/linux/kernel/git/riscv/linux: (57 commits)
RISC-V: hwprobe: Explicity check for -1 in vdso init
RISC-V: hwprobe: There can only be one first
riscv: Allow to downgrade paging mode from the command line
dt-bindings: riscv: add sv57 mmu-type
RISC-V: hwprobe: Remove __init on probe_vendor_features()
riscv: Use --emit-relocs in order to move .rela.dyn in init
riscv: Check relocations at compile time
powerpc: Move script to check relocations at compile time in scripts/
riscv: Introduce CONFIG_RELOCATABLE
riscv: Move .rela.dyn outside of init to avoid empty relocations
riscv: Prepare EFI header for relocatable kernels
riscv: Unconditionnally select KASAN_VMALLOC if KASAN
riscv: Fix ptdump when KASAN is enabled
riscv: Fix EFI stub usage of KASAN instrumented strcmp function
riscv: Move DTB_EARLY_BASE_VA to the kernel address space
riscv: Rework kasan population functions
riscv: Split early and final KASAN population functions
riscv: Use PUD/P4D/PGD pages for the linear mapping
riscv: Move the linear mapping creation in its own function
riscv: Get rid of riscv_pfn_base variable
...
Diffstat (limited to 'arch/riscv/kernel')
28 files changed, 715 insertions, 564 deletions
diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 67f542be1bea..571f05958b41 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -68,8 +68,6 @@ obj-$(CONFIG_CPU_PM) += suspend_entry.o suspend.o obj-$(CONFIG_FUNCTION_TRACER) += mcount.o ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += mcount-dyn.o -obj-$(CONFIG_TRACE_IRQFLAGS) += trace_irq.o - obj-$(CONFIG_PERF_EVENTS) += perf_callchain.o obj-$(CONFIG_HAVE_PERF_REGS) += perf_regs.o obj-$(CONFIG_RISCV_SBI) += sbi.o @@ -90,3 +88,5 @@ obj-$(CONFIG_EFI) += efi.o obj-$(CONFIG_COMPAT) += compat_syscall_table.o obj-$(CONFIG_COMPAT) += compat_signal.o obj-$(CONFIG_COMPAT) += compat_vdso/ + +obj-$(CONFIG_64BIT) += pi/ diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c index 2354c69dc7d1..6b75788c18e6 100644 --- a/arch/riscv/kernel/alternative.c +++ b/arch/riscv/kernel/alternative.c @@ -27,9 +27,11 @@ struct cpu_manufacturer_info_t { void (*patch_func)(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage); + void (*feature_probe_func)(unsigned int cpu, unsigned long archid, + unsigned long impid); }; -static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info) +static void riscv_fill_cpu_mfr_info(struct cpu_manufacturer_info_t *cpu_mfr_info) { #ifdef CONFIG_RISCV_M_MODE cpu_mfr_info->vendor_id = csr_read(CSR_MVENDORID); @@ -41,6 +43,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf cpu_mfr_info->imp_id = sbi_get_mimpid(); #endif + cpu_mfr_info->feature_probe_func = NULL; switch (cpu_mfr_info->vendor_id) { #ifdef CONFIG_ERRATA_SIFIVE case SIFIVE_VENDOR_ID: @@ -50,6 +53,7 @@ static void __init_or_module riscv_fill_cpu_mfr_info(struct cpu_manufacturer_inf #ifdef CONFIG_ERRATA_THEAD case THEAD_VENDOR_ID: cpu_mfr_info->patch_func = thead_errata_patch_func; + cpu_mfr_info->feature_probe_func = thead_feature_probe_func; break; #endif default: @@ -139,6 +143,20 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len, } } +/* Called on each CPU as it starts */ +void probe_vendor_features(unsigned int cpu) +{ + struct cpu_manufacturer_info_t cpu_mfr_info; + + riscv_fill_cpu_mfr_info(&cpu_mfr_info); + if (!cpu_mfr_info.feature_probe_func) + return; + + cpu_mfr_info.feature_probe_func(cpu, + cpu_mfr_info.arch_id, + cpu_mfr_info.imp_id); +} + /* * This is called very early in the boot process (directly after we run * a feature detect on the boot CPU). No need to worry about other CPUs @@ -193,6 +211,7 @@ void __init apply_boot_alternatives(void) /* If called on non-boot cpu things could go wrong */ WARN_ON(smp_processor_id() != 0); + probe_vendor_features(0); _apply_alternatives((struct alt_entry *)__alt_start, (struct alt_entry *)__alt_end, RISCV_ALTERNATIVES_BOOT); diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index e3829d2de5d9..09e9b88110d1 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -63,53 +63,12 @@ uintptr_t get_cache_geometry(u32 level, enum cache_type type) 0; } -static void ci_leaf_init(struct cacheinfo *this_leaf, enum cache_type type, - unsigned int level, unsigned int size, - unsigned int sets, unsigned int line_size) +static void ci_leaf_init(struct cacheinfo *this_leaf, + struct device_node *node, + enum cache_type type, unsigned int level) { this_leaf->level = level; this_leaf->type = type; - this_leaf->size = size; - this_leaf->number_of_sets = sets; - this_leaf->coherency_line_size = line_size; - - /* - * If the cache is fully associative, there is no need to - * check the other properties. - */ - if (sets == 1) - return; - - /* - * Set the ways number for n-ways associative, make sure - * all properties are big than zero. - */ - if (sets > 0 && size > 0 && line_size > 0) - this_leaf->ways_of_associativity = (size / sets) / line_size; -} - -static void fill_cacheinfo(struct cacheinfo **this_leaf, - struct device_node *node, unsigned int level) -{ - unsigned int size, sets, line_size; - - if (!of_property_read_u32(node, "cache-size", &size) && - !of_property_read_u32(node, "cache-block-size", &line_size) && - !of_property_read_u32(node, "cache-sets", &sets)) { - ci_leaf_init((*this_leaf)++, CACHE_TYPE_UNIFIED, level, size, sets, line_size); - } - - if (!of_property_read_u32(node, "i-cache-size", &size) && - !of_property_read_u32(node, "i-cache-sets", &sets) && - !of_property_read_u32(node, "i-cache-block-size", &line_size)) { - ci_leaf_init((*this_leaf)++, CACHE_TYPE_INST, level, size, sets, line_size); - } - - if (!of_property_read_u32(node, "d-cache-size", &size) && - !of_property_read_u32(node, "d-cache-sets", &sets) && - !of_property_read_u32(node, "d-cache-block-size", &line_size)) { - ci_leaf_init((*this_leaf)++, CACHE_TYPE_DATA, level, size, sets, line_size); - } } int populate_cache_leaves(unsigned int cpu) @@ -120,24 +79,29 @@ int populate_cache_leaves(unsigned int cpu) struct device_node *prev = NULL; int levels = 1, level = 1; - /* Level 1 caches in cpu node */ - fill_cacheinfo(&this_leaf, np, level); + if (of_property_read_bool(np, "cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + if (of_property_read_bool(np, "i-cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + if (of_property_read_bool(np, "d-cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); - /* Next level caches in cache nodes */ prev = np; while ((np = of_find_next_cache_node(np))) { of_node_put(prev); prev = np; - if (!of_device_is_compatible(np, "cache")) break; if (of_property_read_u32(np, "cache-level", &level)) break; if (level <= levels) break; - - fill_cacheinfo(&this_leaf, np, level); - + if (of_property_read_bool(np, "cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_UNIFIED, level); + if (of_property_read_bool(np, "i-cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_INST, level); + if (of_property_read_bool(np, "d-cache-size")) + ci_leaf_init(this_leaf++, np, CACHE_TYPE_DATA, level); levels = level; } of_node_put(np); diff --git a/arch/riscv/kernel/compat_vdso/Makefile b/arch/riscv/kernel/compat_vdso/Makefile index 7f34f3c7c882..189345773e7e 100644 --- a/arch/riscv/kernel/compat_vdso/Makefile +++ b/arch/riscv/kernel/compat_vdso/Makefile @@ -26,7 +26,7 @@ targets := $(obj-compat_vdso) compat_vdso.so compat_vdso.so.dbg compat_vdso.lds obj-compat_vdso := $(addprefix $(obj)/, $(obj-compat_vdso)) obj-y += compat_vdso.o -CPPFLAGS_compat_vdso.lds += -P -C -U$(ARCH) +CPPFLAGS_compat_vdso.lds += -P -C -DCOMPAT_VDSO -U$(ARCH) # Disable profiling and instrumentation for VDSO code GCOV_PROFILE := n diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 8400f0cc9704..3df38052dcbd 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -7,6 +7,7 @@ #include <linux/init.h> #include <linux/seq_file.h> #include <linux/of.h> +#include <asm/cpufeature.h> #include <asm/csr.h> #include <asm/hwcap.h> #include <asm/sbi.h> @@ -70,12 +71,7 @@ int riscv_of_parent_hartid(struct device_node *node, unsigned long *hartid) return -1; } -struct riscv_cpuinfo { - unsigned long mvendorid; - unsigned long marchid; - unsigned long mimpid; -}; -static DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); +DEFINE_PER_CPU(struct riscv_cpuinfo, riscv_cpuinfo); unsigned long riscv_cached_mvendorid(unsigned int cpu_id) { @@ -186,11 +182,13 @@ arch_initcall(riscv_cpuinfo_init); */ static struct riscv_isa_ext_data isa_ext_arr[] = { __RISCV_ISA_EXT_DATA(zicbom, RISCV_ISA_EXT_ZICBOM), + __RISCV_ISA_EXT_DATA(zicboz, RISCV_ISA_EXT_ZICBOZ), __RISCV_ISA_EXT_DATA(zihintpause, RISCV_ISA_EXT_ZIHINTPAUSE), __RISCV_ISA_EXT_DATA(zbb, RISCV_ISA_EXT_ZBB), __RISCV_ISA_EXT_DATA(sscofpmf, RISCV_ISA_EXT_SSCOFPMF), __RISCV_ISA_EXT_DATA(sstc, RISCV_ISA_EXT_SSTC), __RISCV_ISA_EXT_DATA(svinval, RISCV_ISA_EXT_SVINVAL), + __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), __RISCV_ISA_EXT_DATA("", RISCV_ISA_EXT_MAX), }; diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 59d58ee0f68d..52585e088873 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -8,20 +8,15 @@ #include <linux/bitmap.h> #include <linux/ctype.h> -#include <linux/libfdt.h> #include <linux/log2.h> #include <linux/memory.h> #include <linux/module.h> #include <linux/of.h> #include <asm/alternative.h> #include <asm/cacheflush.h> -#include <asm/errata_list.h> #include <asm/hwcap.h> #include <asm/patch.h> -#include <asm/pgtable.h> #include <asm/processor.h> -#include <asm/smp.h> -#include <asm/switch_to.h> #define NUM_ALPHA_EXTS ('z' - 'a' + 1) @@ -30,6 +25,9 @@ unsigned long elf_hwcap __read_mostly; /* Host ISA bitmap */ static DECLARE_BITMAP(riscv_isa, RISCV_ISA_EXT_MAX) __read_mostly; +/* Performance information */ +DEFINE_PER_CPU(long, misaligned_access_speed); + /** * riscv_isa_extension_base() - Get base extension word * @@ -79,6 +77,15 @@ static bool riscv_isa_extension_check(int id) return false; } return true; + case RISCV_ISA_EXT_ZICBOZ: + if (!riscv_cboz_block_size) { + pr_err("Zicboz detected in ISA string, but no cboz-block-size found\n"); + return false; + } else if (!is_power_of_2(riscv_cboz_block_size)) { + pr_err("cboz-block-size present, but is not a power-of-2\n"); + return false; + } + return true; } return true; @@ -224,9 +231,11 @@ void __init riscv_fill_hwcap(void) SET_ISA_EXT_MAP("sscofpmf", RISCV_ISA_EXT_SSCOFPMF); SET_ISA_EXT_MAP("sstc", RISCV_ISA_EXT_SSTC); SET_ISA_EXT_MAP("svinval", RISCV_ISA_EXT_SVINVAL); + SET_ISA_EXT_MAP("svnapot", RISCV_ISA_EXT_SVNAPOT); SET_ISA_EXT_MAP("svpbmt", RISCV_ISA_EXT_SVPBMT); SET_ISA_EXT_MAP("zbb", RISCV_ISA_EXT_ZBB); SET_ISA_EXT_MAP("zicbom", RISCV_ISA_EXT_ZICBOM); + SET_ISA_EXT_MAP("zicboz", RISCV_ISA_EXT_ZICBOZ); SET_ISA_EXT_MAP("zihintpause", RISCV_ISA_EXT_ZIHINTPAUSE); } #undef SET_ISA_EXT_MAP @@ -269,12 +278,46 @@ void __init riscv_fill_hwcap(void) } #ifdef CONFIG_RISCV_ALTERNATIVE +/* + * Alternative patch sites consider 48 bits when determining when to patch + * the old instruction sequence with the new. These bits are broken into a + * 16-bit vendor ID and a 32-bit patch ID. A non-zero vendor ID means the + * patch site is for an erratum, identified by the 32-bit patch ID. When + * the vendor ID is zero, the patch site is for a cpufeature. cpufeatures + * further break down patch ID into two 16-bit numbers. The lower 16 bits + * are the cpufeature ID and the upper 16 bits are used for a value specific + * to the cpufeature and patch site. If the upper 16 bits are zero, then it + * implies no specific value is specified. cpufeatures that want to control + * patching on a per-site basis will provide non-zero values and implement + * checks here. The checks return true when patching should be done, and + * false otherwise. + */ +static bool riscv_cpufeature_patch_check(u16 id, u16 value) +{ + if (!value) + return true; + + switch (id) { + case RISCV_ISA_EXT_ZICBOZ: + /* + * Zicboz alternative applications provide the maximum + * supported block size order, or zero when it doesn't + * matter. If the current block size exceeds the maximum, + * then the alternative cannot be applied. + */ + return riscv_cboz_block_size <= (1U << value); + } + + return false; +} + void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned int stage) { struct alt_entry *alt; void *oldptr, *altptr; + u16 id, value; if (stage == RISCV_ALTERNATIVES_EARLY_BOOT) return; @@ -282,13 +325,19 @@ void __init_or_module riscv_cpufeature_patch_func(struct alt_entry *begin, for (alt = begin; alt < end; alt++) { if (alt->vendor_id != 0) continue; - if (alt->errata_id >= RISCV_ISA_EXT_MAX) { - WARN(1, "This extension id:%d is not in ISA extension list", - alt->errata_id); + + id = PATCH_ID_CPUFEATURE_ID(alt->patch_id); + + if (id >= RISCV_ISA_EXT_MAX) { + WARN(1, "This extension id:%d is not in ISA extension list", id); continue; } - if (!__riscv_isa_extension_available(NULL, alt->errata_id)) + if (!__riscv_isa_extension_available(NULL, id)) + continue; + + value = PATCH_ID_CPUFEATURE_VALUE(alt->patch_id); + if (!riscv_cpufeature_patch_check(id, value)) continue; oldptr = ALT_OLD_PTR(alt); diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S index 8e733aa48ba6..515b2dfbca75 100644 --- a/arch/riscv/kernel/efi-header.S +++ b/arch/riscv/kernel/efi-header.S @@ -6,6 +6,7 @@ #include <linux/pe.h> #include <linux/sizes.h> +#include <asm/set_memory.h> .macro __EFI_PE_HEADER .long PE_MAGIC @@ -33,7 +34,11 @@ optional_header: .byte 0x02 // MajorLinkerVersion .byte 0x14 // MinorLinkerVersion .long __pecoff_text_end - efi_header_end // SizeOfCode - .long __pecoff_data_virt_size // SizeOfInitializedData +#ifdef __clang__ + .long __pecoff_data_virt_size // SizeOfInitializedData +#else + .long __pecoff_data_virt_end - __pecoff_text_end // SizeOfInitializedData +#endif .long 0 // SizeOfUninitializedData .long __efistub_efi_pe_entry - _start // AddressOfEntryPoint .long efi_header_end - _start // BaseOfCode @@ -91,9 +96,17 @@ section_table: IMAGE_SCN_MEM_EXECUTE // Characteristics .ascii ".data\0\0\0" - .long __pecoff_data_virt_size // VirtualSize +#ifdef __clang__ + .long __pecoff_data_virt_size // VirtualSize +#else + .long __pecoff_data_virt_end - __pecoff_text_end // VirtualSize +#endif .long __pecoff_text_end - _start // VirtualAddress - .long __pecoff_data_raw_size // SizeOfRawData +#ifdef __clang__ + .long __pecoff_data_raw_size // SizeOfRawData +#else + .long __pecoff_data_raw_end - __pecoff_text_end // SizeOfRawData +#endif .long __pecoff_text_end - _start // PointerToRawData .long 0 // PointerToRelocations diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 99d38fdf8b18..3fbb100bc9e4 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -14,11 +14,7 @@ #include <asm/asm-offsets.h> #include <asm/errata_list.h> -#if !IS_ENABLED(CONFIG_PREEMPTION) -.set resume_kernel, restore_all -#endif - -ENTRY(handle_exception) +SYM_CODE_START(handle_exception) /* * If coming from userspace, preserve the user thread pointer and load * the kernel thread pointer. If we came from the kernel, the scratch @@ -46,32 +42,7 @@ _save_context: REG_S x1, PT_RA(sp) REG_S x3, PT_GP(sp) REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) + save_from_x6_to_x31 /* * Disable user-mode memory access as it should only be set in the @@ -106,19 +77,8 @@ _save_context: .option norelax la gp, __global_pointer$ .option pop - -#ifdef CONFIG_TRACE_IRQFLAGS - call __trace_hardirqs_off -#endif - -#ifdef CONFIG_CONTEXT_TRACKING_USER - /* If previous state is in user mode, call user_exit_callable(). */ - li a0, SR_PP - and a0, s1, a0 - bnez a0, skip_context_tracking - call user_exit_callable -skip_context_tracking: -#endif + move a0, sp /* pt_regs */ + la ra, ret_from_exception /* * MSB of cause differentiates between @@ -126,38 +86,13 @@ skip_context_tracking: */ bge s4, zero, 1f - la ra, ret_from_exception - /* Handle interrupts */ - move a0, sp /* pt_regs */ - la a1, generic_handle_arch_irq - jr a1 + tail do_irq 1: - /* - * Exceptions run with interrupts enabled or disabled depending on the - * state of SR_PIE in m/sstatus. - */ - andi t0, s1, SR_PIE - beqz t0, 1f - /* kprobes, entered via ebreak, must have interrupts disabled. */ - li t0, EXC_BREAKPOINT - beq s4, t0, 1f -#ifdef CONFIG_TRACE_IRQFLAGS - call __trace_hardirqs_on -#endif - csrs CSR_STATUS, SR_IE - -1: - la ra, ret_from_exception - /* Handle syscalls */ - li t0, EXC_SYSCALL - beq s4, t0, handle_syscall - /* Handle other exceptions */ slli t0, s4, RISCV_LGPTR la t1, excp_vect_table la t2, excp_vect_table_end - move a0, sp /* pt_regs */ add t0, t1, t0 /* Check if exception code lies within bounds */ bgeu t0, t2, 1f @@ -165,95 +100,16 @@ skip_context_tracking: jr t0 1: tail do_trap_unknown +SYM_CODE_END(handle_exception) -handle_syscall: -#ifdef CONFIG_RISCV_M_MODE - /* - * When running is M-Mode (no MMU config), MPIE does not get set. - * As a result, we need to force enable interrupts here because - * handle_exception did not do set SR_IE as it always sees SR_PIE - * being cleared. - */ - csrs CSR_STATUS, SR_IE -#endif -#if defined(CONFIG_TRACE_IRQFLAGS) || defined(CONFIG_CONTEXT_TRACKING_USER) - /* Recover a0 - a7 for system calls */ - REG_L a0, PT_A0(sp) - REG_L a1, PT_A1(sp) - REG_L a2, PT_A2(sp) - REG_L a3, PT_A3(sp) - REG_L a4, PT_A4(sp) - REG_L a5, PT_A5(sp) - REG_L a6, PT_A6(sp) - REG_L a7, PT_A7(sp) -#endif - /* save the initial A0 value (needed in signal handlers) */ - REG_S a0, PT_ORIG_A0(sp) - /* - * Advance SEPC to avoid executing the original - * scall instruction on sret - */ - addi s2, s2, 0x4 - REG_S s2, PT_EPC(sp) - /* Trace syscalls, but only if requested by the user. */ - REG_L t0, TASK_TI_FLAGS(tp) - andi t0, t0, _TIF_SYSCALL_WORK - bnez t0, handle_syscall_trace_enter -check_syscall_nr: - /* Check to make sure we don't jump to a bogus syscall number. */ - li t0, __NR_syscalls - la s0, sys_ni_syscall - /* - * Syscall number held in a7. - * If syscall number is above allowed value, redirect to ni_syscall. - */ - bgeu a7, t0, 3f -#ifdef CONFIG_COMPAT - REG_L s0, PT_STATUS(sp) - srli s0, s0, SR_UXL_SHIFT - andi s0, s0, (SR_UXL >> SR_UXL_SHIFT) - li t0, (SR_UXL_32 >> SR_UXL_SHIFT) - sub t0, s0, t0 - bnez t0, 1f - - /* Call compat_syscall */ - la s0, compat_sys_call_table - j 2f -1: -#endif - /* Call syscall */ - la s0, sys_call_table -2: - slli t0, a7, RISCV_LGPTR - add s0, s0, t0 - REG_L s0, 0(s0) -3: - jalr s0 - -ret_from_syscall: - /* Set user a0 to kernel a0 */ - REG_S a0, PT_A0(sp) - /* - * We didn't execute the actual syscall. - * Seccomp already set return value for the current task pt_regs. - * (If it was configured with SECCOMP_RET_ERRNO/TRACE) - */ -ret_from_syscall_rejected: -#ifdef CONFIG_DEBUG_RSEQ - move a0, sp - call rseq_syscall -#endif - /* Trace syscalls, but only if requested by the user. */ - REG_L t0, TASK_TI_FLAGS(tp) - andi t0, t0, _TIF_SYSCALL_WORK - bnez t0, handle_syscall_trace_exit - +/* + * The ret_from_exception must be called with interrupt disabled. Here is the + * caller list: + * - handle_exception + * - ret_from_fork + */ SYM_CODE_START_NOALIGN(ret_from_exception) REG_L s0, PT_STATUS(sp) - csrc CSR_STATUS, SR_IE -#ifdef CONFIG_TRACE_IRQFLAGS - call __trace_hardirqs_off -#endif #ifdef CONFIG_RISCV_M_MODE /* the MPP value is too large to be used as an immediate arg for addi */ li t0, SR_MPP @@ -261,17 +117,7 @@ SYM_CODE_START_NOALIGN(ret_from_exception) #else andi s0, s0, SR_SPP #endif - bnez s0, resume_kernel -SYM_CODE_END(ret_from_exception) - - /* Interrupts must be disabled here so flags are checked atomically */ - REG_L s0, TASK_TI_FLAGS(tp) /* current_thread_info->flags */ - andi s1, s0, _TIF_WORK_MASK - bnez s1, resume_userspace_slow -resume_userspace: -#ifdef CONFIG_CONTEXT_TRACKING_USER - call user_enter_callable -#endif + bnez s0, 1f /* Save unwound kernel stack pointer in thread_info */ addi s0, sp, PT_SIZE_ON_STACK @@ -282,18 +128,7 @@ resume_userspace: * structures again. */ csrw CSR_SCRATCH, tp - -restore_all: -#ifdef CONFIG_TRACE_IRQFLAGS - REG_L s1, PT_STATUS(sp) - andi t0, s1, SR_PIE - beqz t0, 1f - call __trace_hardirqs_on - j 2f 1: - call __trace_hardirqs_off -2: -#endif REG_L a0, PT_STATUS(sp) /* * The current load reservation is effectively part of the processor's @@ -322,32 +157,7 @@ restore_all: REG_L x3, PT_GP(sp) REG_L x4, PT_TP(sp) REG_L x5, PT_T0(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) + restore_from_x6_to_x31 REG_L x2, PT_SP(sp) @@ -356,47 +166,10 @@ restore_all: #else sret #endif - -#if IS_ENABLED(CONFIG_PREEMPTION) -resume_kernel: - REG_L s0, TASK_TI_PREEMPT_COUNT(tp) - bnez s0, restore_all - REG_L s0, TASK_TI_FLAGS(tp) - andi s0, s0, _TIF_NEED_RESCHED - beqz s0, restore_all - call preempt_schedule_irq - j restore_all -#endif - -resume_userspace_slow: - /* Enter slow path for supplementary processing */ - move a0, sp /* pt_regs */ - move a1, s0 /* current_thread_info->flags */ - call do_work_pending - j resume_userspace - -/* Slow paths for ptrace. */ -handle_syscall_trace_enter: - move a0, sp - call do_syscall_trace_enter - move t0, a0 - REG_L a0, PT_A0(sp) - REG_L a1, PT_A1(sp) - REG_L a2, PT_A2(sp) - REG_L a3, PT_A3(sp) - REG_L a4, PT_A4(sp) - REG_L a5, PT_A5(sp) - REG_L a6, PT_A6(sp) - REG_L a7, PT_A7(sp) - bnez t0, ret_from_syscall_rejected - j check_syscall_nr -handle_syscall_trace_exit: - move a0, sp - call do_syscall_trace_exit - j ret_from_exception +SYM_CODE_END(ret_from_exception) #ifdef CONFIG_VMAP_STACK -handle_kernel_stack_overflow: +SYM_CODE_START_LOCAL(handle_kernel_stack_overflow) /* * Takes the psuedo-spinlock for the shadow stack, in case multiple * harts are concurrently overflowing their kernel stacks. We could @@ -464,32 +237,7 @@ restore_caller_reg: REG_S x1, PT_RA(sp) REG_S x3, PT_GP(sp) REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) + save_from_x6_to_x31 REG_L s0, TASK_TI_KERNEL_SP(tp) csrr s1, CSR_STATUS @@ -505,23 +253,20 @@ restore_caller_reg: REG_S s5, PT_TP(sp) move a0, sp tail handle_bad_stack +SYM_CODE_END(handle_kernel_stack_overflow) #endif -END(handle_exception) - -ENTRY(ret_from_fork) - la ra, ret_from_exception - tail schedule_tail -ENDPROC(ret_from_fork) - -ENTRY(ret_from_kernel_thread) +SYM_CODE_START(ret_from_fork) call schedule_tail + beqz s0, 1f /* not from kernel thread */ /* Call fn(arg) */ - la ra, ret_from_exception move a0, s1 - jr s0 -ENDPROC(ret_from_kernel_thread) - + jalr s0 +1: + move a0, sp /* pt_regs */ + la ra, ret_from_exception + tail syscall_exit_to_user_mode +SYM_CODE_END(ret_from_fork) /* * Integer register context switch @@ -533,7 +278,7 @@ ENDPROC(ret_from_kernel_thread) * The value of a0 and a1 must be preserved by this function, as that's how * arguments are passed to schedule_tail. */ -ENTRY(__switch_to) +SYM_FUNC_START(__switch_to) /* Save context into prev->thread */ li a4, TASK_THREAD_RA add a3, a0, a4 @@ -570,7 +315,7 @@ ENTRY(__switch_to) /* The offset of thread_info in task_struct is zero. */ move tp, a1 ret -ENDPROC(__switch_to) +SYM_FUNC_END(__switch_to) #ifndef CONFIG_MMU #define do_page_fault do_trap_unknown @@ -579,7 +324,7 @@ ENDPROC(__switch_to) .section ".rodata" .align LGREG /* Exception vector table */ -ENTRY(excp_vect_table) +SYM_CODE_START(excp_vect_table) RISCV_PTR do_trap_insn_misaligned ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) RISCV_PTR do_trap_insn_illegal @@ -588,7 +333,7 @@ ENTRY(excp_vect_table) RISCV_PTR do_trap_load_fault RISCV_PTR do_trap_store_misaligned RISCV_PTR do_trap_store_fault - RISCV_PTR do_trap_ecall_u /* system call, gets intercepted */ + RISCV_PTR do_trap_ecall_u /* system call */ RISCV_PTR do_trap_ecall_s RISCV_PTR do_trap_unknown RISCV_PTR do_trap_ecall_m @@ -598,11 +343,11 @@ ENTRY(excp_vect_table) RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ excp_vect_table_end: -END(excp_vect_table) +SYM_CODE_END(excp_vect_table) #ifndef CONFIG_MMU -ENTRY(__user_rt_sigreturn) +SYM_CODE_START(__user_rt_sigreturn) li a7, __NR_rt_sigreturn scall -END(__user_rt_sigreturn) +SYM_CODE_END(__user_rt_sigreturn) #endif diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h index 726731ada534..a556fdaafed9 100644 --- a/arch/riscv/kernel/head.h +++ b/arch/riscv/kernel/head.h @@ -10,7 +10,6 @@ extern atomic_t hart_lottery; -asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void __init setup_vm(uintptr_t dtb_pa); #ifdef CONFIG_XIP_KERNEL asmlinkage void __init __copy_data(void); diff --git a/arch/riscv/kernel/image-vars.h b/arch/riscv/kernel/image-vars.h index 7e2962ef73f9..15616155008c 100644 --- a/arch/riscv/kernel/image-vars.h +++ b/arch/riscv/kernel/image-vars.h @@ -23,8 +23,6 @@ * linked at. The routines below are all implemented in assembler in a * position independent manner */ -__efistub_strcmp = strcmp; - __efistub__start = _start; __efistub__start_kernel = _start_kernel; __efistub__end = _end; diff --git a/arch/riscv/kernel/mcount-dyn.S b/arch/riscv/kernel/mcount-dyn.S index 125de818d1ba..669b8697aa38 100644 --- a/arch/riscv/kernel/mcount-dyn.S +++ b/arch/riscv/kernel/mcount-dyn.S @@ -66,66 +66,17 @@ REG_S x3, PT_GP(sp) REG_S x4, PT_TP(sp) REG_S x5, PT_T0(sp) - REG_S x6, PT_T1(sp) - REG_S x7, PT_T2(sp) - REG_S x8, PT_S0(sp) - REG_S x9, PT_S1(sp) - REG_S x10, PT_A0(sp) - REG_S x11, PT_A1(sp) - REG_S x12, PT_A2(sp) - REG_S x13, PT_A3(sp) - REG_S x14, PT_A4(sp) - REG_S x15, PT_A5(sp) - REG_S x16, PT_A6(sp) - REG_S x17, PT_A7(sp) - REG_S x18, PT_S2(sp) - REG_S x19, PT_S3(sp) - REG_S x20, PT_S4(sp) - REG_S x21, PT_S5(sp) - REG_S x22, PT_S6(sp) - REG_S x23, PT_S7(sp) - REG_S x24, PT_S8(sp) - REG_S x25, PT_S9(sp) - REG_S x26, PT_S10(sp) - REG_S x27, PT_S11(sp) - REG_S x28, PT_T3(sp) - REG_S x29, PT_T4(sp) - REG_S x30, PT_T5(sp) - REG_S x31, PT_T6(sp) + save_from_x6_to_x31 .endm .macro RESTORE_ALL - REG_L t0, PT_EPC(sp) REG_L x1, PT_RA(sp) REG_L x2, PT_SP(sp) REG_L x3, PT_GP(sp) REG_L x4, PT_TP(sp) - REG_L x6, PT_T1(sp) - REG_L x7, PT_T2(sp) - REG_L x8, PT_S0(sp) - REG_L x9, PT_S1(sp) - REG_L x10, PT_A0(sp) - REG_L x11, PT_A1(sp) - REG_L x12, PT_A2(sp) - REG_L x13, PT_A3(sp) - REG_L x14, PT_A4(sp) - REG_L x15, PT_A5(sp) - REG_L x16, PT_A6(sp) - REG_L x17, PT_A7(sp) - REG_L x18, PT_S2(sp) - REG_L x19, PT_S3(sp) - REG_L x20, PT_S4(sp) - REG_L x21, PT_S5(sp) - REG_L x22, PT_S6(sp) - REG_L x23, PT_S7(sp) - REG_L x24, PT_S8(sp) - REG_L x25, PT_S9(sp) - REG_L x26, PT_S10(sp) - REG_L x27, PT_S11(sp) - REG_L x28, PT_T3(sp) - REG_L x29, PT_T4(sp) - REG_L x30, PT_T5(sp) - REG_L x31, PT_T6(sp) + /* Restore t0 with PT_EPC */ + REG_L x5, PT_EPC(sp) + restore_from_x6_to_x31 addi sp, sp, PT_SIZE_ON_STACK .endm diff --git a/arch/riscv/kernel/pi/Makefile b/arch/riscv/kernel/pi/Makefile new file mode 100644 index 000000000000..5d7cb991f2b8 --- /dev/null +++ b/arch/riscv/kernel/pi/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 +# This file was copied from arm64/kernel/pi/Makefile. + +KBUILD_CFLAGS := $(subst $(CC_FLAGS_FTRACE),,$(KBUILD_CFLAGS)) -fpie \ + -Os -DDISABLE_BRANCH_PROFILING $(DISABLE_STACKLEAK_PLUGIN) \ + $(call cc-option,-mbranch-protection=none) \ + -I$(srctree)/scripts/dtc/libfdt -fno-stack-protector \ + -D__DISABLE_EXPORTS -ffreestanding \ + -fno-asynchronous-unwind-tables -fno-unwind-tables \ + $(call cc-option,-fno-addrsig) + +KBUILD_CFLAGS += -mcmodel=medany + +CFLAGS_cmdline_early.o += -D__NO_FORTIFY +CFLAGS_lib-fdt_ro.o += -D__NO_FORTIFY + +GCOV_PROFILE := n +KASAN_SANITIZE := n +KCSAN_SANITIZE := n +UBSAN_SANITIZE := n +KCOV_INSTRUMENT := n + +$(obj)/%.pi.o: OBJCOPYFLAGS := --prefix-symbols=__pi_ \ + --remove-section=.note.gnu.property \ + --prefix-alloc-sections=.init +$(obj)/%.pi.o: $(obj)/%.o FORCE + $(call if_changed,objcopy) + +$(obj)/lib-%.o: $(srctree)/lib/%.c FORCE + $(call if_changed_rule,cc_o_c) + +$(obj)/string.o: $(srctree)/lib/string.c FORCE + $(call if_changed_rule,cc_o_c) + +$(obj)/ctype.o: $(srctree)/lib/ctype.c FORCE + $(call if_changed_rule,cc_o_c) + +obj-y := cmdline_early.pi.o string.pi.o ctype.pi.o lib-fdt.pi.o lib-fdt_ro.pi.o +extra-y := $(patsubst %.pi.o,%.o,$(obj-y)) diff --git a/arch/riscv/kernel/pi/cmdline_early.c b/arch/riscv/kernel/pi/cmdline_early.c new file mode 100644 index 000000000000..05652d13c746 --- /dev/null +++ b/arch/riscv/kernel/pi/cmdline_early.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/types.h> +#include <linux/init.h> +#include <linux/libfdt.h> +#include <linux/string.h> +#include <asm/pgtable.h> +#include <asm/setup.h> + +static char early_cmdline[COMMAND_LINE_SIZE]; + +/* + * Declare the functions that are exported (but prefixed) here so that LLVM + * does not complain it lacks the 'static' keyword (which, if added, makes + * LLVM complain because the function is actually unused in this file). + */ +u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa); + +static char *get_early_cmdline(uintptr_t dtb_pa) +{ + const char *fdt_cmdline = NULL; + unsigned int fdt_cmdline_size = 0; + int chosen_node; + + if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) { + chosen_node = fdt_path_offset((void *)dtb_pa, "/chosen"); + if (chosen_node >= 0) { + fdt_cmdline = fdt_getprop((void *)dtb_pa, chosen_node, + "bootargs", NULL); + if (fdt_cmdline) { + fdt_cmdline_size = strlen(fdt_cmdline); + strscpy(early_cmdline, fdt_cmdline, + COMMAND_LINE_SIZE); + } + } + } + + if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || + IS_ENABLED(CONFIG_CMDLINE_FORCE) || + fdt_cmdline_size == 0 /* CONFIG_CMDLINE_FALLBACK */) { + strncat(early_cmdline, CONFIG_CMDLINE, + COMMAND_LINE_SIZE - fdt_cmdline_size); + } + + return early_cmdline; +} + +static u64 match_noXlvl(char *cmdline) +{ + if (strstr(cmdline, "no4lvl")) + return SATP_MODE_48; + else if (strstr(cmdline, "no5lvl")) + return SATP_MODE_57; + + return 0; +} + +u64 set_satp_mode_from_cmdline(uintptr_t dtb_pa) +{ + char *cmdline = get_early_cmdline(dtb_pa); + + return match_noXlvl(cmdline); +} diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c index 774ffde386ab..e2a060066730 100644 --- a/arch/riscv/kernel/process.c +++ b/arch/riscv/kernel/process.c @@ -34,7 +34,6 @@ EXPORT_SYMBOL(__stack_chk_guard); #endif extern asmlinkage void ret_from_fork(void); -extern asmlinkage void ret_from_kernel_thread(void); void arch_cpu_idle(void) { @@ -173,7 +172,6 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Supervisor/Machine, irqs on: */ childregs->status = SR_PP | SR_PIE; - p->thread.ra = (unsigned long)ret_from_kernel_thread; p->thread.s[0] = (unsigned long)args->fn; p->thread.s[1] = (unsigned long)args->fn_arg; } else { @@ -183,8 +181,9 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) if (clone_flags & CLONE_SETTLS) childregs->tp = tls; childregs->a0 = 0; /* Return value of fork() */ - p->thread.ra = (unsigned long)ret_from_fork; + p->thread.s[0] = 0; } + p->thread.ra = (unsigned long)ret_from_fork; p->thread.sp = (unsigned long)childregs; /* kernel sp */ return 0; } diff --git a/arch/riscv/kernel/ptrace.c b/arch/riscv/kernel/ptrace.c index 2ae8280ae475..23c48b14a0e7 100644 --- a/arch/riscv/kernel/ptrace.c +++ b/arch/riscv/kernel/ptrace.c @@ -19,9 +19,6 @@ #include <linux/sched.h> #include <linux/sched/task_stack.h> -#define CREATE_TRACE_POINTS -#include <trace/events/syscalls.h> - enum riscv_regset { REGSET_X, #ifdef CONFIG_FPU @@ -212,7 +209,6 @@ unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n) void ptrace_disable(struct task_struct *child) { - clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE); } long arch_ptrace(struct task_struct *child, long request, @@ -229,46 +225,6 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -/* - * Allows PTRACE_SYSCALL to work. These are called from entry.S in - * {handle,ret_from}_syscall. - */ -__visible int do_syscall_trace_enter(struct pt_regs *regs) -{ - if (test_thread_flag(TIF_SYSCALL_TRACE)) - if (ptrace_report_syscall_entry(regs)) - return -1; - - /* - * Do the secure computing after ptrace; failures should be fast. - * If this fails we might have return value in a0 from seccomp - * (via SECCOMP_RET_ERRNO/TRACE). - */ - if (secure_computing() == -1) - return -1; - -#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) - trace_sys_enter(regs, syscall_get_nr(current, regs)); -#endif - - audit_syscall_entry(regs->a7, regs->a0, regs->a1, regs->a2, regs->a3); - return 0; -} - -__visible void do_syscall_trace_exit(struct pt_regs *regs) -{ - audit_syscall_exit(regs); - - if (test_thread_flag(TIF_SYSCALL_TRACE)) - ptrace_report_syscall_exit(regs, 0); - -#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS - if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) - trace_sys_exit(regs, regs_return_value(regs)); -#endif -} - #ifdef CONFIG_COMPAT static int compat_riscv_gpr_get(struct task_struct *target, const struct user_regset *regset, diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 3020f44dcf58..36b026057503 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -293,7 +293,7 @@ void __init setup_arch(char **cmdline_p) setup_smp(); #endif - riscv_init_cbom_blocksize(); + riscv_init_cbo_blocksizes(); riscv_fill_hwcap(); apply_boot_alternatives(); if (IS_ENABLED(CONFIG_RISCV_ISA_ZICBOM) && diff --git a/arch/riscv/kernel/signal.c b/arch/riscv/kernel/signal.c index dee66c9290cc..9aff9d720590 100644 --- a/arch/riscv/kernel/signal.c +++ b/arch/riscv/kernel/signal.c @@ -12,6 +12,7 @@ #include <linux/syscalls.h> #include <linux/resume_user_mode.h> #include <linux/linkage.h> +#include <linux/entry-common.h> #include <asm/ucontext.h> #include <asm/vdso.h> @@ -281,7 +282,7 @@ static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) signal_setup_done(ret, ksig, 0); } -static void do_signal(struct pt_regs *regs) +void arch_do_signal_or_restart(struct pt_regs *regs) { struct ksignal ksig; @@ -318,29 +319,3 @@ static void do_signal(struct pt_regs *regs) */ restore_saved_sigmask(); } - -/* - * Handle any pending work on the resume-to-userspace path, as indicated by - * _TIF_WORK_MASK. Entered from assembly with IRQs off. - */ -asmlinkage __visible void do_work_pending(struct pt_regs *regs, - unsigned long thread_info_flags) -{ - do { - if (thread_info_flags & _TIF_NEED_RESCHED) { - schedule(); - } else { - local_irq_enable(); - if (thread_info_flags & _TIF_UPROBE) - uprobe_notify_resume(regs); - /* Handle pending signal delivery */ - if (thread_info_flags & (_TIF_SIGPENDING | - _TIF_NOTIFY_SIGNAL)) - do_signal(regs); - if (thread_info_flags & _TIF_NOTIFY_RESUME) - resume_user_mode_work(regs); - } - local_irq_disable(); - thread_info_flags = read_thread_flags(); - } while (thread_info_flags & _TIF_WORK_MASK); -} diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 00b53913d4c6..445a4efee267 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -167,6 +167,7 @@ asmlinkage __visible void smp_callin(void) notify_cpu_starting(curr_cpuid); numa_add_cpu(curr_cpuid); set_cpu_online(curr_cpuid, 1); + probe_vendor_features(curr_cpuid); /* * Remote TLB flushes are ignored while the CPU is offline, so emit diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index 5d3f2fbeb33c..5db29683ebee 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -6,9 +6,15 @@ */ #include <linux/syscalls.h> -#include <asm/unistd.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> +#include <asm/hwprobe.h> +#include <asm/sbi.h> +#include <asm/switch_to.h> +#include <asm/uaccess.h> +#include <asm/unistd.h> #include <asm-generic/mman-common.h> +#include <vdso/vsyscall.h> static long riscv_sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, @@ -69,3 +75,225 @@ SYSCALL_DEFINE3(riscv_flush_icache, uintptr_t, start, uintptr_t, end, return 0; } + +/* + * The hwprobe interface, for allowing userspace to probe to see which features + * are supported by the hardware. See Documentation/riscv/hwprobe.rst for more + * details. + */ +static void hwprobe_arch_id(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + u64 id = -1ULL; + bool first = true; + int cpu; + + for_each_cpu(cpu, cpus) { + u64 cpu_id; + + switch (pair->key) { + case RISCV_HWPROBE_KEY_MVENDORID: + cpu_id = riscv_cached_mvendorid(cpu); + break; + case RISCV_HWPROBE_KEY_MIMPID: + cpu_id = riscv_cached_mimpid(cpu); + break; + case RISCV_HWPROBE_KEY_MARCHID: + cpu_id = riscv_cached_marchid(cpu); + break; + } + + if (first) { + id = cpu_id; + first = false; + } + + /* + * If there's a mismatch for the given set, return -1 in the + * value. + */ + if (id != cpu_id) { + id = -1ULL; + break; + } + } + + pair->value = id; +} + +static u64 hwprobe_misaligned(const struct cpumask *cpus) +{ + int cpu; + u64 perf = -1ULL; + + for_each_cpu(cpu, cpus) { + int this_perf = per_cpu(misaligned_access_speed, cpu); + + if (perf == -1ULL) + perf = this_perf; + + if (perf != this_perf) { + perf = RISCV_HWPROBE_MISALIGNED_UNKNOWN; + break; + } + } + + if (perf == -1ULL) + return RISCV_HWPROBE_MISALIGNED_UNKNOWN; + + return perf; +} + +static void hwprobe_one_pair(struct riscv_hwprobe *pair, + const struct cpumask *cpus) +{ + switch (pair->key) { + case RISCV_HWPROBE_KEY_MVENDORID: + case RISCV_HWPROBE_KEY_MARCHID: + case RISCV_HWPROBE_KEY_MIMPID: + hwprobe_arch_id(pair, cpus); + break; + /* + * The kernel already assumes that the base single-letter ISA + * extensions are supported on all harts, and only supports the + * IMA base, so just cheat a bit here and tell that to + * userspace. + */ + case RISCV_HWPROBE_KEY_BASE_BEHAVIOR: + pair->value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA; + break; + + case RISCV_HWPROBE_KEY_IMA_EXT_0: + pair->value = 0; + if (has_fpu()) + pair->value |= RISCV_HWPROBE_IMA_FD; + + if (riscv_isa_extension_available(NULL, c)) + pair->value |= RISCV_HWPROBE_IMA_C; + + break; + + case RISCV_HWPROBE_KEY_CPUPERF_0: + pair->value = hwprobe_misaligned(cpus); + break; + + /* + * For forward compatibility, unknown keys don't fail the whole + * call, but get their element key set to -1 and value set to 0 + * indicating they're unrecognized. + */ + default: + pair->key = -1; + pair->value = 0; + break; + } +} + +static int do_riscv_hwprobe(struct riscv_hwprobe __user *pairs, + size_t pair_count, size_t cpu_count, + unsigned long __user *cpus_user, + unsigned int flags) +{ + size_t out; + int ret; + cpumask_t cpus; + + /* Check the reserved flags. */ + if (flags != 0) + return -EINVAL; + + /* + * The interface supports taking in a CPU mask, and returns values that + * are consistent across that mask. Allow userspace to specify NULL and + * 0 as a shortcut to all online CPUs. + */ + cpumask_clear(&cpus); + if (!cpu_count && !cpus_user) { + cpumask_copy(&cpus, cpu_online_mask); + } else { + if (cpu_count > cpumask_size()) + cpu_count = cpumask_size(); + + ret = copy_from_user(&cpus, cpus_user, cpu_count); + if (ret) + return -EFAULT; + + /* + * Userspace must provide at least one online CPU, without that + * there's no way to define what is supported. + */ + cpumask_and(&cpus, &cpus, cpu_online_mask); + if (cpumask_empty(&cpus)) + return -EINVAL; + } + + for (out = 0; out < pair_count; out++, pairs++) { + struct riscv_hwprobe pair; + + if (get_user(pair.key, &pairs->key)) + return -EFAULT; + + pair.value = 0; + hwprobe_one_pair(&pair, &cpus); + ret = put_user(pair.key, &pairs->key); + if (ret == 0) + ret = put_user(pair.value, &pairs->value); + + if (ret) + return -EFAULT; + } + + return 0; +} + +#ifdef CONFIG_MMU + +static int __init init_hwprobe_vdso_data(void) +{ + struct vdso_data *vd = __arch_get_k_vdso_data(); + struct arch_vdso_data *avd = &vd->arch_data; + u64 id_bitsmash = 0; + struct riscv_hwprobe pair; + int key; + + /* + * Initialize vDSO data with the answers for the "all CPUs" case, to + * save a syscall in the common case. + */ + for (key = 0; key <= RISCV_HWPROBE_MAX_KEY; key++) { + pair.key = key; + hwprobe_one_pair(&pair, cpu_online_mask); + + WARN_ON_ONCE(pair.key < 0); + + avd->all_cpu_hwprobe_values[key] = pair.value; + /* + * Smash together the vendor, arch, and impl IDs to see if + * they're all 0 or any negative. + */ + if (key <= RISCV_HWPROBE_KEY_MIMPID) + id_bitsmash |= pair.value; + } + + /* + * If the arch, vendor, and implementation ID are all the same across + * all harts, then assume all CPUs are the same, and allow the vDSO to + * answer queries for arbitrary masks. However if all values are 0 (not + * populated) or any value returns -1 (varies across CPUs), then the + * vDSO should defer to the kernel for exotic cpu masks. + */ + avd->homogeneous_cpus = id_bitsmash != 0 && id_bitsmash != -1; + return 0; +} + +arch_initcall_sync(init_hwprobe_vdso_data); + +#endif /* CONFIG_MMU */ + +SYSCALL_DEFINE5(riscv_hwprobe, struct riscv_hwprobe __user *, pairs, + size_t, pair_count, size_t, cpu_count, unsigned long __user *, + cpus, unsigned int, flags) +{ + return do_riscv_hwprobe(pairs, pair_count, cpu_count, + cpus, flags); +} diff --git a/arch/riscv/kernel/trace_irq.c b/arch/riscv/kernel/trace_irq.c deleted file mode 100644 index 095ac976d7da..000000000000 --- a/arch/riscv/kernel/trace_irq.c +++ /dev/null @@ -1,27 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> - */ - -#include <linux/irqflags.h> -#include <linux/kprobes.h> -#include "trace_irq.h" - -/* - * trace_hardirqs_on/off require the caller to setup frame pointer properly. - * Otherwise, CALLER_ADDR1 might trigger an pagging exception in kernel. - * Here we add one extra level so they can be safely called by low - * level entry code which $fp is used for other purpose. - */ - -void __trace_hardirqs_on(void) -{ - trace_hardirqs_on(); -} -NOKPROBE_SYMBOL(__trace_hardirqs_on); - -void __trace_hardirqs_off(void) -{ - trace_hardirqs_off(); -} -NOKPROBE_SYMBOL(__trace_hardirqs_off); diff --git a/arch/riscv/kernel/trace_irq.h b/arch/riscv/kernel/trace_irq.h deleted file mode 100644 index 99fe67377e5e..000000000000 --- a/arch/riscv/kernel/trace_irq.h +++ /dev/null @@ -1,11 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2022 Changbin Du <changbin.du@gmail.com> - */ -#ifndef __TRACE_IRQ_H -#define __TRACE_IRQ_H - -void __trace_hardirqs_on(void); -void __trace_hardirqs_off(void); - -#endif /* __TRACE_IRQ_H */ diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index f6fda94e8e59..8c258b78c925 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -17,12 +17,14 @@ #include <linux/module.h> #include <linux/irq.h> #include <linux/kexec.h> +#include <linux/entry-common.h> #include <asm/asm-prototypes.h> #include <asm/bug.h> #include <asm/csr.h> #include <asm/processor.h> #include <asm/ptrace.h> +#include <asm/syscall.h> #include <asm/thread_info.h> int show_unhandled_signals = 1; @@ -119,14 +121,22 @@ static void do_trap_error(struct pt_regs *regs, int signo, int code, } #if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_RISCV_ALTERNATIVE) -#define __trap_section __section(".xip.traps") +#define __trap_section __noinstr_section(".xip.traps") #else -#define __trap_section +#define __trap_section noinstr #endif -#define DO_ERROR_INFO(name, signo, code, str) \ -asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ -{ \ - do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ +#define DO_ERROR_INFO(name, signo, code, str) \ +asmlinkage __visible __trap_section void name(struct pt_regs *regs) \ +{ \ + if (user_mode(regs)) { \ + irqentry_enter_from_user_mode(regs); \ + do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ + irqentry_exit_to_user_mode(regs); \ + } else { \ + irqentry_state_t state = irqentry_nmi_enter(regs); \ + do_trap_error(regs, signo, code, regs->epc, "Oops - " str); \ + irqentry_nmi_exit(regs, state); \ + } \ } DO_ERROR_INFO(do_trap_unknown, @@ -148,26 +158,50 @@ DO_ERROR_INFO(do_trap_store_misaligned, int handle_misaligned_load(struct pt_regs *regs); int handle_misaligned_store(struct pt_regs *regs); -asmlinkage void __trap_section do_trap_load_misaligned(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_load_misaligned(struct pt_regs *regs) { - if (!handle_misaligned_load(regs)) - return; - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - load address misaligned"); + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + + if (handle_misaligned_load(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - load address misaligned"); + + irqentry_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + if (handle_misaligned_load(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - load address misaligned"); + + irqentry_nmi_exit(regs, state); + } } -asmlinkage void __trap_section do_trap_store_misaligned(struct pt_regs *regs) +asmlinkage __visible __trap_section void do_trap_store_misaligned(struct pt_regs *regs) { - if (!handle_misaligned_store(regs)) - return; - do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, - "Oops - store (or AMO) address misaligned"); + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + + if (handle_misaligned_store(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - store (or AMO) address misaligned"); + + irqentry_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + if (handle_misaligned_store(regs)) + do_trap_error(regs, SIGBUS, BUS_ADRALN, regs->epc, + "Oops - store (or AMO) address misaligned"); + + irqentry_nmi_exit(regs, state); + } } #endif DO_ERROR_INFO(do_trap_store_fault, SIGSEGV, SEGV_ACCERR, "store (or AMO) access fault"); -DO_ERROR_INFO(do_trap_ecall_u, - SIGILL, ILL_ILLTRP, "environment call from U-mode"); DO_ERROR_INFO(do_trap_ecall_s, SIGILL, ILL_ILLTRP, "environment call from S-mode"); DO_ERROR_INFO(do_trap_ecall_m, @@ -183,7 +217,7 @@ static inline unsigned long get_break_insn_length(unsigned long pc) return GET_INSN_LENGTH(insn); } -asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) +void handle_break(struct pt_regs *regs) { #ifdef CONFIG_KPROBES if (kprobe_single_step_handler(regs)) @@ -213,7 +247,77 @@ asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) else die(regs, "Kernel BUG"); } -NOKPROBE_SYMBOL(do_trap_break); + +asmlinkage __visible __trap_section void do_trap_break(struct pt_regs *regs) +{ + if (user_mode(regs)) { + irqentry_enter_from_user_mode(regs); + + handle_break(regs); + + irqentry_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + handle_break(regs); + + irqentry_nmi_exit(regs, state); + } +} + +asmlinkage __visible __trap_section void do_trap_ecall_u(struct pt_regs *regs) +{ + if (user_mode(regs)) { + ulong syscall = regs->a7; + + regs->epc += 4; + regs->orig_a0 = regs->a0; + + syscall = syscall_enter_from_user_mode(regs, syscall); + + if (syscall < NR_syscalls) + syscall_handler(regs, syscall); + else + regs->a0 = -ENOSYS; + + syscall_exit_to_user_mode(regs); + } else { + irqentry_state_t state = irqentry_nmi_enter(regs); + + do_trap_error(regs, SIGILL, ILL_ILLTRP, regs->epc, + "Oops - environment call from U-mode"); + + irqentry_nmi_exit(regs, state); + } + +} + +#ifdef CONFIG_MMU +asmlinkage __visible noinstr void do_page_fault(struct pt_regs *regs) +{ + irqentry_state_t state = irqentry_enter(regs); + + handle_page_fault(regs); + + local_irq_disable(); + + irqentry_exit(regs, state); +} +#endif + +asmlinkage __visible noinstr void do_irq(struct pt_regs *regs) +{ + struct pt_regs *old_regs; + irqentry_state_t state = irqentry_enter(regs); + + irq_enter_rcu(); + old_regs = set_irq_regs(regs); + handle_arch_irq(regs); + set_irq_regs(old_regs); + irq_exit_rcu(); + + irqentry_exit(regs, state); +} #ifdef CONFIG_GENERIC_BUG int is_valid_bugaddr(unsigned long pc) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index cc2d1e8c8736..9a68e7eaae4d 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -14,13 +14,7 @@ #include <asm/page.h> #include <asm/vdso.h> #include <linux/time_namespace.h> - -#ifdef CONFIG_GENERIC_TIME_VSYSCALL #include <vdso/datapage.h> -#else -struct vdso_data { -}; -#endif enum vvar_pages { VVAR_DATA_PAGE_OFFSET, diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index a04b3bc35ca2..6b1dba11bf6d 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -10,6 +10,8 @@ vdso-syms += vgettimeofday endif vdso-syms += getcpu vdso-syms += flush_icache +vdso-syms += hwprobe +vdso-syms += sys_hwprobe # Files to link into the vdso obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o @@ -21,6 +23,8 @@ ifneq ($(c-gettimeofday-y),) CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y) endif +CFLAGS_hwprobe.o += -fPIC + # Build rules targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) diff --git a/arch/riscv/kernel/vdso/hwprobe.c b/arch/riscv/kernel/vdso/hwprobe.c new file mode 100644 index 000000000000..d40bec6ac078 --- /dev/null +++ b/arch/riscv/kernel/vdso/hwprobe.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright 2023 Rivos, Inc + */ + +#include <linux/types.h> +#include <vdso/datapage.h> +#include <vdso/helpers.h> + +extern int riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, + unsigned int flags); + +/* Add a prototype to avoid -Wmissing-prototypes warning. */ +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, + unsigned int flags); + +int __vdso_riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count, + size_t cpu_count, unsigned long *cpus, + unsigned int flags) +{ + const struct vdso_data *vd = __arch_get_vdso_data(); + const struct arch_vdso_data *avd = &vd->arch_data; + bool all_cpus = !cpu_count && !cpus; + struct riscv_hwprobe *p = pairs; + struct riscv_hwprobe *end = pairs + pair_count; + + /* + * Defer to the syscall for exotic requests. The vdso has answers + * stashed away only for the "all cpus" case. If all CPUs are + * homogeneous, then this function can handle requests for arbitrary + * masks. + */ + if ((flags != 0) || (!all_cpus && !avd->homogeneous_cpus)) + return riscv_hwprobe(pairs, pair_count, cpu_count, cpus, flags); + + /* This is something we can handle, fill out the pairs. */ + while (p < end) { + if (p->key <= RISCV_HWPROBE_MAX_KEY) { + p->value = avd->all_cpu_hwprobe_values[p->key]; + + } else { + p->key = -1; + p->value = 0; + } + + p++; + } + + return 0; +} diff --git a/arch/riscv/kernel/vdso/sys_hwprobe.S b/arch/riscv/kernel/vdso/sys_hwprobe.S new file mode 100644 index 000000000000..4e704146c77a --- /dev/null +++ b/arch/riscv/kernel/vdso/sys_hwprobe.S @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2023 Rivos, Inc */ + +#include <linux/linkage.h> +#include <asm/unistd.h> + +.text +ENTRY(riscv_hwprobe) + .cfi_startproc + li a7, __NR_riscv_hwprobe + ecall + ret + + .cfi_endproc +ENDPROC(riscv_hwprobe) diff --git a/arch/riscv/kernel/vdso/vdso.lds.S b/arch/riscv/kernel/vdso/vdso.lds.S index 4a0606633290..82ce64900f3d 100644 --- a/arch/riscv/kernel/vdso/vdso.lds.S +++ b/arch/riscv/kernel/vdso/vdso.lds.S @@ -82,6 +82,9 @@ VERSION #endif __vdso_getcpu; __vdso_flush_icache; +#ifndef COMPAT_VDSO + __vdso_riscv_hwprobe; +#endif local: *; }; } diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 53a8ad65b255..305877d85e96 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -27,9 +27,6 @@ ENTRY(_start) jiffies = jiffies_64; -PECOFF_SECTION_ALIGNMENT = 0x1000; -PECOFF_FILE_ALIGNMENT = 0x200; - SECTIONS { /* Beginning of code and text segment */ @@ -86,6 +83,14 @@ SECTIONS /* Start of init data section */ __init_data_begin = .; INIT_DATA_SECTION(16) + + /* Those sections result from the compilation of kernel/pi/string.c */ + .init.pidata : { + *(.init.srodata.cst8*) + *(.init__bug_table*) + *(.init.sdata*) + } + .init.bss : { *(.init.bss) /* from the EFI stub */ } @@ -99,10 +104,6 @@ SECTIONS *(.rel.dyn*) } - .rela.dyn : { - *(.rela*) - } - __init_data_end = .; . = ALIGN(8); @@ -129,9 +130,28 @@ SECTIONS *(.sdata*) } + .rela.dyn : ALIGN(8) { + __rela_dyn_start = .; + *(.rela .rela*) + __rela_dyn_end = .; + } + + .got : { *(.got*) } + +#ifdef CONFIG_RELOCATABLE + .data.rel : { *(.data.rel*) } + .plt : { *(.plt) } + .dynamic : { *(.dynamic) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } +#endif + #ifdef CONFIG_EFI .pecoff_edata_padding : { BYTE(0); . = ALIGN(PECOFF_FILE_ALIGNMENT); } __pecoff_data_raw_size = ABSOLUTE(. - __pecoff_text_end); + __pecoff_data_raw_end = ABSOLUTE(.); #endif /* End of data section */ @@ -142,6 +162,7 @@ SECTIONS #ifdef CONFIG_EFI . = ALIGN(PECOFF_SECTION_ALIGNMENT); __pecoff_data_virt_size = ABSOLUTE(. - __pecoff_text_end); + __pecoff_data_virt_end = ABSOLUTE(.); #endif _end = .; |