diff options
author | Palmer Dabbelt <palmer@rivosinc.com> | 2024-09-12 16:23:05 +0200 |
---|---|---|
committer | Palmer Dabbelt <palmer@rivosinc.com> | 2024-09-12 16:23:05 +0200 |
commit | 9ea7b92b77df7d2eee3c31ef4a19f0f12ec74190 (patch) | |
tree | d1963d8effd4e52dc7e31bdf2e066eb0c2d1e0dc /arch/riscv | |
parent | riscv: Add license to vmalloc.h (diff) | |
parent | riscv: remove limit on the size of read-only section for XIP kernel (diff) | |
download | linux-9ea7b92b77df7d2eee3c31ef4a19f0f12ec74190.tar.xz linux-9ea7b92b77df7d2eee3c31ef4a19f0f12ec74190.zip |
Merge patch series "remove size limit on XIP kernel"
Nam Cao <namcao@linutronix.de> says:
Hi,
For XIP kernel, the writable data section is always at offset specified in
XIP_OFFSET, which is hard-coded to 32MB.
Unfortunately, this means the read-only section (placed before the
writable section) is restricted in size. This causes build failure if the
kernel gets too large.
This series remove the use of XIP_OFFSET one by one, then remove this
macro entirely at the end, with the goal of lifting this size restriction.
Also some cleanup and documentation along the way.
* b4-shazam-merge
riscv: remove limit on the size of read-only section for XIP kernel
riscv: drop the use of XIP_OFFSET in create_kernel_page_table()
riscv: drop the use of XIP_OFFSET in kernel_mapping_va_to_pa()
riscv: drop the use of XIP_OFFSET in XIP_FIXUP_FLASH_OFFSET
riscv: drop the use of XIP_OFFSET in XIP_FIXUP_OFFSET
riscv: replace misleading va_kernel_pa_offset on XIP kernel
riscv: don't export va_kernel_pa_offset in vmcoreinfo for XIP kernel
riscv: cleanup XIP_FIXUP macro
riscv: change XIP's kernel_map.size to be size of the entire kernel
...
Link: https://lore.kernel.org/r/cover.1717789719.git.namcao@linutronix.de
Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
Diffstat (limited to 'arch/riscv')
-rw-r--r-- | arch/riscv/include/asm/page.h | 29 | ||||
-rw-r--r-- | arch/riscv/include/asm/pgtable.h | 18 | ||||
-rw-r--r-- | arch/riscv/include/asm/set_memory.h | 2 | ||||
-rw-r--r-- | arch/riscv/include/asm/xip_fixup.h | 30 | ||||
-rw-r--r-- | arch/riscv/kernel/Makefile.syscalls | 2 | ||||
-rw-r--r-- | arch/riscv/kernel/cpufeature.c | 14 | ||||
-rw-r--r-- | arch/riscv/kernel/patch.c | 4 | ||||
-rw-r--r-- | arch/riscv/kernel/sbi-ipi.c | 2 | ||||
-rw-r--r-- | arch/riscv/kernel/traps.c | 4 | ||||
-rw-r--r-- | arch/riscv/kernel/vmcore_info.c | 7 | ||||
-rw-r--r-- | arch/riscv/kernel/vmlinux-xip.lds.S | 5 | ||||
-rw-r--r-- | arch/riscv/mm/fault.c | 17 | ||||
-rw-r--r-- | arch/riscv/mm/init.c | 32 | ||||
-rw-r--r-- | arch/riscv/purgatory/entry.S | 2 |
14 files changed, 109 insertions, 59 deletions
diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index 7ede2111c591..32d308a3355f 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -112,11 +112,13 @@ struct kernel_mapping { /* Offset between linear mapping virtual address and kernel load address */ unsigned long va_pa_offset; /* Offset between kernel mapping virtual address and kernel load address */ - unsigned long va_kernel_pa_offset; - unsigned long va_kernel_xip_pa_offset; #ifdef CONFIG_XIP_KERNEL + unsigned long va_kernel_xip_text_pa_offset; + unsigned long va_kernel_xip_data_pa_offset; uintptr_t xiprom; uintptr_t xiprom_sz; +#else + unsigned long va_kernel_pa_offset; #endif }; @@ -134,12 +136,18 @@ extern phys_addr_t phys_ram_base; #else void *linear_mapping_pa_to_va(unsigned long x); #endif + +#ifdef CONFIG_XIP_KERNEL #define kernel_mapping_pa_to_va(y) ({ \ unsigned long _y = (unsigned long)(y); \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < phys_ram_base) ? \ - (void *)(_y + kernel_map.va_kernel_xip_pa_offset) : \ - (void *)(_y + kernel_map.va_kernel_pa_offset + XIP_OFFSET); \ + (_y < phys_ram_base) ? \ + (void *)(_y + kernel_map.va_kernel_xip_text_pa_offset) : \ + (void *)(_y + kernel_map.va_kernel_xip_data_pa_offset); \ }) +#else +#define kernel_mapping_pa_to_va(y) ((void *)((unsigned long)(y) + kernel_map.va_kernel_pa_offset)) +#endif + #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) #ifndef CONFIG_DEBUG_VIRTUAL @@ -147,12 +155,17 @@ void *linear_mapping_pa_to_va(unsigned long x); #else phys_addr_t linear_mapping_va_to_pa(unsigned long x); #endif + +#ifdef CONFIG_XIP_KERNEL #define kernel_mapping_va_to_pa(y) ({ \ unsigned long _y = (unsigned long)(y); \ - (IS_ENABLED(CONFIG_XIP_KERNEL) && _y < kernel_map.virt_addr + XIP_OFFSET) ? \ - (_y - kernel_map.va_kernel_xip_pa_offset) : \ - (_y - kernel_map.va_kernel_pa_offset - XIP_OFFSET); \ + (_y < kernel_map.virt_addr + kernel_map.xiprom_sz) ? \ + (_y - kernel_map.va_kernel_xip_text_pa_offset) : \ + (_y - kernel_map.va_kernel_xip_data_pa_offset); \ }) +#else +#define kernel_mapping_va_to_pa(y) ((unsigned long)(y) - kernel_map.va_kernel_pa_offset) +#endif #define __va_to_pa_nodebug(x) ({ \ unsigned long _x = x; \ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 089f3c9f56a3..ede2f921967c 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -107,13 +107,6 @@ #endif -#ifdef CONFIG_XIP_KERNEL -#define XIP_OFFSET SZ_32M -#define XIP_OFFSET_MASK (SZ_32M - 1) -#else -#define XIP_OFFSET 0 -#endif - #ifndef __ASSEMBLY__ #include <asm/page.h> @@ -142,11 +135,14 @@ #ifdef CONFIG_XIP_KERNEL #define XIP_FIXUP(addr) ({ \ + extern char _sdata[], _start[], _end[]; \ + uintptr_t __rom_start_data = CONFIG_XIP_PHYS_ADDR \ + + (uintptr_t)&_sdata - (uintptr_t)&_start; \ + uintptr_t __rom_end_data = CONFIG_XIP_PHYS_ADDR \ + + (uintptr_t)&_end - (uintptr_t)&_start; \ uintptr_t __a = (uintptr_t)(addr); \ - (__a >= CONFIG_XIP_PHYS_ADDR && \ - __a < CONFIG_XIP_PHYS_ADDR + XIP_OFFSET * 2) ? \ - __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\ - __a; \ + (__a >= __rom_start_data && __a < __rom_end_data) ? \ + __a - __rom_start_data + CONFIG_PHYS_RAM_BASE : __a; \ }) #else #define XIP_FIXUP(addr) (addr) diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index ec11001c3fe0..ab92fc84e1fc 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -46,7 +46,7 @@ bool kernel_page_present(struct page *page); #endif /* __ASSEMBLY__ */ -#ifdef CONFIG_STRICT_KERNEL_RWX +#if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_XIP_KERNEL) #ifdef CONFIG_64BIT #define SECTION_ALIGN (1 << 21) #else diff --git a/arch/riscv/include/asm/xip_fixup.h b/arch/riscv/include/asm/xip_fixup.h index b65bf6306f69..f3d56299bc22 100644 --- a/arch/riscv/include/asm/xip_fixup.h +++ b/arch/riscv/include/asm/xip_fixup.h @@ -9,18 +9,36 @@ #ifdef CONFIG_XIP_KERNEL .macro XIP_FIXUP_OFFSET reg - REG_L t0, _xip_fixup + /* Fix-up address in Flash into address in RAM early during boot before + * MMU is up. Because generated code "thinks" data is in Flash, but it + * is actually in RAM (actually data is also in Flash, but Flash is + * read-only, thus we need to use the data residing in RAM). + * + * The start of data in Flash is _sdata and the start of data in RAM is + * CONFIG_PHYS_RAM_BASE. So this fix-up essentially does this: + * reg += CONFIG_PHYS_RAM_BASE - _start + */ + li t0, CONFIG_PHYS_RAM_BASE add \reg, \reg, t0 + la t0, _sdata + sub \reg, \reg, t0 .endm .macro XIP_FIXUP_FLASH_OFFSET reg + /* In linker script, at the transition from read-only section to + * writable section, the VMA is increased while LMA remains the same. + * (See in linker script how _sdata, __data_loc and LOAD_OFFSET is + * changed) + * + * Consequently, early during boot before MMU is up, the generated code + * reads the "writable" section at wrong addresses, because VMA is used + * by compiler to generate code, but the data is located in Flash using + * LMA. + */ + la t0, _sdata + sub \reg, \reg, t0 la t0, __data_loc - REG_L t1, _xip_phys_offset - sub \reg, \reg, t1 add \reg, \reg, t0 .endm - -_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET -_xip_phys_offset: .dword CONFIG_XIP_PHYS_ADDR + XIP_OFFSET #else .macro XIP_FIXUP_OFFSET reg .endm diff --git a/arch/riscv/kernel/Makefile.syscalls b/arch/riscv/kernel/Makefile.syscalls index 52087a023b3d..9668fd1faf60 100644 --- a/arch/riscv/kernel/Makefile.syscalls +++ b/arch/riscv/kernel/Makefile.syscalls @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 syscall_abis_32 += riscv memfd_secret -syscall_abis_64 += riscv newstat rlimit memfd_secret +syscall_abis_64 += riscv rlimit memfd_secret diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 8f20607adb40..b427188b28fc 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -432,28 +432,26 @@ static void __init riscv_resolve_isa(unsigned long *source_isa, bitmap_copy(prev_resolved_isa, resolved_isa, RISCV_ISA_EXT_MAX); for_each_set_bit(bit, source_isa, RISCV_ISA_EXT_MAX) { ext = riscv_get_isa_ext_data(bit); - if (!ext) - continue; - if (ext->validate) { + if (ext && ext->validate) { ret = ext->validate(ext, resolved_isa); if (ret == -EPROBE_DEFER) { loop = true; continue; } else if (ret) { /* Disable the extension entirely */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); continue; } } - set_bit(ext->id, resolved_isa); + set_bit(bit, resolved_isa); /* No need to keep it in source isa now that it is enabled */ - clear_bit(ext->id, source_isa); + clear_bit(bit, source_isa); /* Single letter extensions get set in hwcap */ - if (ext->id < RISCV_ISA_EXT_BASE) - *this_hwcap |= isa2hwcap[ext->id]; + if (bit < RISCV_ISA_EXT_BASE) + *this_hwcap |= isa2hwcap[bit]; } } while (loop && memcmp(prev_resolved_isa, resolved_isa, sizeof(prev_resolved_isa))); } diff --git a/arch/riscv/kernel/patch.c b/arch/riscv/kernel/patch.c index 69e5796fc51f..34ef522f07a8 100644 --- a/arch/riscv/kernel/patch.c +++ b/arch/riscv/kernel/patch.c @@ -205,6 +205,8 @@ int patch_text_set_nosync(void *addr, u8 c, size_t len) int ret; ret = patch_insn_set(addr, c, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } @@ -239,6 +241,8 @@ int patch_text_nosync(void *addr, const void *insns, size_t len) int ret; ret = patch_insn_write(addr, insns, len); + if (!ret) + flush_icache_range((uintptr_t)addr, (uintptr_t)addr + len); return ret; } diff --git a/arch/riscv/kernel/sbi-ipi.c b/arch/riscv/kernel/sbi-ipi.c index 1026e22955cc..0cc5559c08d8 100644 --- a/arch/riscv/kernel/sbi-ipi.c +++ b/arch/riscv/kernel/sbi-ipi.c @@ -71,7 +71,7 @@ void __init sbi_ipi_init(void) * the masking/unmasking of virtual IPIs is done * via generic IPI-Mux */ - cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + cpuhp_setup_state(CPUHP_AP_IRQ_RISCV_SBI_IPI_STARTING, "irqchip/sbi-ipi:starting", sbi_ipi_starting_cpu, NULL); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 05a16b1f0aee..51ebfd23e007 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -319,6 +319,7 @@ void do_trap_ecall_u(struct pt_regs *regs) regs->epc += 4; regs->orig_a0 = regs->a0; + regs->a0 = -ENOSYS; riscv_v_vstate_discard(regs); @@ -328,8 +329,7 @@ void do_trap_ecall_u(struct pt_regs *regs) if (syscall >= 0 && syscall < NR_syscalls) syscall_handler(regs, syscall); - else if (syscall != -1) - regs->a0 = -ENOSYS; + /* * Ultimately, this value will get limited by KSTACK_OFFSET_MAX(), * so the maximum stack offset is 1k bytes (10 bits). diff --git a/arch/riscv/kernel/vmcore_info.c b/arch/riscv/kernel/vmcore_info.c index 6d7a22522d63..d5e448aa90e7 100644 --- a/arch/riscv/kernel/vmcore_info.c +++ b/arch/riscv/kernel/vmcore_info.c @@ -19,6 +19,13 @@ void arch_crash_save_vmcoreinfo(void) #endif #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); +#ifdef CONFIG_XIP_KERNEL + /* TODO: Communicate with crash-utility developers on the information to + * export. The XIP case is more complicated, because the virtual-physical + * address offset depends on whether the address is in ROM or in RAM. + */ +#else vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", kernel_map.va_kernel_pa_offset); +#endif } diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S index 8c3daa1b0531..a7611789bad5 100644 --- a/arch/riscv/kernel/vmlinux-xip.lds.S +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -14,6 +14,7 @@ #include <asm/page.h> #include <asm/cache.h> #include <asm/thread_info.h> +#include <asm/set_memory.h> OUTPUT_ARCH(riscv) ENTRY(_start) @@ -65,10 +66,10 @@ SECTIONS * From this point, stuff is considered writable and will be copied to RAM */ __data_loc = ALIGN(PAGE_SIZE); /* location in file */ - . = KERNEL_LINK_ADDR + XIP_OFFSET; /* location in memory */ + . = ALIGN(SECTION_ALIGN); /* location in memory */ #undef LOAD_OFFSET -#define LOAD_OFFSET (KERNEL_LINK_ADDR + XIP_OFFSET - (__data_loc & XIP_OFFSET_MASK)) +#define LOAD_OFFSET (KERNEL_LINK_ADDR + _sdata - __data_loc) _sdata = .; /* Start of data section */ _data = .; diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 5224f3733802..a9f2b4af8f3f 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -61,26 +61,27 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr) static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) { + if (!user_mode(regs)) { + no_context(regs, addr); + return; + } + if (fault & VM_FAULT_OOM) { /* * We ran out of memory, call the OOM killer, and return the userspace * (which will retry the fault, or kill us if we got oom-killed). */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } pagefault_out_of_memory(); return; } else if (fault & (VM_FAULT_SIGBUS | VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE)) { /* Kernel mode? Handle exceptions or die */ - if (!user_mode(regs)) { - no_context(regs, addr); - return; - } do_trap(regs, SIGBUS, BUS_ADRERR, addr); return; + } else if (fault & VM_FAULT_SIGSEGV) { + do_trap(regs, SIGSEGV, SEGV_MAPERR, addr); + return; } + BUG(); } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 849a7e4daff7..f43139db82a9 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -234,8 +234,6 @@ static void __init setup_bootmem(void) */ memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); - phys_ram_end = memblock_end_of_DRAM(); - /* * Make sure we align the start of the memory on a PMD boundary so that * at worst, we map the linear mapping with PMD mappings. @@ -251,6 +249,16 @@ static void __init setup_bootmem(void) kernel_map.va_pa_offset = PAGE_OFFSET - phys_ram_base; /* + * The size of the linear page mapping may restrict the amount of + * usable RAM. + */ + if (IS_ENABLED(CONFIG_64BIT)) { + max_mapped_addr = __pa(PAGE_OFFSET) + KERN_VIRT_SIZE; + memblock_cap_memory_range(phys_ram_base, + max_mapped_addr - phys_ram_base); + } + + /* * Reserve physical address space that would be mapped to virtual * addresses greater than (void *)(-PAGE_SIZE) because: * - This memory would overlap with ERR_PTR @@ -266,6 +274,7 @@ static void __init setup_bootmem(void) memblock_reserve(max_mapped_addr, (phys_addr_t)-max_mapped_addr); } + phys_ram_end = memblock_end_of_DRAM(); min_low_pfn = PFN_UP(phys_ram_base); max_low_pfn = max_pfn = PFN_DOWN(phys_ram_end); high_memory = (void *)(__va(PFN_PHYS(max_low_pfn))); @@ -908,7 +917,7 @@ static void __init relocate_kernel(void) static void __init create_kernel_page_table(pgd_t *pgdir, __always_unused bool early) { - uintptr_t va, end_va; + uintptr_t va, start_va, end_va; /* Map the flash resident part */ end_va = kernel_map.virt_addr + kernel_map.xiprom_sz; @@ -918,10 +927,11 @@ static void __init create_kernel_page_table(pgd_t *pgdir, PMD_SIZE, PAGE_KERNEL_EXEC); /* Map the data in RAM */ - end_va = kernel_map.virt_addr + XIP_OFFSET + kernel_map.size; - for (va = kernel_map.virt_addr + XIP_OFFSET; va < end_va; va += PMD_SIZE) + start_va = kernel_map.virt_addr + (uintptr_t)&_sdata - (uintptr_t)&_start; + end_va = kernel_map.virt_addr + kernel_map.size; + for (va = start_va; va < end_va; va += PMD_SIZE) create_pgd_mapping(pgdir, va, - kernel_map.phys_addr + (va - (kernel_map.virt_addr + XIP_OFFSET)), + kernel_map.phys_addr + (va - start_va), PMD_SIZE, PAGE_KERNEL); } #else @@ -1090,13 +1100,16 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) phys_ram_base = CONFIG_PHYS_RAM_BASE; kernel_map.phys_addr = (uintptr_t)CONFIG_PHYS_RAM_BASE; - kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); + kernel_map.size = (uintptr_t)(&_end) - (uintptr_t)(&_start); - kernel_map.va_kernel_xip_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; + kernel_map.va_kernel_xip_text_pa_offset = kernel_map.virt_addr - kernel_map.xiprom; + kernel_map.va_kernel_xip_data_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr + + (uintptr_t)&_sdata - (uintptr_t)&_start; #else kernel_map.page_offset = _AC(CONFIG_PAGE_OFFSET, UL); kernel_map.phys_addr = (uintptr_t)(&_start); kernel_map.size = (uintptr_t)(&_end) - kernel_map.phys_addr; + kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; #endif #if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) @@ -1118,7 +1131,6 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) */ kernel_map.va_pa_offset = IS_ENABLED(CONFIG_64BIT) ? 0UL : PAGE_OFFSET - kernel_map.phys_addr; - kernel_map.va_kernel_pa_offset = kernel_map.virt_addr - kernel_map.phys_addr; /* * The default maximal physical memory size is KERN_VIRT_SIZE for 32-bit @@ -1287,8 +1299,6 @@ static void __init create_linear_mapping_page_table(void) if (start <= __pa(PAGE_OFFSET) && __pa(PAGE_OFFSET) < end) start = __pa(PAGE_OFFSET); - if (end >= __pa(PAGE_OFFSET) + memory_limit) - end = __pa(PAGE_OFFSET) + memory_limit; create_linear_mapping_range(start, end, 0, NULL); } diff --git a/arch/riscv/purgatory/entry.S b/arch/riscv/purgatory/entry.S index 5bcf3af903da..0e6ca6d5ae4b 100644 --- a/arch/riscv/purgatory/entry.S +++ b/arch/riscv/purgatory/entry.S @@ -7,6 +7,7 @@ * Author: Li Zhengyu (lizhengyu3@huawei.com) * */ +#include <asm/asm.h> #include <linux/linkage.h> .text @@ -34,6 +35,7 @@ SYM_CODE_END(purgatory_start) .data +.align LGREG SYM_DATA(riscv_kernel_entry, .quad 0) .end |