From 8339f7d8e178d9c933f437d14be0a5fd1359f53d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 May 2023 12:03:23 +0100 Subject: arm64: module: remove old !KASAN_VMALLOC logic Historically, KASAN could be selected with or without KASAN_VMALLOC, and we had to be very careful where to place modules when KASAN_VMALLOC was not selected. However, since commit: f6f37d9320a11e90 ("arm64: select KASAN_VMALLOC for SW/HW_TAGS modes") Selecting CONFIG_KASAN on arm64 will also select CONFIG_KASAN_VMALLOC, and so the logic for handling CONFIG_KASAN without CONFIG_KASAN_VMALLOC is redundant and can be removed. Note: the "kasan.vmalloc={on,off}" option which only exists for HW_TAGS changes whether the vmalloc region is given non-match-all tags, and does not affect the page table manipulation code. The VM_DEFER_KMEMLEAK flag was only necessary for !CONFIG_KASAN_VMALLOC as described in its introduction in commit: 60115fa54ad7b913 ("mm: defer kmemleak object creation of module_alloc()") ... and therefore it can also be removed. Remove the redundant logic for !CONFIG_KASAN_VMALLOC. At the same time, add the missing braces around the multi-line conditional block in arch/arm64/kernel/module.c. Suggested-by: Ard Biesheuvel Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Alexander Potapenko Cc: Andrew Morton Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Will Deacon Tested-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230530110328.2213762-2-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 10 ---------- arch/arm64/kernel/module.c | 23 +++-------------------- 2 files changed, 3 insertions(+), 30 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index e7477f21a4c9..df433c80c6ef 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -48,16 +48,6 @@ static int __init kaslr_init(void) pr_info("KASLR enabled\n"); - /* - * KASAN without KASAN_VMALLOC does not expect the module region to - * intersect the vmalloc region, since shadow memory is allocated for - * each module at load time, whereas the vmalloc region will already be - * shadowed by KASAN zero pages. - */ - BUILD_BUG_ON((IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) && - !IS_ENABLED(CONFIG_KASAN_VMALLOC)); - seed = get_random_u32(); if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 5af4975caeb5..b2657ac41226 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -32,33 +32,16 @@ void *module_alloc(unsigned long size) if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) gfp_mask |= __GFP_NOWARN; - if (IS_ENABLED(CONFIG_KASAN_GENERIC) || - IS_ENABLED(CONFIG_KASAN_SW_TAGS)) - /* don't exceed the static module region - see below */ - module_alloc_end = MODULES_END; - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, gfp_mask, PAGE_KERNEL, VM_DEFER_KMEMLEAK, + module_alloc_end, gfp_mask, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); - if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - (IS_ENABLED(CONFIG_KASAN_VMALLOC) || - (!IS_ENABLED(CONFIG_KASAN_GENERIC) && - !IS_ENABLED(CONFIG_KASAN_SW_TAGS)))) - /* - * KASAN without KASAN_VMALLOC can only deal with module - * allocations being served from the reserved module region, - * since the remainder of the vmalloc region is already - * backed by zero shadow pages, and punching holes into it - * is non-trivial. Since the module region is not randomized - * when KASAN is enabled without KASAN_VMALLOC, it is even - * less likely that the module region gets exhausted, so we - * can simply omit this fallback in that case. - */ + if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) { p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + SZ_2G, GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); + } if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { vfree(p); -- cgit v1.2.3 From 55123afffe931ab02066f60084c9c495b9b52fda Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 May 2023 12:03:24 +0100 Subject: arm64: kasan: remove !KASAN_VMALLOC remnants Historically, KASAN could be selected with or without KASAN_VMALLOC, but since commit: f6f37d9320a11e90 ("arm64: select KASAN_VMALLOC for SW/HW_TAGS modes") ... we can never select KASAN without KASAN_VMALLOC on arm64, and thus arm64 code for KASAN && !KASAN_VMALLOC is redundant and can be removed. Remove the redundant code kasan_init.c Signed-off-by: Mark Rutland Reviewed-by: Alexander Potapenko Reviewed-by: Ard Biesheuvel Cc: Andrey Konovalov Cc: Andrey Ryabinin Cc: Dmitry Vyukov Cc: Will Deacon Tested-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230530110328.2213762-3-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/mm/kasan_init.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index e969e68de005..f17d066e85eb 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -214,7 +214,7 @@ static void __init clear_pgds(unsigned long start, static void __init kasan_init_shadow(void) { u64 kimg_shadow_start, kimg_shadow_end; - u64 mod_shadow_start, mod_shadow_end; + u64 mod_shadow_start; u64 vmalloc_shadow_end; phys_addr_t pa_start, pa_end; u64 i; @@ -223,7 +223,6 @@ static void __init kasan_init_shadow(void) kimg_shadow_end = PAGE_ALIGN((u64)kasan_mem_to_shadow(KERNEL_END)); mod_shadow_start = (u64)kasan_mem_to_shadow((void *)MODULES_VADDR); - mod_shadow_end = (u64)kasan_mem_to_shadow((void *)MODULES_END); vmalloc_shadow_end = (u64)kasan_mem_to_shadow((void *)VMALLOC_END); @@ -246,17 +245,9 @@ static void __init kasan_init_shadow(void) kasan_populate_early_shadow(kasan_mem_to_shadow((void *)PAGE_END), (void *)mod_shadow_start); - if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) { - BUILD_BUG_ON(VMALLOC_START != MODULES_END); - kasan_populate_early_shadow((void *)vmalloc_shadow_end, - (void *)KASAN_SHADOW_END); - } else { - kasan_populate_early_shadow((void *)kimg_shadow_end, - (void *)KASAN_SHADOW_END); - if (kimg_shadow_start > mod_shadow_end) - kasan_populate_early_shadow((void *)mod_shadow_end, - (void *)kimg_shadow_start); - } + BUILD_BUG_ON(VMALLOC_START != MODULES_END); + kasan_populate_early_shadow((void *)vmalloc_shadow_end, + (void *)KASAN_SHADOW_END); for_each_mem_range(i, &pa_start, &pa_end) { void *start = (void *)__phys_to_virt(pa_start); -- cgit v1.2.3 From 6e13b6b923b35a965d128a40ef0c5d9dd101e603 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 May 2023 12:03:25 +0100 Subject: arm64: kaslr: split kaslr/module initialization Currently kaslr_init() handles a mixture of detecting/announcing whether KASLR is enabled, and randomizing the module region depending on whether KASLR is enabled. To make it easier to rework the module region initialization, split the KASLR initialization into two steps: * kaslr_init() determines whether KASLR should be enabled, and announces this choice, recording this to a new global boolean variable. This is called from setup_arch() just before the existing call to kaslr_requires_kpti() so that this will always provide the expected result. * kaslr_module_init() randomizes the module region when required. This is called as a subsys_initcall, where we previously called kaslr_init(). As a bonus, moving the KASLR reporting earlier makes it easier to spot and permits it to be logged via earlycon, making it easier to debug any issues that could be triggered by KASLR. Booting a v6.4-rc1 kernel with this patch applied, the log looks like: | EFI stub: Booting Linux Kernel... | EFI stub: Generating empty DTB | EFI stub: Exiting boot services... | [ 0.000000] Booting Linux on physical CPU 0x0000000000 [0x000f0510] | [ 0.000000] Linux version 6.4.0-rc1-00006-g4763a8f8aeb3 (mark@lakrids) (aarch64-linux-gcc (GCC) 12.1.0, GNU ld (GNU Binutils) 2.38) #2 SMP PREEMPT Tue May 9 11:03:37 BST 2023 | [ 0.000000] KASLR enabled | [ 0.000000] earlycon: pl11 at MMIO 0x0000000009000000 (options '') | [ 0.000000] printk: bootconsole [pl11] enabled Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Will Deacon Tested-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230530110328.2213762-4-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 14 ++++++++------ arch/arm64/kernel/kaslr.c | 39 +++++++++++++++++++++++++-------------- arch/arm64/kernel/setup.c | 2 ++ 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index c735afdf639b..215efc3bbbcf 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -204,15 +204,17 @@ static inline unsigned long kaslr_offset(void) return kimage_vaddr - KIMAGE_VADDR; } +#ifdef CONFIG_RANDOMIZE_BASE +void kaslr_init(void); static inline bool kaslr_enabled(void) { - /* - * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical - * placement of the image rather than from the seed, so a displacement - * of less than MIN_KIMG_ALIGN means that no seed was provided. - */ - return kaslr_offset() >= MIN_KIMG_ALIGN; + extern bool __kaslr_is_enabled; + return __kaslr_is_enabled; } +#else +static inline void kaslr_init(void) { } +static inline bool kaslr_enabled(void) { return false; } +#endif /* * Allow all memory at the discovery stage. We will clip it later. diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index df433c80c6ef..ce7079ba1dc1 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -25,28 +25,39 @@ u16 __initdata memstart_offset_seed; struct arm64_ftr_override kaslr_feature_override __initdata; -static int __init kaslr_init(void) -{ - u64 module_range; - u32 seed; - - /* - * Set a reasonable default for module_alloc_base in case - * we end up running with module randomization disabled. - */ - module_alloc_base = (u64)_etext - MODULES_VSIZE; +bool __ro_after_init __kaslr_is_enabled = false; +void __init kaslr_init(void) +{ if (kaslr_feature_override.val & kaslr_feature_override.mask & 0xf) { pr_info("KASLR disabled on command line\n"); - return 0; + return; } - if (!kaslr_enabled()) { + /* + * The KASLR offset modulo MIN_KIMG_ALIGN is taken from the physical + * placement of the image rather than from the seed, so a displacement + * of less than MIN_KIMG_ALIGN means that no seed was provided. + */ + if (kaslr_offset() < MIN_KIMG_ALIGN) { pr_warn("KASLR disabled due to lack of seed\n"); - return 0; + return; } pr_info("KASLR enabled\n"); + __kaslr_is_enabled = true; +} + +int kaslr_module_init(void) +{ + u64 module_range; + u32 seed; + + /* + * Set a reasonable default for module_alloc_base in case + * we end up running with module randomization disabled. + */ + module_alloc_base = (u64)_etext - MODULES_VSIZE; seed = get_random_u32(); @@ -80,4 +91,4 @@ static int __init kaslr_init(void) return 0; } -subsys_initcall(kaslr_init) +subsys_initcall(kaslr_module_init) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index b8ec7b3ac9cb..417a8a86b2db 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -296,6 +296,8 @@ void __init __no_sanitize_address setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + kaslr_init(); + /* * If know now we are going to need KPTI then use non-global * mappings from the start, avoiding the cost of rewriting -- cgit v1.2.3 From e46b7103aef39c3f421f0bff7a10ae5a29cd5cee Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 May 2023 12:03:26 +0100 Subject: arm64: module: move module randomization to module.c When CONFIG_RANDOMIZE_BASE=y, module_alloc_base is a variable which is configured by kaslr_module_init() in kaslr.c, and otherwise it is an expression defined in module.h. As kaslr_module_init() is no longer tightly coupled with the KASLR initialization code, we can centralize this in module.c. This patch moves kaslr_module_init() to module.c, making module_alloc_base a static variable, and removing redundant includes from kaslr.c. For the defintion of struct arm64_ftr_override we must include , which was previously included transitively via another header. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Will Deacon Tested-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230530110328.2213762-5-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/module.h | 6 ----- arch/arm64/kernel/kaslr.c | 60 ++--------------------------------------- arch/arm64/kernel/module.c | 48 +++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 64 deletions(-) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 18734fed3bdd..3e7dcc5fa2f2 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -30,12 +30,6 @@ u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, u64 module_emit_veneer_for_adrp(struct module *mod, Elf64_Shdr *sechdrs, void *loc, u64 val); -#ifdef CONFIG_RANDOMIZE_BASE -extern u64 module_alloc_base; -#else -#define module_alloc_base ((u64)_etext - MODULES_VSIZE) -#endif - struct plt_entry { /* * A program that conforms to the AArch64 Procedure Call Standard diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index ce7079ba1dc1..17f96a19781d 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -4,23 +4,12 @@ */ #include -#include #include -#include -#include -#include -#include -#include -#include +#include -#include -#include +#include #include -#include -#include -#include -u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; struct arm64_ftr_override kaslr_feature_override __initdata; @@ -47,48 +36,3 @@ void __init kaslr_init(void) pr_info("KASLR enabled\n"); __kaslr_is_enabled = true; } - -int kaslr_module_init(void) -{ - u64 module_range; - u32 seed; - - /* - * Set a reasonable default for module_alloc_base in case - * we end up running with module randomization disabled. - */ - module_alloc_base = (u64)_etext - MODULES_VSIZE; - - seed = get_random_u32(); - - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - /* - * Randomize the module region over a 2 GB window covering the - * kernel. This reduces the risk of modules leaking information - * about the address of the kernel itself, but results in - * branches between modules and the core kernel that are - * resolved via PLTs. (Branches between modules will be - * resolved normally.) - */ - module_range = SZ_2G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); - } else { - /* - * Randomize the module region by setting module_alloc_base to - * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, - * _stext) . This guarantees that the resulting region still - * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers unless this region is exhausted - * and we fall back to a larger 2GB window in module_alloc() - * when ARM64_MODULE_PLTS is enabled. - */ - module_range = MODULES_VSIZE - (u64)(_etext - _stext); - } - - /* use the lower 21 bits to randomize the base of the module region */ - module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; - module_alloc_base &= PAGE_MASK; - - return 0; -} -subsys_initcall(kaslr_module_init) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index b2657ac41226..51fd8abd9b74 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -15,13 +15,61 @@ #include #include #include +#include #include #include + #include #include #include #include +static u64 __ro_after_init module_alloc_base = (u64)_etext - MODULES_VSIZE; + +#ifdef CONFIG_RANDOMIZE_BASE +static int __init kaslr_module_init(void) +{ + u64 module_range; + u32 seed; + + if (!kaslr_enabled()) + return 0; + + seed = get_random_u32(); + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + /* + * Randomize the module region over a 2 GB window covering the + * kernel. This reduces the risk of modules leaking information + * about the address of the kernel itself, but results in + * branches between modules and the core kernel that are + * resolved via PLTs. (Branches between modules will be + * resolved normally.) + */ + module_range = SZ_2G - (u64)(_end - _stext); + module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); + } else { + /* + * Randomize the module region by setting module_alloc_base to + * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, + * _stext) . This guarantees that the resulting region still + * covers [_stext, _etext], and that all relative branches can + * be resolved without veneers unless this region is exhausted + * and we fall back to a larger 2GB window in module_alloc() + * when ARM64_MODULE_PLTS is enabled. + */ + module_range = MODULES_VSIZE - (u64)(_etext - _stext); + } + + /* use the lower 21 bits to randomize the base of the module region */ + module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; + module_alloc_base &= PAGE_MASK; + + return 0; +} +subsys_initcall(kaslr_module_init) +#endif + void *module_alloc(unsigned long size) { u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; -- cgit v1.2.3 From ea3752ba9685b47db4571ddaee39344cf2b0bf45 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 May 2023 12:03:27 +0100 Subject: arm64: module: mandate MODULE_PLTS Contemporary kernels and modules can be relatively large, especially when common debug options are enabled. Using GCC 12.1.0, a v6.3-rc7 defconfig kernel is ~38M, and with PROVE_LOCKING + KASAN_INLINE enabled this expands to ~117M. Shanker reports [1] that the NVIDIA GPU driver alone can consume 110M of module space in some configurations. Both KASLR and ARM64_ERRATUM_843419 select MODULE_PLTS, so anyone wanting a kernel to have KASLR or run on Cortex-A53 will have MODULE_PLTS selected. This is the case in defconfig and distribution kernels (e.g. Debian, Android, etc). Practically speaking, this means we're very likely to need MODULE_PLTS and while it's almost guaranteed that MODULE_PLTS will be selected, it is possible to disable support, and we have to maintain some awkward special cases for such unusual configurations. This patch removes the MODULE_PLTS config option, with the support code always enabled if MODULES is selected. This results in a slight simplification, and will allow for further improvement in subsequent patches. For any config which currently selects MODULE_PLTS, there will be no functional change as a result of this patch. [1] https://lore.kernel.org/linux-arm-kernel/159ceeab-09af-3174-5058-445bc8dcf85b@nvidia.com/ Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Shanker Donthineni Cc: Will Deacon Tested-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230530110328.2213762-6-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 28 +++------------------------- arch/arm64/include/asm/module.h | 2 -- arch/arm64/include/asm/module.lds.h | 2 -- arch/arm64/kernel/Makefile | 3 +-- arch/arm64/kernel/ftrace.c | 8 +++----- arch/arm64/kernel/module.c | 25 ++++++++++++------------- 6 files changed, 19 insertions(+), 49 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index b1201d25a8a4..a6002084f09c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -207,6 +207,7 @@ config ARM64 select HAVE_IOREMAP_PROT select HAVE_IRQ_TIME_ACCOUNTING select HAVE_KVM + select HAVE_MOD_ARCH_SPECIFIC select HAVE_NMI select HAVE_PERF_EVENTS select HAVE_PERF_REGS @@ -577,7 +578,6 @@ config ARM64_ERRATUM_845719 config ARM64_ERRATUM_843419 bool "Cortex-A53: 843419: A load or store might access an incorrect address" default y - select ARM64_MODULE_PLTS if MODULES help This option links the kernel with '--fix-cortex-a53-843419' and enables PLT support to replace certain ADRP instructions, which can @@ -2107,26 +2107,6 @@ config ARM64_SME register state capable of holding two dimensional matrix tiles to enable various matrix operations. -config ARM64_MODULE_PLTS - bool "Use PLTs to allow module memory to spill over into vmalloc area" - depends on MODULES - select HAVE_MOD_ARCH_SPECIFIC - help - Allocate PLTs when loading modules so that jumps and calls whose - targets are too far away for their relative offsets to be encoded - in the instructions themselves can be bounced via veneers in the - module's PLT. This allows modules to be allocated in the generic - vmalloc area after the dedicated module memory area has been - exhausted. - - When running with address space randomization (KASLR), the module - region itself may be too far away for ordinary relative jumps and - calls, and so in that case, module PLTs are required and cannot be - disabled. - - Specific errata workaround(s) might also force module PLTs to be - enabled (ARM64_ERRATUM_843419). - config ARM64_PSEUDO_NMI bool "Support for NMI-like interrupts" select ARM_GIC_V3 @@ -2167,7 +2147,6 @@ config RELOCATABLE config RANDOMIZE_BASE bool "Randomize the address of the kernel image" - select ARM64_MODULE_PLTS if MODULES select RELOCATABLE help Randomizes the virtual address at which the kernel image is @@ -2198,9 +2177,8 @@ config RANDOMIZE_MODULE_REGION_FULL When this option is not set, the module region will be randomized over a limited range that contains the [_stext, _etext] interval of the core kernel, so branch relocations are almost always in range unless - ARM64_MODULE_PLTS is enabled and the region is exhausted. In this - particular case of region exhaustion, modules might be able to fall - back to a larger 2GB area. + the region is exhausted. In this particular case of region + exhaustion, modules might be able to fall back to a larger 2GB area. config CC_HAVE_STACKPROTECTOR_SYSREG def_bool $(cc-option,-mstack-protector-guard=sysreg -mstack-protector-guard-reg=sp_el0 -mstack-protector-guard-offset=0) diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index 3e7dcc5fa2f2..bfa6638b4c93 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -7,7 +7,6 @@ #include -#ifdef CONFIG_ARM64_MODULE_PLTS struct mod_plt_sec { int plt_shndx; int plt_num_entries; @@ -21,7 +20,6 @@ struct mod_arch_specific { /* for CONFIG_DYNAMIC_FTRACE */ struct plt_entry *ftrace_trampolines; }; -#endif u64 module_emit_plt_entry(struct module *mod, Elf64_Shdr *sechdrs, void *loc, const Elf64_Rela *rela, diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index dbba4b7559aa..b9ae8349e35d 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,9 +1,7 @@ SECTIONS { -#ifdef CONFIG_ARM64_MODULE_PLTS .plt 0 : { BYTE(0) } .init.plt 0 : { BYTE(0) } .text.ftrace_trampoline 0 : { BYTE(0) } -#endif #ifdef CONFIG_KASAN_SW_TAGS /* diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 7c2bb4e72476..3864a64e2b2b 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -42,8 +42,7 @@ obj-$(CONFIG_COMPAT) += sigreturn32.o obj-$(CONFIG_COMPAT_ALIGNMENT_FIXUPS) += compat_alignment.o obj-$(CONFIG_KUSER_HELPERS) += kuser32.o obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o -obj-$(CONFIG_MODULES) += module.o -obj-$(CONFIG_ARM64_MODULE_PLTS) += module-plts.o +obj-$(CONFIG_MODULES) += module.o module-plts.o obj-$(CONFIG_PERF_EVENTS) += perf_regs.o perf_callchain.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_CPU_PM) += sleep.o suspend.o diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 432626c866a8..a650f5e11fc5 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -197,7 +197,7 @@ int ftrace_update_ftrace_func(ftrace_func_t func) static struct plt_entry *get_ftrace_plt(struct module *mod) { -#ifdef CONFIG_ARM64_MODULE_PLTS +#ifdef CONFIG_MODULES struct plt_entry *plt = mod->arch.ftrace_trampolines; return &plt[FTRACE_PLT_IDX]; @@ -249,7 +249,7 @@ static bool ftrace_find_callable_addr(struct dyn_ftrace *rec, * must use a PLT to reach it. We can only place PLTs for modules, and * only when module PLT support is built-in. */ - if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + if (!IS_ENABLED(CONFIG_MODULES)) return false; /* @@ -431,10 +431,8 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, * * Note: 'mod' is only set at module load time. */ - if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && - IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && mod) { + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_ARGS) && mod) return aarch64_insn_patch_text_nosync((void *)pc, new); - } if (!ftrace_find_callable_addr(rec, mod, &addr)) return -EINVAL; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 51fd8abd9b74..f64636c2fd05 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -73,25 +73,26 @@ subsys_initcall(kaslr_module_init) void *module_alloc(unsigned long size) { u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; - gfp_t gfp_mask = GFP_KERNEL; void *p; - /* Silence the initial allocation */ - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) - gfp_mask |= __GFP_NOWARN; - + /* + * Where possible, prefer to allocate within direct branch range of the + * kernel such that no PLTs are necessary. This may fail, so we pass + * __GFP_NOWARN to silence the resulting warning. + */ p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, gfp_mask, PAGE_KERNEL, 0, - NUMA_NO_NODE, __builtin_return_address(0)); + module_alloc_end, GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); - if (!p && IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) { + if (!p) { p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, module_alloc_base + SZ_2G, GFP_KERNEL, PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } - if (p && (kasan_alloc_module_shadow(p, size, gfp_mask) < 0)) { + if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { vfree(p); return NULL; } @@ -479,9 +480,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, AARCH64_INSN_IMM_26); - - if (IS_ENABLED(CONFIG_ARM64_MODULE_PLTS) && - ovf == -ERANGE) { + if (ovf == -ERANGE) { val = module_emit_plt_entry(me, sechdrs, loc, &rel[i], sym); if (!val) return -ENOEXEC; @@ -518,7 +517,7 @@ static int module_init_ftrace_plt(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { -#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) +#if defined(CONFIG_DYNAMIC_FTRACE) const Elf_Shdr *s; struct plt_entry *plts; -- cgit v1.2.3 From 3e35d303ab7d22c4b6597e56ba46ee7cc61f3a5a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 30 May 2023 12:03:28 +0100 Subject: arm64: module: rework module VA range selection Currently, the modules region is 128M in size, which is a problem for some large modules. Shanker reports [1] that the NVIDIA GPU driver alone can consume 110M of module space in some configurations. We'd like to make the modules region a full 2G such that we can always make use of a 2G range. It's possible to build kernel images which are larger than 128M in some configurations, such as when many debug options are selected and many drivers are built in. In these configurations, we can't legitimately select a base for a 128M module region, though we currently select a value for which allocation will fail. It would be nicer to have a diagnostic message in this case. Similarly, in theory it's possible to build a kernel image which is larger than 2G and which cannot support modules. While this isn't likely to be the case for any realistic kernel deplyed in the field, it would be nice if we could print a diagnostic in this case. This patch reworks the module VA range selection to use a 2G range, and improves handling of cases where we cannot select legitimate module regions. We now attempt to select a 128M region and a 2G region: * The 128M region is selected such that modules can use direct branches (with JUMP26/CALL26 relocations) to branch to kernel code and other modules, and so that modules can reference data and text (using PREL32 relocations) anywhere in the kernel image and other modules. This region covers the entire kernel image (rather than just the text) to ensure that all PREL32 relocations are in range even when the kernel data section is absurdly large. Where we cannot allocate from this region, we'll fall back to the full 2G region. * The 2G region is selected such that modules can use direct branches with PLTs to branch to kernel code and other modules, and so that modules can use reference data and text (with PREL32 relocations) in the kernel image and other modules. This region covers the entire kernel image, and the 128M region (if one is selected). The two module regions are randomized independently while ensuring the constraints described above. [1] https://lore.kernel.org/linux-arm-kernel/159ceeab-09af-3174-5058-445bc8dcf85b@nvidia.com/ Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Cc: Shanker Donthineni Cc: Will Deacon Tested-by: Shanker Donthineni Link: https://lore.kernel.org/r/20230530110328.2213762-7-mark.rutland@arm.com Signed-off-by: Catalin Marinas --- Documentation/arm64/memory.rst | 8 +-- arch/arm64/include/asm/memory.h | 2 +- arch/arm64/kernel/module.c | 139 +++++++++++++++++++++++++++------------- 3 files changed, 99 insertions(+), 50 deletions(-) diff --git a/Documentation/arm64/memory.rst b/Documentation/arm64/memory.rst index 2a641ba7be3b..55a55f30eed8 100644 --- a/Documentation/arm64/memory.rst +++ b/Documentation/arm64/memory.rst @@ -33,8 +33,8 @@ AArch64 Linux memory layout with 4KB pages + 4 levels (48-bit):: 0000000000000000 0000ffffffffffff 256TB user ffff000000000000 ffff7fffffffffff 128TB kernel logical memory map [ffff600000000000 ffff7fffffffffff] 32TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB modules - ffff800008000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff80007fffffff 2GB modules + ffff800080000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space @@ -50,8 +50,8 @@ AArch64 Linux memory layout with 64KB pages + 3 levels (52-bit with HW support): 0000000000000000 000fffffffffffff 4PB user fff0000000000000 ffff7fffffffffff ~4PB kernel logical memory map [fffd800000000000 ffff7fffffffffff] 512TB [kasan shadow region] - ffff800000000000 ffff800007ffffff 128MB modules - ffff800008000000 fffffbffefffffff 124TB vmalloc + ffff800000000000 ffff80007fffffff 2GB modules + ffff800080000000 fffffbffefffffff 124TB vmalloc fffffbfff0000000 fffffbfffdffffff 224MB fixed mappings (top down) fffffbfffe000000 fffffbfffe7fffff 8MB [guard region] fffffbfffe800000 fffffbffff7fffff 16MB PCI I/O space diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 215efc3bbbcf..6e0e5722f229 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -46,7 +46,7 @@ #define KIMAGE_VADDR (MODULES_END) #define MODULES_END (MODULES_VADDR + MODULES_VSIZE) #define MODULES_VADDR (_PAGE_END(VA_BITS_MIN)) -#define MODULES_VSIZE (SZ_128M) +#define MODULES_VSIZE (SZ_2G) #define VMEMMAP_START (-(UL(1) << (VA_BITS - VMEMMAP_SHIFT))) #define VMEMMAP_END (VMEMMAP_START + VMEMMAP_SIZE) #define PCI_IO_END (VMEMMAP_START - SZ_8M) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index f64636c2fd05..dd851297596e 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -7,6 +7,8 @@ * Author: Will Deacon */ +#define pr_fmt(fmt) "Modules: " fmt + #include #include #include @@ -24,72 +26,119 @@ #include #include -static u64 __ro_after_init module_alloc_base = (u64)_etext - MODULES_VSIZE; +static u64 module_direct_base __ro_after_init = 0; +static u64 module_plt_base __ro_after_init = 0; -#ifdef CONFIG_RANDOMIZE_BASE -static int __init kaslr_module_init(void) +/* + * Choose a random page-aligned base address for a window of 'size' bytes which + * entirely contains the interval [start, end - 1]. + */ +static u64 __init random_bounding_box(u64 size, u64 start, u64 end) { - u64 module_range; - u32 seed; + u64 max_pgoff, pgoff; - if (!kaslr_enabled()) + if ((end - start) >= size) return 0; - seed = get_random_u32(); + max_pgoff = (size - (end - start)) / PAGE_SIZE; + pgoff = get_random_u32_inclusive(0, max_pgoff); - if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { - /* - * Randomize the module region over a 2 GB window covering the - * kernel. This reduces the risk of modules leaking information - * about the address of the kernel itself, but results in - * branches between modules and the core kernel that are - * resolved via PLTs. (Branches between modules will be - * resolved normally.) - */ - module_range = SZ_2G - (u64)(_end - _stext); - module_alloc_base = max((u64)_end - SZ_2G, (u64)MODULES_VADDR); + return start - pgoff * PAGE_SIZE; +} + +/* + * Modules may directly reference data and text anywhere within the kernel + * image and other modules. References using PREL32 relocations have a +/-2G + * range, and so we need to ensure that the entire kernel image and all modules + * fall within a 2G window such that these are always within range. + * + * Modules may directly branch to functions and code within the kernel text, + * and to functions and code within other modules. These branches will use + * CALL26/JUMP26 relocations with a +/-128M range. Without PLTs, we must ensure + * that the entire kernel text and all module text falls within a 128M window + * such that these are always within range. With PLTs, we can expand this to a + * 2G window. + * + * We chose the 128M region to surround the entire kernel image (rather than + * just the text) as using the same bounds for the 128M and 2G regions ensures + * by construction that we never select a 128M region that is not a subset of + * the 2G region. For very large and unusual kernel configurations this means + * we may fall back to PLTs where they could have been avoided, but this keeps + * the logic significantly simpler. + */ +static int __init module_init_limits(void) +{ + u64 kernel_end = (u64)_end; + u64 kernel_start = (u64)_text; + u64 kernel_size = kernel_end - kernel_start; + + /* + * The default modules region is placed immediately below the kernel + * image, and is large enough to use the full 2G relocation range. + */ + BUILD_BUG_ON(KIMAGE_VADDR != MODULES_END); + BUILD_BUG_ON(MODULES_VSIZE < SZ_2G); + + if (!kaslr_enabled()) { + if (kernel_size < SZ_128M) + module_direct_base = kernel_end - SZ_128M; + if (kernel_size < SZ_2G) + module_plt_base = kernel_end - SZ_2G; } else { - /* - * Randomize the module region by setting module_alloc_base to - * a PAGE_SIZE multiple in the range [_etext - MODULES_VSIZE, - * _stext) . This guarantees that the resulting region still - * covers [_stext, _etext], and that all relative branches can - * be resolved without veneers unless this region is exhausted - * and we fall back to a larger 2GB window in module_alloc() - * when ARM64_MODULE_PLTS is enabled. - */ - module_range = MODULES_VSIZE - (u64)(_etext - _stext); + u64 min = kernel_start; + u64 max = kernel_end; + + if (IS_ENABLED(CONFIG_RANDOMIZE_MODULE_REGION_FULL)) { + pr_info("2G module region forced by RANDOMIZE_MODULE_REGION_FULL\n"); + } else { + module_direct_base = random_bounding_box(SZ_128M, min, max); + if (module_direct_base) { + min = module_direct_base; + max = module_direct_base + SZ_128M; + } + } + + module_plt_base = random_bounding_box(SZ_2G, min, max); } - /* use the lower 21 bits to randomize the base of the module region */ - module_alloc_base += (module_range * (seed & ((1 << 21) - 1))) >> 21; - module_alloc_base &= PAGE_MASK; + pr_info("%llu pages in range for non-PLT usage", + module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0); + pr_info("%llu pages in range for PLT usage", + module_plt_base ? (SZ_2G - kernel_size) / PAGE_SIZE : 0); return 0; } -subsys_initcall(kaslr_module_init) -#endif +subsys_initcall(module_init_limits); void *module_alloc(unsigned long size) { - u64 module_alloc_end = module_alloc_base + MODULES_VSIZE; - void *p; + void *p = NULL; /* * Where possible, prefer to allocate within direct branch range of the - * kernel such that no PLTs are necessary. This may fail, so we pass - * __GFP_NOWARN to silence the resulting warning. + * kernel such that no PLTs are necessary. */ - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_end, GFP_KERNEL | __GFP_NOWARN, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + if (module_direct_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_direct_base, + module_direct_base + SZ_128M, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } + + if (!p && module_plt_base) { + p = __vmalloc_node_range(size, MODULE_ALIGN, + module_plt_base, + module_plt_base + SZ_2G, + GFP_KERNEL | __GFP_NOWARN, + PAGE_KERNEL, 0, NUMA_NO_NODE, + __builtin_return_address(0)); + } if (!p) { - p = __vmalloc_node_range(size, MODULE_ALIGN, module_alloc_base, - module_alloc_base + SZ_2G, GFP_KERNEL, - PAGE_KERNEL, 0, NUMA_NO_NODE, - __builtin_return_address(0)); + pr_warn_ratelimited("%s: unable to allocate memory\n", + __func__); } if (p && (kasan_alloc_module_shadow(p, size, GFP_KERNEL) < 0)) { -- cgit v1.2.3