diff options
author | Ard Biesheuvel <ardb@kernel.org> | 2021-11-24 18:56:22 +0100 |
---|---|---|
committer | Ard Biesheuvel <ardb@kernel.org> | 2021-12-06 12:49:16 +0100 |
commit | 1fa8c4b19543ae8c8894ec92a18696c9f9b03fc8 (patch) | |
tree | cc6a2f523338f66bf6b59be8b91e3433e0f9baa4 /arch/arm/kernel | |
parent | ARM: entry: preserve thread_info pointer in switch_to (diff) | |
download | linux-1fa8c4b19543ae8c8894ec92a18696c9f9b03fc8.tar.xz linux-1fa8c4b19543ae8c8894ec92a18696c9f9b03fc8.zip |
ARM: module: implement support for PC-relative group relocations
Add support for the R_ARM_ALU_PC_Gn_NC and R_ARM_LDR_PC_G2 group
relocations [0] so we can use them in modules. These will be used to
load the current task pointer from a global variable without having to
rely on a literal pool entry to carry the address of this variable,
which may have a significant negative impact on cache utilization for
variables that are used often and in many different places, as each
occurrence will result in a literal pool entry and therefore a line in
the D-cache.
[0] 'ELF for the ARM architecture'
https://github.com/ARM-software/abi-aa/releases
Acked-by: Linus Walleij <linus.walleij@linaro.org>
Acked-by: Nicolas Pitre <nico@fluxnic.net>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Tested-by: Marc Zyngier <maz@kernel.org>
Tested-by: Vladimir Murzin <vladimir.murzin@arm.com> # ARMv7M
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r-- | arch/arm/kernel/module.c | 85 |
1 files changed, 85 insertions, 0 deletions
diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index beac45e89ba6..4d33a7acf617 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -68,6 +68,42 @@ bool module_exit_section(const char *name) strstarts(name, ".ARM.exidx.exit"); } +/* + * This implements the partitioning algorithm for group relocations as + * documented in the ARM AArch32 ELF psABI (IHI 0044). + * + * A single PC-relative symbol reference is divided in up to 3 add or subtract + * operations, where the final one could be incorporated into a load/store + * instruction with immediate offset. E.g., + * + * ADD Rd, PC, #... or ADD Rd, PC, #... + * ADD Rd, Rd, #... ADD Rd, Rd, #... + * LDR Rd, [Rd, #...] ADD Rd, Rd, #... + * + * The latter has a guaranteed range of only 16 MiB (3x8 == 24 bits), so it is + * of limited use in the kernel. However, the ADD/ADD/LDR combo has a range of + * -/+ 256 MiB, (2x8 + 12 == 28 bits), which means it has sufficient range for + * any in-kernel symbol reference (unless module PLTs are being used). + * + * The main advantage of this approach over the typical pattern using a literal + * load is that literal loads may miss in the D-cache, and generally lead to + * lower cache efficiency for variables that are referenced often from many + * different places in the code. + */ +static u32 get_group_rem(u32 group, u32 *offset) +{ + u32 val = *offset; + u32 shift; + do { + shift = val ? (31 - __fls(val)) & ~1 : 32; + *offset = val; + if (!val) + break; + val &= 0xffffff >> shift; + } while (group--); + return shift; +} + int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned int relindex, struct module *module) @@ -82,6 +118,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, unsigned long loc; Elf32_Sym *sym; const char *symname; + u32 shift, group = 1; s32 offset; u32 tmp; #ifdef CONFIG_THUMB2_KERNEL @@ -212,6 +249,54 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, *(u32 *)loc = __opcode_to_mem_arm(tmp); break; + case R_ARM_ALU_PC_G0_NC: + group = 0; + fallthrough; + case R_ARM_ALU_PC_G1_NC: + tmp = __mem_to_opcode_arm(*(u32 *)loc); + offset = ror32(tmp & 0xff, (tmp & 0xf00) >> 7); + if (tmp & BIT(22)) + offset = -offset; + offset += sym->st_value - loc; + if (offset < 0) { + offset = -offset; + tmp = (tmp & ~BIT(23)) | BIT(22); // SUB opcode + } else { + tmp = (tmp & ~BIT(22)) | BIT(23); // ADD opcode + } + + shift = get_group_rem(group, &offset); + if (shift < 24) { + offset >>= 24 - shift; + offset |= (shift + 8) << 7; + } + *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset); + break; + + case R_ARM_LDR_PC_G2: + tmp = __mem_to_opcode_arm(*(u32 *)loc); + offset = tmp & 0xfff; + if (~tmp & BIT(23)) // U bit cleared? + offset = -offset; + offset += sym->st_value - loc; + if (offset < 0) { + offset = -offset; + tmp &= ~BIT(23); // clear U bit + } else { + tmp |= BIT(23); // set U bit + } + get_group_rem(2, &offset); + + if (offset > 0xfff) { + pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", + module->name, relindex, i, symname, + ELF32_R_TYPE(rel->r_info), loc, + sym->st_value); + return -ENOEXEC; + } + *(u32 *)loc = __opcode_to_mem_arm((tmp & ~0xfff) | offset); + break; + #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: |