diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/net/bpf_jit_32.c | 42 | ||||
-rw-r--r-- | arch/arm64/crypto/crc32-arm64.c | 22 | ||||
-rw-r--r-- | arch/arm64/crypto/sha1-ce-glue.c | 3 | ||||
-rw-r--r-- | arch/arm64/crypto/sha2-ce-glue.c | 3 | ||||
-rw-r--r-- | arch/arm64/kernel/alternative.c | 53 | ||||
-rw-r--r-- | arch/arm64/kernel/perf_event.c | 8 | ||||
-rw-r--r-- | arch/arm64/mm/dump.c | 2 | ||||
-rw-r--r-- | arch/arm64/net/bpf_jit_comp.c | 2 | ||||
-rw-r--r-- | arch/mips/include/asm/smp.h | 2 | ||||
-rw-r--r-- | arch/mips/kernel/elf.c | 32 | ||||
-rw-r--r-- | arch/mips/kernel/smp.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_rapl.c | 1 | ||||
-rw-r--r-- | arch/x86/net/bpf_jit_comp.c | 28 |
14 files changed, 126 insertions, 85 deletions
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index e1268f905026..e0e23582c8b4 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -54,6 +54,7 @@ #define SEEN_DATA (1 << (BPF_MEMWORDS + 3)) #define FLAG_NEED_X_RESET (1 << 0) +#define FLAG_IMM_OVERFLOW (1 << 1) struct jit_ctx { const struct bpf_prog *skf; @@ -293,6 +294,15 @@ static u16 imm_offset(u32 k, struct jit_ctx *ctx) /* PC in ARM mode == address of the instruction + 8 */ imm = offset - (8 + ctx->idx * 4); + if (imm & ~0xfff) { + /* + * literal pool is too far, signal it into flags. we + * can only detect it on the second pass unfortunately. + */ + ctx->flags |= FLAG_IMM_OVERFLOW; + return 0; + } + return imm; } @@ -449,10 +459,21 @@ static inline void emit_udiv(u8 rd, u8 rm, u8 rn, struct jit_ctx *ctx) return; } #endif - if (rm != ARM_R0) - emit(ARM_MOV_R(ARM_R0, rm), ctx); + + /* + * For BPF_ALU | BPF_DIV | BPF_K instructions, rm is ARM_R4 + * (r_A) and rn is ARM_R0 (r_scratch) so load rn first into + * ARM_R1 to avoid accidentally overwriting ARM_R0 with rm + * before using it as a source for ARM_R1. + * + * For BPF_ALU | BPF_DIV | BPF_X rm is ARM_R4 (r_A) and rn is + * ARM_R5 (r_X) so there is no particular register overlap + * issues. + */ if (rn != ARM_R1) emit(ARM_MOV_R(ARM_R1, rn), ctx); + if (rm != ARM_R0) + emit(ARM_MOV_R(ARM_R0, rm), ctx); ctx->seen |= SEEN_CALL; emit_mov_i(ARM_R3, (u32)jit_udiv, ctx); @@ -855,6 +876,14 @@ b_epilogue: default: return -1; } + + if (ctx->flags & FLAG_IMM_OVERFLOW) + /* + * this instruction generated an overflow when + * trying to access the literal pool, so + * delegate this filter to the kernel interpreter. + */ + return -1; } /* compute offsets only during the first pass */ @@ -917,7 +946,14 @@ void bpf_jit_compile(struct bpf_prog *fp) ctx.idx = 0; build_prologue(&ctx); - build_body(&ctx); + if (build_body(&ctx) < 0) { +#if __LINUX_ARM_ARCH__ < 7 + if (ctx.imm_count) + kfree(ctx.imms); +#endif + bpf_jit_binary_free(header); + goto out; + } build_epilogue(&ctx); flush_icache_range((u32)ctx.target, (u32)(ctx.target + ctx.idx)); diff --git a/arch/arm64/crypto/crc32-arm64.c b/arch/arm64/crypto/crc32-arm64.c index 9499199924ae..6a37c3c6b11d 100644 --- a/arch/arm64/crypto/crc32-arm64.c +++ b/arch/arm64/crypto/crc32-arm64.c @@ -147,13 +147,21 @@ static int chksum_final(struct shash_desc *desc, u8 *out) { struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + put_unaligned_le32(ctx->crc, out); + return 0; +} + +static int chksumc_final(struct shash_desc *desc, u8 *out) +{ + struct chksum_desc_ctx *ctx = shash_desc_ctx(desc); + put_unaligned_le32(~ctx->crc, out); return 0; } static int __chksum_finup(u32 crc, const u8 *data, unsigned int len, u8 *out) { - put_unaligned_le32(~crc32_arm64_le_hw(crc, data, len), out); + put_unaligned_le32(crc32_arm64_le_hw(crc, data, len), out); return 0; } @@ -199,6 +207,14 @@ static int crc32_cra_init(struct crypto_tfm *tfm) { struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + mctx->key = 0; + return 0; +} + +static int crc32c_cra_init(struct crypto_tfm *tfm) +{ + struct chksum_ctx *mctx = crypto_tfm_ctx(tfm); + mctx->key = ~0; return 0; } @@ -229,7 +245,7 @@ static struct shash_alg crc32c_alg = { .setkey = chksum_setkey, .init = chksum_init, .update = chksumc_update, - .final = chksum_final, + .final = chksumc_final, .finup = chksumc_finup, .digest = chksumc_digest, .descsize = sizeof(struct chksum_desc_ctx), @@ -241,7 +257,7 @@ static struct shash_alg crc32c_alg = { .cra_alignmask = 0, .cra_ctxsize = sizeof(struct chksum_ctx), .cra_module = THIS_MODULE, - .cra_init = crc32_cra_init, + .cra_init = crc32c_cra_init, } }; diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index 114e7cc5de8c..aefda9868627 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -74,6 +74,9 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, static int sha1_ce_final(struct shash_desc *desc, u8 *out) { + struct sha1_ce_state *sctx = shash_desc_ctx(desc); + + sctx->finalize = 0; kernel_neon_begin_partial(16); sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_ce_transform); kernel_neon_end(); diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 1340e44c048b..7cd587564a41 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -75,6 +75,9 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, static int sha256_ce_final(struct shash_desc *desc, u8 *out) { + struct sha256_ce_state *sctx = shash_desc_ctx(desc); + + sctx->finalize = 0; kernel_neon_begin_partial(28); sha256_base_do_finalize(desc, (sha256_block_fn *)sha2_ce_transform); kernel_neon_end(); diff --git a/arch/arm64/kernel/alternative.c b/arch/arm64/kernel/alternative.c index 21033bba9390..28f8365edc4c 100644 --- a/arch/arm64/kernel/alternative.c +++ b/arch/arm64/kernel/alternative.c @@ -24,7 +24,6 @@ #include <asm/cacheflush.h> #include <asm/alternative.h> #include <asm/cpufeature.h> -#include <asm/insn.h> #include <linux/stop_machine.h> extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; @@ -34,48 +33,6 @@ struct alt_region { struct alt_instr *end; }; -/* - * Decode the imm field of a b/bl instruction, and return the byte - * offset as a signed value (so it can be used when computing a new - * branch target). - */ -static s32 get_branch_offset(u32 insn) -{ - s32 imm = aarch64_insn_decode_immediate(AARCH64_INSN_IMM_26, insn); - - /* sign-extend the immediate before turning it into a byte offset */ - return (imm << 6) >> 4; -} - -static u32 get_alt_insn(u8 *insnptr, u8 *altinsnptr) -{ - u32 insn; - - aarch64_insn_read(altinsnptr, &insn); - - /* Stop the world on instructions we don't support... */ - BUG_ON(aarch64_insn_is_cbz(insn)); - BUG_ON(aarch64_insn_is_cbnz(insn)); - BUG_ON(aarch64_insn_is_bcond(insn)); - /* ... and there is probably more. */ - - if (aarch64_insn_is_b(insn) || aarch64_insn_is_bl(insn)) { - enum aarch64_insn_branch_type type; - unsigned long target; - - if (aarch64_insn_is_b(insn)) - type = AARCH64_INSN_BRANCH_NOLINK; - else - type = AARCH64_INSN_BRANCH_LINK; - - target = (unsigned long)altinsnptr + get_branch_offset(insn); - insn = aarch64_insn_gen_branch_imm((unsigned long)insnptr, - target, type); - } - - return insn; -} - static int __apply_alternatives(void *alt_region) { struct alt_instr *alt; @@ -83,9 +40,6 @@ static int __apply_alternatives(void *alt_region) u8 *origptr, *replptr; for (alt = region->begin; alt < region->end; alt++) { - u32 insn; - int i; - if (!cpus_have_cap(alt->cpufeature)) continue; @@ -95,12 +49,7 @@ static int __apply_alternatives(void *alt_region) origptr = (u8 *)&alt->orig_offset + alt->orig_offset; replptr = (u8 *)&alt->alt_offset + alt->alt_offset; - - for (i = 0; i < alt->alt_len; i += sizeof(insn)) { - insn = get_alt_insn(origptr + i, replptr + i); - aarch64_insn_write(origptr + i, insn); - } - + memcpy(origptr, replptr, alt->alt_len); flush_icache_range((uintptr_t)origptr, (uintptr_t)(origptr + alt->alt_len)); } diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 23f25acf43a9..cce18c85d2e8 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -1315,15 +1315,15 @@ static int armpmu_device_probe(struct platform_device *pdev) if (!cpu_pmu) return -ENODEV; - irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); - if (!irqs) - return -ENOMEM; - /* Don't bother with PPIs; they're already affine */ irq = platform_get_irq(pdev, 0); if (irq >= 0 && irq_is_percpu(irq)) return 0; + irqs = kcalloc(pdev->num_resources, sizeof(*irqs), GFP_KERNEL); + if (!irqs) + return -ENOMEM; + for (i = 0; i < pdev->num_resources; ++i) { struct device_node *dn; int cpu; diff --git a/arch/arm64/mm/dump.c b/arch/arm64/mm/dump.c index 74c256744b25..f3d6221cd5bd 100644 --- a/arch/arm64/mm/dump.c +++ b/arch/arm64/mm/dump.c @@ -328,10 +328,12 @@ static int ptdump_init(void) for (j = 0; j < pg_level[i].num; j++) pg_level[i].mask |= pg_level[i].bits[j].mask; +#ifdef CONFIG_SPARSEMEM_VMEMMAP address_markers[VMEMMAP_START_NR].start_address = (unsigned long)virt_to_page(PAGE_OFFSET); address_markers[VMEMMAP_END_NR].start_address = (unsigned long)virt_to_page(high_memory); +#endif pe = debugfs_create_file("kernel_page_tables", 0400, NULL, NULL, &ptdump_fops); diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index edba042b2325..dc6a4842683a 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -487,7 +487,7 @@ emit_cond_jmp: return -EINVAL; } - imm64 = (u64)insn1.imm << 32 | imm; + imm64 = (u64)insn1.imm << 32 | (u32)imm; emit_a64_mov_i64(dst, imm64, ctx); return 1; diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index bb02fac9b4fa..2b25d1ba1ea0 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -45,7 +45,7 @@ extern int __cpu_logical_map[NR_CPUS]; #define SMP_DUMP 0x8 #define SMP_ASK_C0COUNT 0x10 -extern volatile cpumask_t cpu_callin_map; +extern cpumask_t cpu_callin_map; /* Mask of CPUs which are currently definitely operating coherently */ extern cpumask_t cpu_coherent_mask; diff --git a/arch/mips/kernel/elf.c b/arch/mips/kernel/elf.c index be4899f3c393..4a4d9e067c89 100644 --- a/arch/mips/kernel/elf.c +++ b/arch/mips/kernel/elf.c @@ -76,14 +76,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, /* Lets see if this is an O32 ELF */ if (ehdr32->e_ident[EI_CLASS] == ELFCLASS32) { - /* FR = 1 for N32 */ - if (ehdr32->e_flags & EF_MIPS_ABI2) - state->overall_fp_mode = FP_FR1; - else - /* Set a good default FPU mode for O32 */ - state->overall_fp_mode = cpu_has_mips_r6 ? - FP_FRE : FP_FR0; - if (ehdr32->e_flags & EF_MIPS_FP64) { /* * Set MIPS_ABI_FP_OLD_64 for EF_MIPS_FP64. We will override it @@ -104,9 +96,6 @@ int arch_elf_pt_proc(void *_ehdr, void *_phdr, struct file *elf, (char *)&abiflags, sizeof(abiflags)); } else { - /* FR=1 is really the only option for 64-bit */ - state->overall_fp_mode = FP_FR1; - if (phdr64->p_type != PT_MIPS_ABIFLAGS) return 0; if (phdr64->p_filesz < sizeof(abiflags)) @@ -137,6 +126,7 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, struct elf32_hdr *ehdr = _ehdr; struct mode_req prog_req, interp_req; int fp_abi, interp_fp_abi, abi0, abi1, max_abi; + bool is_mips64; if (!config_enabled(CONFIG_MIPS_O32_FP64_SUPPORT)) return 0; @@ -152,10 +142,22 @@ int arch_check_elf(void *_ehdr, bool has_interpreter, abi0 = abi1 = fp_abi; } - /* ABI limits. O32 = FP_64A, N32/N64 = FP_SOFT */ - max_abi = ((ehdr->e_ident[EI_CLASS] == ELFCLASS32) && - (!(ehdr->e_flags & EF_MIPS_ABI2))) ? - MIPS_ABI_FP_64A : MIPS_ABI_FP_SOFT; + is_mips64 = (ehdr->e_ident[EI_CLASS] == ELFCLASS64) || + (ehdr->e_flags & EF_MIPS_ABI2); + + if (is_mips64) { + /* MIPS64 code always uses FR=1, thus the default is easy */ + state->overall_fp_mode = FP_FR1; + + /* Disallow access to the various FPXX & FP64 ABIs */ + max_abi = MIPS_ABI_FP_SOFT; + } else { + /* Default to a mode capable of running code expecting FR=0 */ + state->overall_fp_mode = cpu_has_mips_r6 ? FP_FRE : FP_FR0; + + /* Allow all ABIs we know about */ + max_abi = MIPS_ABI_FP_64A; + } if ((abi0 > max_abi && abi0 != MIPS_ABI_FP_UNKNOWN) || (abi1 > max_abi && abi1 != MIPS_ABI_FP_UNKNOWN)) diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 193ace7955fb..faa46ebd9dda 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -43,7 +43,7 @@ #include <asm/time.h> #include <asm/setup.h> -volatile cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ +cpumask_t cpu_callin_map; /* Bitmask of started secondaries */ int __cpu_number_map[NR_CPUS]; /* Map physical to logical */ EXPORT_SYMBOL(__cpu_number_map); @@ -218,8 +218,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) /* * Trust is futile. We should really have timeouts ... */ - while (!cpumask_test_cpu(cpu, &cpu_callin_map)) + while (!cpumask_test_cpu(cpu, &cpu_callin_map)) { udelay(100); + schedule(); + } synchronise_count_master(cpu); return 0; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 960e85de13fb..3998131d1a68 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1134,7 +1134,7 @@ static __initconst const u64 slm_hw_cache_extra_regs [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_READ|SLM_LLC_ACCESS, - [ C(RESULT_MISS) ] = SLM_DMND_READ|SLM_LLC_MISS, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = SLM_DMND_WRITE|SLM_LLC_ACCESS, @@ -1184,8 +1184,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(OP_READ) ] = { /* OFFCORE_RESPONSE.ANY_DATA.LOCAL_CACHE */ [ C(RESULT_ACCESS) ] = 0x01b7, - /* OFFCORE_RESPONSE.ANY_DATA.ANY_LLC_MISS */ - [ C(RESULT_MISS) ] = 0x01b7, + [ C(RESULT_MISS) ] = 0, }, [ C(OP_WRITE) ] = { /* OFFCORE_RESPONSE.ANY_RFO.LOCAL_CACHE */ @@ -1217,7 +1216,7 @@ static __initconst const u64 slm_hw_cache_event_ids [ C(ITLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + [ C(RESULT_MISS) ] = 0x40205, /* PAGE_WALKS.I_SIDE_WALKS */ }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, diff --git a/arch/x86/kernel/cpu/perf_event_intel_rapl.c b/arch/x86/kernel/cpu/perf_event_intel_rapl.c index 999289b94025..358c54ad20d4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_rapl.c +++ b/arch/x86/kernel/cpu/perf_event_intel_rapl.c @@ -722,6 +722,7 @@ static int __init rapl_pmu_init(void) break; case 60: /* Haswell */ case 69: /* Haswell-Celeron */ + case 61: /* Broadwell */ rapl_cntr_mask = RAPL_IDX_HSW; rapl_pmu_events_group.attrs = rapl_events_hsw_attr; break; diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 987514396c1e..99f76103c6b7 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -559,6 +559,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, if (is_ereg(dst_reg)) EMIT1(0x41); EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); + + /* emit 'movzwl eax, ax' */ + if (is_ereg(dst_reg)) + EMIT3(0x45, 0x0F, 0xB7); + else + EMIT2(0x0F, 0xB7); + EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); break; case 32: /* emit 'bswap eax' to swap lower 4 bytes */ @@ -577,6 +584,27 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, break; case BPF_ALU | BPF_END | BPF_FROM_LE: + switch (imm32) { + case 16: + /* emit 'movzwl eax, ax' to zero extend 16-bit + * into 64 bit + */ + if (is_ereg(dst_reg)) + EMIT3(0x45, 0x0F, 0xB7); + else + EMIT2(0x0F, 0xB7); + EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); + break; + case 32: + /* emit 'mov eax, eax' to clear upper 32-bits */ + if (is_ereg(dst_reg)) + EMIT1(0x45); + EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); + break; + case 64: + /* nop */ + break; + } break; /* ST: *(u8*)(dst_reg + off) = imm */ |