diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-05 18:53:37 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-05 18:53:37 +0200 |
commit | 04759194dc447ff0b9ef35bc641ce3bb076c2930 (patch) | |
tree | 92eca3b7aa1e0d5013db254ae9f5bc130bd7e735 /lib/raid6/recov_neon.c | |
parent | Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s39... (diff) | |
parent | arm64: cleanup {COMPAT_,}SET_PERSONALITY() macro (diff) | |
download | linux-04759194dc447ff0b9ef35bc641ce3bb076c2930.tar.xz linux-04759194dc447ff0b9ef35bc641ce3bb076c2930.zip |
Merge tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux
Pull arm64 updates from Catalin Marinas:
- VMAP_STACK support, allowing the kernel stacks to be allocated in the
vmalloc space with a guard page for trapping stack overflows. One of
the patches introduces THREAD_ALIGN and changes the generic
alloc_thread_stack_node() to use this instead of THREAD_SIZE (no
functional change for other architectures)
- Contiguous PTE hugetlb support re-enabled (after being reverted a
couple of times). We now have the semantics agreed in the generic mm
layer together with API improvements so that the architecture code
can detect between contiguous and non-contiguous huge PTEs
- Initial support for persistent memory on ARM: DC CVAP instruction
exposed to user space (HWCAP) and the in-kernel pmem API implemented
- raid6 improvements for arm64: faster algorithm for the delta syndrome
and implementation of the recovery routines using Neon
- FP/SIMD refactoring and removal of support for Neon in interrupt
context. This is in preparation for full SVE support
- PTE accessors converted from inline asm to cmpxchg so that we can use
LSE atomics if available (ARMv8.1)
- Perf support for Cortex-A35 and A73
- Non-urgent fixes and cleanups
* tag 'arm64-upstream' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux: (75 commits)
arm64: cleanup {COMPAT_,}SET_PERSONALITY() macro
arm64: introduce separated bits for mm_context_t flags
arm64: hugetlb: Cleanup setup_hugepagesz
arm64: Re-enable support for contiguous hugepages
arm64: hugetlb: Override set_huge_swap_pte_at() to support contiguous hugepages
arm64: hugetlb: Override huge_pte_clear() to support contiguous hugepages
arm64: hugetlb: Handle swap entries in huge_pte_offset() for contiguous hugepages
arm64: hugetlb: Add break-before-make logic for contiguous entries
arm64: hugetlb: Spring clean huge pte accessors
arm64: hugetlb: Introduce pte_pgprot helper
arm64: hugetlb: set_huge_pte_at Add WARN_ON on !pte_present
arm64: kexec: have own crash_smp_send_stop() for crash dump for nonpanic cores
arm64: dma-mapping: Mark atomic_pool as __ro_after_init
arm64: dma-mapping: Do not pass data to gen_pool_set_algo()
arm64: Remove the !CONFIG_ARM64_HW_AFDBM alternative code paths
arm64: Ignore hardware dirty bit updates in ptep_set_wrprotect()
arm64: Move PTE_RDONLY bit handling out of set_pte_at()
kvm: arm64: Convert kvm_set_s2pte_readonly() from inline asm to cmpxchg()
arm64: Convert pte handling from inline asm to using (cmp)xchg
arm64: neon/efi: Make EFI fpsimd save/restore variables static
...
Diffstat (limited to 'lib/raid6/recov_neon.c')
-rw-r--r-- | lib/raid6/recov_neon.c | 110 |
1 files changed, 110 insertions, 0 deletions
diff --git a/lib/raid6/recov_neon.c b/lib/raid6/recov_neon.c new file mode 100644 index 000000000000..eeb5c4065b92 --- /dev/null +++ b/lib/raid6/recov_neon.c @@ -0,0 +1,110 @@ +/* + * Copyright (C) 2012 Intel Corporation + * Copyright (C) 2017 Linaro Ltd. <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include <linux/raid/pq.h> + +#ifdef __KERNEL__ +#include <asm/neon.h> +#else +#define kernel_neon_begin() +#define kernel_neon_end() +#define cpu_has_neon() (1) +#endif + +static int raid6_has_neon(void) +{ + return cpu_has_neon(); +} + +void __raid6_2data_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dp, + uint8_t *dq, const uint8_t *pbmul, + const uint8_t *qmul); + +void __raid6_datap_recov_neon(int bytes, uint8_t *p, uint8_t *q, uint8_t *dq, + const uint8_t *qmul); + +static void raid6_2data_recov_neon(int disks, size_t bytes, int faila, + int failb, void **ptrs) +{ + u8 *p, *q, *dp, *dq; + const u8 *pbmul; /* P multiplier table for B data */ + const u8 *qmul; /* Q multiplier table (for both) */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data pages + * Use the dead data pages as temporary storage for + * delta p and delta q + */ + dp = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 2] = dp; + dq = (u8 *)ptrs[failb]; + ptrs[failb] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dp; + ptrs[failb] = dq; + ptrs[disks - 2] = p; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + pbmul = raid6_vgfmul[raid6_gfexi[failb-faila]]; + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila] ^ + raid6_gfexp[failb]]]; + + kernel_neon_begin(); + __raid6_2data_recov_neon(bytes, p, q, dp, dq, pbmul, qmul); + kernel_neon_end(); +} + +static void raid6_datap_recov_neon(int disks, size_t bytes, int faila, + void **ptrs) +{ + u8 *p, *q, *dq; + const u8 *qmul; /* Q multiplier table */ + + p = (u8 *)ptrs[disks - 2]; + q = (u8 *)ptrs[disks - 1]; + + /* + * Compute syndrome with zero for the missing data page + * Use the dead data page as temporary storage for delta q + */ + dq = (u8 *)ptrs[faila]; + ptrs[faila] = (void *)raid6_empty_zero_page; + ptrs[disks - 1] = dq; + + raid6_call.gen_syndrome(disks, bytes, ptrs); + + /* Restore pointer table */ + ptrs[faila] = dq; + ptrs[disks - 1] = q; + + /* Now, pick the proper data tables */ + qmul = raid6_vgfmul[raid6_gfinv[raid6_gfexp[faila]]]; + + kernel_neon_begin(); + __raid6_datap_recov_neon(bytes, p, q, dq, qmul); + kernel_neon_end(); +} + +const struct raid6_recov_calls raid6_recov_neon = { + .data2 = raid6_2data_recov_neon, + .datap = raid6_datap_recov_neon, + .valid = raid6_has_neon, + .name = "neon", + .priority = 10, +}; |