diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig.debug | 1 | ||||
-rw-r--r-- | lib/dma-debug.c | 10 | ||||
-rw-r--r-- | lib/dma-direct.c | 5 | ||||
-rw-r--r-- | lib/idr.c | 15 | ||||
-rw-r--r-- | lib/radix-tree.c | 2 | ||||
-rw-r--r-- | lib/raid6/.gitignore | 1 | ||||
-rw-r--r-- | lib/raid6/Makefile | 27 | ||||
-rw-r--r-- | lib/raid6/algos.c | 4 | ||||
-rw-r--r-- | lib/raid6/altivec.uc | 3 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 22 | ||||
-rw-r--r-- | lib/raid6/vpermxor.uc | 105 | ||||
-rw-r--r-- | lib/vsprintf.c | 2 |
12 files changed, 177 insertions, 20 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6088408ef26c..64155e310a9f 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1642,6 +1642,7 @@ config DMA_API_DEBUG menuconfig RUNTIME_TESTING_MENU bool "Runtime Testing" + def_bool y if RUNTIME_TESTING_MENU diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 1b34d210452c..7f5cdc1e6b29 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -1491,12 +1491,12 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, if (unlikely(virt == NULL)) return; - entry = dma_entry_alloc(); - if (!entry) + /* handle vmalloc and linear addresses */ + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + entry = dma_entry_alloc(); + if (!entry) return; entry->type = dma_debug_coherent; @@ -1528,7 +1528,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size, }; /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_to_page(virt)) + if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) return; if (is_vmalloc_addr(virt)) diff --git a/lib/dma-direct.c b/lib/dma-direct.c index 40b1f92f2214..c9e8e21cb334 100644 --- a/lib/dma-direct.c +++ b/lib/dma-direct.c @@ -84,6 +84,10 @@ again: return page_address(page); } +/* + * NOTE: this function must never look at the dma_addr argument, because we want + * to be able to use it as a helper for iommu implementations as well. + */ void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs) { @@ -152,5 +156,6 @@ const struct dma_map_ops dma_direct_ops = { .map_sg = dma_direct_map_sg, .dma_supported = dma_direct_supported, .mapping_error = dma_direct_mapping_error, + .is_phys = 1, }; EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/idr.c b/lib/idr.c index c98d77fcf393..823b813f08f8 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -36,8 +36,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid, { struct radix_tree_iter iter; void __rcu **slot; - int base = idr->idr_base; - int id = *nextid; + unsigned int base = idr->idr_base; + unsigned int id = *nextid; if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) return -EINVAL; @@ -204,10 +204,11 @@ int idr_for_each(const struct idr *idr, radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) { int ret; + unsigned long id = iter.index + base; - if (WARN_ON_ONCE(iter.index > INT_MAX)) + if (WARN_ON_ONCE(id > INT_MAX)) break; - ret = fn(iter.index + base, rcu_dereference_raw(*slot), data); + ret = fn(id, rcu_dereference_raw(*slot), data); if (ret) return ret; } @@ -230,8 +231,8 @@ void *idr_get_next(struct idr *idr, int *nextid) { struct radix_tree_iter iter; void __rcu **slot; - int base = idr->idr_base; - int id = *nextid; + unsigned long base = idr->idr_base; + unsigned long id = *nextid; id = (id < base) ? 0 : id - base; slot = radix_tree_iter_find(&idr->idr_rt, &iter, id); @@ -431,7 +432,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id) bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - memset(bitmap, 0, sizeof(*bitmap)); bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT; rcu_assign_pointer(*slot, bitmap); } @@ -464,7 +464,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id) bitmap = this_cpu_xchg(ida_bitmap, NULL); if (!bitmap) return -EAGAIN; - memset(bitmap, 0, sizeof(*bitmap)); __set_bit(bit, bitmap->bitmap); radix_tree_iter_replace(root, &iter, slot, bitmap); } diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 0a7ae3288a24..8e00138d593f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -2125,7 +2125,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp) preempt_enable(); if (!this_cpu_read(ida_bitmap)) { - struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp); + struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp); if (!bitmap) return 0; if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap)) diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index f01b1cb04f91..3de0d8921286 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore @@ -4,3 +4,4 @@ int*.c tables.c neon?.c s390vx?.c +vpermxor*.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 4add700ddfe3..21f59443e99e 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,7 +5,8 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ int8.o int16.o int32.o raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o -raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o +raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o raid6_pq-$(CONFIG_TILEGX) += tilegx8.o raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o @@ -91,6 +92,30 @@ $(obj)/altivec8.c: UNROLL := 8 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +CFLAGS_vpermxor1.o += $(altivec_flags) +targets += vpermxor1.c +$(obj)/vpermxor1.c: UNROLL := 1 +$(obj)/vpermxor1.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor2.o += $(altivec_flags) +targets += vpermxor2.c +$(obj)/vpermxor2.c: UNROLL := 2 +$(obj)/vpermxor2.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor4.o += $(altivec_flags) +targets += vpermxor4.c +$(obj)/vpermxor4.c: UNROLL := 4 +$(obj)/vpermxor4.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_vpermxor8.o += $(altivec_flags) +targets += vpermxor8.c +$(obj)/vpermxor8.c: UNROLL := 8 +$(obj)/vpermxor8.c: $(src)/vpermxor.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + CFLAGS_neon1.o += $(NEON_FLAGS) targets += neon1.c $(obj)/neon1.c: UNROLL := 1 diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 476994723258..b2e681018145 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -74,6 +74,10 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec2, &raid6_altivec4, &raid6_altivec8, + &raid6_vpermxor1, + &raid6_vpermxor2, + &raid6_vpermxor4, + &raid6_vpermxor8, #endif #if defined(CONFIG_TILEGX) &raid6_tilegx8, diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 682aae8a1fef..d20ed0d11411 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -24,10 +24,13 @@ #include <linux/raid/pq.h> +#ifdef CONFIG_ALTIVEC + #include <altivec.h> #ifdef __KERNEL__ # include <asm/cputable.h> # include <asm/switch_to.h> +#endif /* __KERNEL__ */ /* * This is the C data type to use. We use a vector of diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index be1010bdc435..5050e270c06b 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -45,10 +45,12 @@ else ifeq ($(HAS_NEON),yes) CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\ - gcc -c -x c - >&/dev/null && \ - rm ./-.o && echo yes) + gcc -c -x c - >/dev/null && rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) - OBJS += altivec1.o altivec2.o altivec4.o altivec8.o + CFLAGS += -I../../../arch/powerpc/include + CFLAGS += -DCONFIG_ALTIVEC + OBJS += altivec1.o altivec2.o altivec4.o altivec8.o \ + vpermxor1.o vpermxor2.o vpermxor4.o vpermxor8.o endif endif ifeq ($(ARCH),tilegx) @@ -98,6 +100,18 @@ altivec4.c: altivec.uc ../unroll.awk altivec8.c: altivec.uc ../unroll.awk $(AWK) ../unroll.awk -vN=8 < altivec.uc > $@ +vpermxor1.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < vpermxor.uc > $@ + +vpermxor2.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < vpermxor.uc > $@ + +vpermxor4.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < vpermxor.uc > $@ + +vpermxor8.c: vpermxor.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < vpermxor.uc > $@ + int1.c: int.uc ../unroll.awk $(AWK) ../unroll.awk -vN=1 < int.uc > $@ @@ -123,7 +137,7 @@ tables.c: mktables ./mktables > tables.c clean: - rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c vpermxor*.c neon*.c tables.c raid6test rm -f tilegx*.c spotless: clean diff --git a/lib/raid6/vpermxor.uc b/lib/raid6/vpermxor.uc new file mode 100644 index 000000000000..10475dc423c1 --- /dev/null +++ b/lib/raid6/vpermxor.uc @@ -0,0 +1,105 @@ +/* + * Copyright 2017, Matt Brown, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * vpermxor$#.c + * + * Based on H. Peter Anvin's paper - The mathematics of RAID-6 + * + * $#-way unrolled portable integer math RAID-6 instruction set + * This file is postprocessed using unroll.awk + * + * vpermxor$#.c makes use of the vpermxor instruction to optimise the RAID6 Q + * syndrome calculations. + * This can be run on systems which have both Altivec and vpermxor instruction. + * + * This instruction was introduced in POWER8 - ISA v2.07. + */ + +#include <linux/raid/pq.h> +#ifdef CONFIG_ALTIVEC + +#include <altivec.h> +#ifdef __KERNEL__ +#include <asm/cputable.h> +#include <asm/ppc-opcode.h> +#include <asm/switch_to.h> +#endif + +typedef vector unsigned char unative_t; +#define NSIZE sizeof(unative_t) + +static const vector unsigned char gf_low = {0x1e, 0x1c, 0x1a, 0x18, 0x16, 0x14, + 0x12, 0x10, 0x0e, 0x0c, 0x0a, 0x08, + 0x06, 0x04, 0x02,0x00}; +static const vector unsigned char gf_high = {0xfd, 0xdd, 0xbd, 0x9d, 0x7d, 0x5d, + 0x3d, 0x1d, 0xe0, 0xc0, 0xa0, 0x80, + 0x60, 0x40, 0x20, 0x00}; + +static void noinline raid6_vpermxor$#_gen_syndrome_real(int disks, size_t bytes, + void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + unative_t wp$$, wq$$, wd$$; + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for (d = 0; d < bytes; d += NSIZE*$#) { + wp$$ = wq$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + + for (z = z0-1; z>=0; z--) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + /* P syndrome */ + wp$$ = vec_xor(wp$$, wd$$); + + /* Q syndrome */ + asm(VPERMXOR(%0,%1,%2,%3):"=v"(wq$$):"v"(gf_high), "v"(gf_low), "v"(wq$$)); + wq$$ = vec_xor(wq$$, wd$$); + } + *(unative_t *)&p[d+NSIZE*$$] = wp$$; + *(unative_t *)&q[d+NSIZE*$$] = wq$$; + } +} + +static void raid6_vpermxor$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + preempt_disable(); + enable_kernel_altivec(); + + raid6_vpermxor$#_gen_syndrome_real(disks, bytes, ptrs); + + disable_kernel_altivec(); + preempt_enable(); +} + +int raid6_have_altivec_vpermxor(void); +#if $# == 1 +int raid6_have_altivec_vpermxor(void) +{ + /* Check if arch has both altivec and the vpermxor instructions */ +# ifdef __KERNEL__ + return (cpu_has_feature(CPU_FTR_ALTIVEC_COMP) && + cpu_has_feature(CPU_FTR_ARCH_207S)); +# else + return 1; +#endif + +} +#endif + +const struct raid6_calls raid6_vpermxor$# = { + raid6_vpermxor$#_gen_syndrome, + NULL, + raid6_have_altivec_vpermxor, + "vpermxor$#", + 0 +}; +#endif diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 77ee6ced11b1..d7a708f82559 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1849,7 +1849,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, { const int default_width = 2 * sizeof(void *); - if (!ptr && *fmt != 'K') { + if (!ptr && *fmt != 'K' && *fmt != 'x') { /* * Print (null) with the same width as a pointer so it makes * tabular output look nice. |