diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 10 | ||||
-rw-r--r-- | lib/Kconfig.debug | 21 | ||||
-rw-r--r-- | lib/Makefile | 1 | ||||
-rw-r--r-- | lib/crc-t10dif.c | 1 | ||||
-rw-r--r-- | lib/debugobjects.c | 20 | ||||
-rw-r--r-- | lib/dump_stack.c | 4 | ||||
-rw-r--r-- | lib/dynamic_debug.c | 2 | ||||
-rw-r--r-- | lib/earlycpio.c | 29 | ||||
-rw-r--r-- | lib/kobject.c | 22 | ||||
-rw-r--r-- | lib/lockref.c | 128 | ||||
-rw-r--r-- | lib/lz4/lz4_compress.c | 4 | ||||
-rw-r--r-- | lib/lz4/lz4_decompress.c | 6 | ||||
-rw-r--r-- | lib/lz4/lz4hc_compress.c | 4 | ||||
-rw-r--r-- | lib/mpi/longlong.h | 17 | ||||
-rw-r--r-- | lib/percpu_counter.c | 2 | ||||
-rw-r--r-- | lib/raid6/.gitignore | 1 | ||||
-rw-r--r-- | lib/raid6/Makefile | 40 | ||||
-rw-r--r-- | lib/raid6/algos.c | 6 | ||||
-rw-r--r-- | lib/raid6/neon.c | 58 | ||||
-rw-r--r-- | lib/raid6/neon.uc | 80 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 26 | ||||
-rw-r--r-- | lib/swiotlb.c | 8 | ||||
-rw-r--r-- | lib/vsprintf.c | 82 |
23 files changed, 522 insertions, 50 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 35da51359d40..b3c8be0da17f 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -48,6 +48,16 @@ config STMP_DEVICE config PERCPU_RWSEM boolean +config ARCH_USE_CMPXCHG_LOCKREF + bool + +config CMPXCHG_LOCKREF + def_bool y if ARCH_USE_CMPXCHG_LOCKREF + depends on SMP + depends on !GENERIC_LOCKBREAK + depends on !DEBUG_SPINLOCK + depends on !DEBUG_LOCK_ALLOC + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 98ac17ed6222..444e1c12fea9 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -238,7 +238,7 @@ config DEBUG_SECTION_MISMATCH any use of code/data previously in these sections would most likely result in an oops. In the code, functions and variables are annotated with - __init, __cpuinit, etc. (see the full list in include/linux/init.h), + __init,, etc. (see the full list in include/linux/init.h), which results in the code/data being placed in specific sections. The section mismatch analysis is always performed after a full kernel build, and enabling this option causes the following @@ -981,6 +981,25 @@ config DEBUG_KOBJECT If you say Y here, some extra kobject debugging messages will be sent to the syslog. +config DEBUG_KOBJECT_RELEASE + bool "kobject release debugging" + depends on DEBUG_KERNEL + help + kobjects are reference counted objects. This means that their + last reference count put is not predictable, and the kobject can + live on past the point at which a driver decides to drop it's + initial reference to the kobject gained on allocation. An + example of this would be a struct device which has just been + unregistered. + + However, some buggy drivers assume that after such an operation, + the memory backing the kobject can be immediately freed. This + goes completely against the principles of a refcounted object. + + If you say Y here, the kernel will delay the release of kobjects + on the last reference count to improve the visibility of this + kind of kobject release bug. + config HAVE_DEBUG_BUGVERBOSE bool diff --git a/lib/Makefile b/lib/Makefile index 7baccfd8a4e9..f2cb3082697c 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -20,6 +20,7 @@ lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o +obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index fe3428c07b47..43bc5b071f96 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -52,3 +52,4 @@ module_exit(crc_t10dif_mod_fini); MODULE_DESCRIPTION("T10 DIF CRC calculation"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crct10dif"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 37061ede8b81..bf2c8b1043d8 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -381,19 +381,21 @@ void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) * debug_object_activate - debug checks when an object is activated * @addr: address of the object * @descr: pointer to an object specific debug description structure + * Returns 0 for success, -EINVAL for check failed. */ -void debug_object_activate(void *addr, struct debug_obj_descr *descr) +int debug_object_activate(void *addr, struct debug_obj_descr *descr) { enum debug_obj_state state; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; + int ret; struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; if (!debug_objects_enabled) - return; + return 0; db = get_bucket((unsigned long) addr); @@ -405,23 +407,26 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: obj->state = ODEBUG_STATE_ACTIVE; + ret = 0; break; case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "activate"); state = obj->state; raw_spin_unlock_irqrestore(&db->lock, flags); - debug_object_fixup(descr->fixup_activate, addr, state); - return; + ret = debug_object_fixup(descr->fixup_activate, addr, state); + return ret ? -EINVAL : 0; case ODEBUG_STATE_DESTROYED: debug_print_object(obj, "activate"); + ret = -EINVAL; break; default: + ret = 0; break; } raw_spin_unlock_irqrestore(&db->lock, flags); - return; + return ret; } raw_spin_unlock_irqrestore(&db->lock, flags); @@ -431,8 +436,11 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) * true or not. */ if (debug_object_fixup(descr->fixup_activate, addr, - ODEBUG_STATE_NOTAVAILABLE)) + ODEBUG_STATE_NOTAVAILABLE)) { debug_print_object(&o, "activate"); + return -EINVAL; + } + return 0; } /** diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c03154173cc7..f23b63f0a1c3 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -23,7 +23,7 @@ static void __dump_stack(void) #ifdef CONFIG_SMP static atomic_t dump_lock = ATOMIC_INIT(-1); -void dump_stack(void) +asmlinkage void dump_stack(void) { int was_locked; int old; @@ -55,7 +55,7 @@ retry: preempt_enable(); } #else -void dump_stack(void) +asmlinkage void dump_stack(void) { __dump_stack(); } diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 99fec3ae405a..c37aeacd7651 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -309,7 +309,7 @@ static int ddebug_parse_query(char *words[], int nwords, struct ddebug_query *query, const char *modname) { unsigned int i; - int rc; + int rc = 0; /* check we have an even number of words */ if (nwords % 2 != 0) { diff --git a/lib/earlycpio.c b/lib/earlycpio.c index 8078ef49cb79..3eb3e4722b8e 100644 --- a/lib/earlycpio.c +++ b/lib/earlycpio.c @@ -49,22 +49,23 @@ enum cpio_fields { /** * cpio_data find_cpio_data - Search for files in an uncompressed cpio - * @path: The directory to search for, including a slash at the end - * @data: Pointer to the the cpio archive or a header inside - * @len: Remaining length of the cpio based on data pointer - * @offset: When a matching file is found, this is the offset to the - * beginning of the cpio. It can be used to iterate through - * the cpio to find all files inside of a directory path + * @path: The directory to search for, including a slash at the end + * @data: Pointer to the the cpio archive or a header inside + * @len: Remaining length of the cpio based on data pointer + * @nextoff: When a matching file is found, this is the offset from the + * beginning of the cpio to the beginning of the next file, not the + * matching file itself. It can be used to iterate through the cpio + * to find all files inside of a directory path. * - * @return: struct cpio_data containing the address, length and - * filename (with the directory path cut off) of the found file. - * If you search for a filename and not for files in a directory, - * pass the absolute path of the filename in the cpio and make sure - * the match returned an empty filename string. + * @return: struct cpio_data containing the address, length and + * filename (with the directory path cut off) of the found file. + * If you search for a filename and not for files in a directory, + * pass the absolute path of the filename in the cpio and make sure + * the match returned an empty filename string. */ -struct cpio_data __cpuinit find_cpio_data(const char *path, void *data, - size_t len, long *offset) +struct cpio_data find_cpio_data(const char *path, void *data, + size_t len, long *nextoff) { const size_t cpio_header_len = 8*C_NFIELDS - 2; struct cpio_data cd = { NULL, 0, "" }; @@ -124,7 +125,7 @@ struct cpio_data __cpuinit find_cpio_data(const char *path, void *data, if ((ch[C_MODE] & 0170000) == 0100000 && ch[C_NAMESIZE] >= mypathsize && !memcmp(p, path, mypathsize)) { - *offset = (long)nptr - (long)data; + *nextoff = (long)nptr - (long)data; if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) { pr_warn( "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", diff --git a/lib/kobject.c b/lib/kobject.c index 3bbde222c90f..962175134702 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -545,8 +545,8 @@ static void kobject_cleanup(struct kobject *kobj) struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; - pr_debug("kobject: '%s' (%p): %s\n", - kobject_name(kobj), kobj, __func__); + pr_debug("kobject: '%s' (%p): %s, parent %p\n", + kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) pr_debug("kobject: '%s' (%p): does not have a release() " @@ -580,9 +580,25 @@ static void kobject_cleanup(struct kobject *kobj) } } +#ifdef CONFIG_DEBUG_KOBJECT_RELEASE +static void kobject_delayed_cleanup(struct work_struct *work) +{ + kobject_cleanup(container_of(to_delayed_work(work), + struct kobject, release)); +} +#endif + static void kobject_release(struct kref *kref) { - kobject_cleanup(container_of(kref, struct kobject, kref)); + struct kobject *kobj = container_of(kref, struct kobject, kref); +#ifdef CONFIG_DEBUG_KOBJECT_RELEASE + pr_debug("kobject: '%s' (%p): %s, parent %p (delayed)\n", + kobject_name(kobj), kobj, __func__, kobj->parent); + INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); + schedule_delayed_work(&kobj->release, HZ); +#else + kobject_cleanup(kobj); +#endif } /** diff --git a/lib/lockref.c b/lib/lockref.c new file mode 100644 index 000000000000..9d76f404ce9a --- /dev/null +++ b/lib/lockref.c @@ -0,0 +1,128 @@ +#include <linux/export.h> +#include <linux/lockref.h> + +#ifdef CONFIG_CMPXCHG_LOCKREF + +/* + * Note that the "cmpxchg()" reloads the "old" value for the + * failure case. + */ +#define CMPXCHG_LOOP(CODE, SUCCESS) do { \ + struct lockref old; \ + BUILD_BUG_ON(sizeof(old) != 8); \ + old.lock_count = ACCESS_ONCE(lockref->lock_count); \ + while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ + struct lockref new = old, prev = old; \ + CODE \ + old.lock_count = cmpxchg(&lockref->lock_count, \ + old.lock_count, new.lock_count); \ + if (likely(old.lock_count == prev.lock_count)) { \ + SUCCESS; \ + } \ + cpu_relax(); \ + } \ +} while (0) + +#else + +#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0) + +#endif + +/** + * lockref_get - Increments reference count unconditionally + * @lockcnt: pointer to lockref structure + * + * This operation is only valid if you already hold a reference + * to the object, so you know the count cannot be zero. + */ +void lockref_get(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count++; + , + return; + ); + + spin_lock(&lockref->lock); + lockref->count++; + spin_unlock(&lockref->lock); +} +EXPORT_SYMBOL(lockref_get); + +/** + * lockref_get_not_zero - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count was zero + */ +int lockref_get_not_zero(struct lockref *lockref) +{ + int retval; + + CMPXCHG_LOOP( + new.count++; + if (!old.count) + return 0; + , + return 1; + ); + + spin_lock(&lockref->lock); + retval = 0; + if (lockref->count) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} +EXPORT_SYMBOL(lockref_get_not_zero); + +/** + * lockref_get_or_lock - Increments count unless the count is 0 + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count was zero + * and we got the lock instead. + */ +int lockref_get_or_lock(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count++; + if (!old.count) + break; + , + return 1; + ); + + spin_lock(&lockref->lock); + if (!lockref->count) + return 0; + lockref->count++; + spin_unlock(&lockref->lock); + return 1; +} +EXPORT_SYMBOL(lockref_get_or_lock); + +/** + * lockref_put_or_lock - decrements count unless count <= 1 before decrement + * @lockcnt: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken + */ +int lockref_put_or_lock(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count--; + if (old.count <= 1) + break; + , + return 1; + ); + + spin_lock(&lockref->lock); + if (lockref->count <= 1) + return 0; + lockref->count--; + spin_unlock(&lockref->lock); + return 1; +} +EXPORT_SYMBOL(lockref_put_or_lock); diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c index fd94058bd7f9..28321d8f75ef 100644 --- a/lib/lz4/lz4_compress.c +++ b/lib/lz4/lz4_compress.c @@ -437,7 +437,7 @@ int lz4_compress(const unsigned char *src, size_t src_len, exit: return ret; } -EXPORT_SYMBOL_GPL(lz4_compress); +EXPORT_SYMBOL(lz4_compress); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("LZ4 compressor"); diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index d3414eae73a1..411be80ddb46 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -299,7 +299,7 @@ exit_0: return ret; } #ifndef STATIC -EXPORT_SYMBOL_GPL(lz4_decompress); +EXPORT_SYMBOL(lz4_decompress); #endif int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, @@ -319,8 +319,8 @@ exit_0: return ret; } #ifndef STATIC -EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize); +EXPORT_SYMBOL(lz4_decompress_unknownoutputsize); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("LZ4 Decompressor"); #endif diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c index eb1a74f5e368..f344f76b6559 100644 --- a/lib/lz4/lz4hc_compress.c +++ b/lib/lz4/lz4hc_compress.c @@ -533,7 +533,7 @@ int lz4hc_compress(const unsigned char *src, size_t src_len, exit: return ret; } -EXPORT_SYMBOL_GPL(lz4hc_compress); +EXPORT_SYMBOL(lz4hc_compress); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("Dual BSD/GPL"); MODULE_DESCRIPTION("LZ4HC compressor"); diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h index d411355f238e..aac511417ad1 100644 --- a/lib/mpi/longlong.h +++ b/lib/mpi/longlong.h @@ -151,15 +151,12 @@ do { \ #endif /* __a29k__ */ #if defined(__alpha) && W_TYPE_SIZE == 64 -#define umul_ppmm(ph, pl, m0, m1) \ -do { \ - UDItype __m0 = (m0), __m1 = (m1); \ - __asm__ ("umulh %r1,%2,%0" \ - : "=r" ((UDItype) ph) \ - : "%rJ" (__m0), \ - "rI" (__m1)); \ - (pl) = __m0 * __m1; \ - } while (0) +#define umul_ppmm(ph, pl, m0, m1) \ +do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __builtin_alpha_umulh(__m0, __m1); \ + (pl) = __m0 * __m1; \ +} while (0) #define UMUL_TIME 46 #ifndef LONGLONG_STANDALONE #define udiv_qrnnd(q, r, n1, n0, d) \ @@ -167,7 +164,7 @@ do { UDItype __r; \ (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \ (r) = __r; \ } while (0) -extern UDItype __udiv_qrnnd(); +extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype); #define UDIV_TIME 220 #endif /* LONGLONG_STANDALONE */ #endif /* __alpha */ diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 1fc23a3277e1..93c5d5ecff4e 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -158,7 +158,7 @@ static void compute_batch_value(void) percpu_counter_batch = max(32, nr*2); } -static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb, +static int percpu_counter_hotcpu_callback(struct notifier_block *nb, unsigned long action, void *hcpu) { #ifdef CONFIG_HOTPLUG_CPU diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index 162becacf97c..0a7e494b2bcd 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore @@ -2,3 +2,4 @@ mktables altivec*.c int*.c tables.c +neon?.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 9f7c184725d7..b4625787c7ee 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,6 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o +raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o hostprogs-y += mktables @@ -16,6 +17,21 @@ ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec -mabi=altivec endif +# The GCC option -ffreestanding is required in order to compile code containing +# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) +ifeq ($(CONFIG_KERNEL_MODE_NEON),y) +NEON_FLAGS := -ffreestanding +ifeq ($(ARCH),arm) +NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon +endif +ifeq ($(ARCH),arm64) +CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only +endif +endif + targets += int1.c $(obj)/int1.c: UNROLL := 1 $(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE @@ -70,6 +86,30 @@ $(obj)/altivec8.c: UNROLL := 8 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +CFLAGS_neon1.o += $(NEON_FLAGS) +targets += neon1.c +$(obj)/neon1.c: UNROLL := 1 +$(obj)/neon1.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon2.o += $(NEON_FLAGS) +targets += neon2.c +$(obj)/neon2.c: UNROLL := 2 +$(obj)/neon2.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon4.o += $(NEON_FLAGS) +targets += neon4.c +$(obj)/neon4.c: UNROLL := 4 +$(obj)/neon4.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon8.o += $(NEON_FLAGS) +targets += neon8.c +$(obj)/neon8.c: UNROLL := 8 +$(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + quiet_cmd_mktable = TABLE $@ cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 6d7316fe9f30..74e6f5629dbc 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -70,6 +70,12 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_intx2, &raid6_intx4, &raid6_intx8, +#ifdef CONFIG_KERNEL_MODE_NEON + &raid6_neonx1, + &raid6_neonx2, + &raid6_neonx4, + &raid6_neonx8, +#endif NULL }; diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c new file mode 100644 index 000000000000..36ad4705df1a --- /dev/null +++ b/lib/raid6/neon.c @@ -0,0 +1,58 @@ +/* + * linux/lib/raid6/neon.c - RAID6 syndrome calculation using ARM NEON intrinsics + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/raid/pq.h> + +#ifdef __KERNEL__ +#include <asm/neon.h> +#else +#define kernel_neon_begin() +#define kernel_neon_end() +#define cpu_has_neon() (1) +#endif + +/* + * There are 2 reasons these wrappers are kept in a separate compilation unit + * from the actual implementations in neonN.c (generated from neon.uc by + * unroll.awk): + * - the actual implementations use NEON intrinsics, and the GCC support header + * (arm_neon.h) is not fully compatible (type wise) with the kernel; + * - the neonN.c files are compiled with -mfpu=neon and optimization enabled, + * and we have to make sure that we never use *any* NEON/VFP instructions + * outside a kernel_neon_begin()/kernel_neon_end() pair. + */ + +#define RAID6_NEON_WRAPPER(_n) \ + static void raid6_neon ## _n ## _gen_syndrome(int disks, \ + size_t bytes, void **ptrs) \ + { \ + void raid6_neon ## _n ## _gen_syndrome_real(int, \ + unsigned long, void**); \ + kernel_neon_begin(); \ + raid6_neon ## _n ## _gen_syndrome_real(disks, \ + (unsigned long)bytes, ptrs); \ + kernel_neon_end(); \ + } \ + struct raid6_calls const raid6_neonx ## _n = { \ + raid6_neon ## _n ## _gen_syndrome, \ + raid6_have_neon, \ + "neonx" #_n, \ + 0 \ + } + +static int raid6_have_neon(void) +{ + return cpu_has_neon(); +} + +RAID6_NEON_WRAPPER(1); +RAID6_NEON_WRAPPER(2); +RAID6_NEON_WRAPPER(4); +RAID6_NEON_WRAPPER(8); diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc new file mode 100644 index 000000000000..1b9ed793342d --- /dev/null +++ b/lib/raid6/neon.uc @@ -0,0 +1,80 @@ +/* ----------------------------------------------------------------------- + * + * neon.uc - RAID-6 syndrome calculation using ARM NEON instructions + * + * Copyright (C) 2012 Rob Herring + * + * Based on altivec.uc: + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * neon$#.c + * + * $#-way unrolled NEON intrinsics math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + */ + +#include <arm_neon.h> + +typedef uint8x16_t unative_t; + +#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) +#define NSIZE sizeof(unative_t) + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline unative_t SHLBYTE(unative_t v) +{ + return vshlq_n_u8(v, 1); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline unative_t MASK(unative_t v) +{ + const uint8x16_t temp = NBYTES(0); + return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp); +} + +void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) +{ + uint8_t **dptr = (uint8_t **)ptrs; + uint8_t *p, *q; + int d, z, z0; + + register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + const unative_t x1d = NBYTES(0x1d); + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]); + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]); + wp$$ = veorq_u8(wp$$, wd$$); + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + + w2$$ = vandq_u8(w2$$, x1d); + w1$$ = veorq_u8(w1$$, w2$$); + wq$$ = veorq_u8(w1$$, wd$$); + } + vst1q_u8(&p[d+NSIZE*$$], wp$$); + vst1q_u8(&q[d+NSIZE*$$], wq$$); + } +} diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 087332dbf8aa..28afa1a06e03 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -22,11 +22,23 @@ ifeq ($(ARCH),x86_64) IS_X86 = yes endif +ifeq ($(ARCH),arm) + CFLAGS += -I../../../arch/arm/include -mfpu=neon + HAS_NEON = yes +endif +ifeq ($(ARCH),arm64) + CFLAGS += -I../../../arch/arm64/include + HAS_NEON = yes +endif + ifeq ($(IS_X86),yes) OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ gcc -c -x assembler - >&/dev/null && \ rm ./-.o && echo -DCONFIG_AS_AVX2=1) +else ifeq ($(HAS_NEON),yes) + OBJS += neon.o neon1.o neon2.o neon4.o neon8.o + CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\ gcc -c -x c - >&/dev/null && \ @@ -55,6 +67,18 @@ raid6.a: $(OBJS) raid6test: test.c raid6.a $(CC) $(CFLAGS) -o raid6test $^ +neon1.c: neon.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < neon.uc > $@ + +neon2.c: neon.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < neon.uc > $@ + +neon4.c: neon.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < neon.uc > $@ + +neon8.c: neon.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < neon.uc > $@ + altivec1.c: altivec.uc ../unroll.awk $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@ @@ -89,7 +113,7 @@ tables.c: mktables ./mktables > tables.c clean: - rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test spotless: clean rm -f *~ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d23762e6652c..4e8686c7e5a4 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -870,13 +870,13 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, swiotlb_full(hwdev, sg->length, dir, 0); swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, attrs); - sgl[0].dma_length = 0; + sg_dma_len(sgl) = 0; return 0; } sg->dma_address = phys_to_dma(hwdev, map); } else sg->dma_address = dev_addr; - sg->dma_length = sg->length; + sg_dma_len(sg) = sg->length; } return nelems; } @@ -904,7 +904,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); + unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); } EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); @@ -934,7 +934,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, for_each_sg(sgl, sg, nelems, i) swiotlb_sync_single(hwdev, sg->dma_address, - sg->dma_length, dir, target); + sg_dma_len(sg), dir, target); } void diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 739a36366b79..26559bdb4c49 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -26,6 +26,7 @@ #include <linux/math64.h> #include <linux/uaccess.h> #include <linux/ioport.h> +#include <linux/dcache.h> #include <net/addrconf.h> #include <asm/page.h> /* for PAGE_SIZE */ @@ -532,6 +533,81 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec) return buf; } +static void widen(char *buf, char *end, unsigned len, unsigned spaces) +{ + size_t size; + if (buf >= end) /* nowhere to put anything */ + return; + size = end - buf; + if (size <= spaces) { + memset(buf, ' ', size); + return; + } + if (len) { + if (len > size - spaces) + len = size - spaces; + memmove(buf + spaces, buf, len); + } + memset(buf, ' ', spaces); +} + +static noinline_for_stack +char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, + const char *fmt) +{ + const char *array[4], *s; + const struct dentry *p; + int depth; + int i, n; + + switch (fmt[1]) { + case '2': case '3': case '4': + depth = fmt[1] - '0'; + break; + default: + depth = 1; + } + + rcu_read_lock(); + for (i = 0; i < depth; i++, d = p) { + p = ACCESS_ONCE(d->d_parent); + array[i] = ACCESS_ONCE(d->d_name.name); + if (p == d) { + if (i) + array[i] = ""; + i++; + break; + } + } + s = array[--i]; + for (n = 0; n != spec.precision; n++, buf++) { + char c = *s++; + if (!c) { + if (!i) + break; + c = '/'; + s = array[--i]; + } + if (buf < end) + *buf = c; + } + rcu_read_unlock(); + if (n < spec.field_width) { + /* we want to pad the sucker */ + unsigned spaces = spec.field_width - n; + if (!(spec.flags & LEFT)) { + widen(buf - n, end, n, spaces); + return buf + spaces; + } + while (spaces--) { + if (buf < end) + *buf = ' '; + ++buf; + } + } + return buf; +} + static noinline_for_stack char *symbol_string(char *buf, char *end, void *ptr, struct printf_spec spec, const char *fmt) @@ -1253,6 +1329,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, spec.base = 16; return number(buf, end, (unsigned long long) *((phys_addr_t *)ptr), spec); + case 'd': + return dentry_name(buf, end, ptr, spec, fmt); + case 'D': + return dentry_name(buf, end, + ((const struct file *)ptr)->f_path.dentry, + spec, fmt); } spec.flags |= SMALL; if (spec.field_width == -1) { |