diff options
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/Makefile | 10 | ||||
-rw-r--r-- | arch/x86/lib/atomic64_32.c | 273 | ||||
-rw-r--r-- | arch/x86/lib/atomic64_386_32.S | 174 | ||||
-rw-r--r-- | arch/x86/lib/atomic64_cx8_32.S | 224 | ||||
-rw-r--r-- | arch/x86/lib/cache-smp.c | 19 | ||||
-rw-r--r-- | arch/x86/lib/copy_user_64.S | 6 | ||||
-rw-r--r-- | arch/x86/lib/io_64.c | 25 | ||||
-rw-r--r-- | arch/x86/lib/memcpy_64.S | 23 | ||||
-rw-r--r-- | arch/x86/lib/memset_64.S | 18 | ||||
-rw-r--r-- | arch/x86/lib/rwsem_64.S | 81 |
10 files changed, 569 insertions, 284 deletions
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index cffd754f3039..f871e04b6965 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -14,29 +14,31 @@ $(obj)/inat.o: $(obj)/inat-tables.c clean-files := inat-tables.c -obj-$(CONFIG_SMP) += msr-smp.o +obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-$(CONFIG_KPROBES) += insn.o inat.o +lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o obj-y += msr.o msr-reg.o msr-reg-export.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o + lib-y += atomic64_cx8_32.o lib-y += checksum_32.o lib-y += strstr_32.o lib-y += semaphore_32.o string_32.o ifneq ($(CONFIG_X86_CMPXCHG64),y) - lib-y += cmpxchg8b_emu.o + lib-y += cmpxchg8b_emu.o atomic64_386_32.o endif lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else - obj-y += io_64.o iomap_copy_64.o + obj-y += iomap_copy_64.o lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o + lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o endif diff --git a/arch/x86/lib/atomic64_32.c b/arch/x86/lib/atomic64_32.c index 824fa0be55a3..540179e8e9fa 100644 --- a/arch/x86/lib/atomic64_32.c +++ b/arch/x86/lib/atomic64_32.c @@ -6,225 +6,54 @@ #include <asm/cmpxchg.h> #include <asm/atomic.h> -static noinline u64 cmpxchg8b(u64 *ptr, u64 old, u64 new) -{ - u32 low = new; - u32 high = new >> 32; - - asm volatile( - LOCK_PREFIX "cmpxchg8b %1\n" - : "+A" (old), "+m" (*ptr) - : "b" (low), "c" (high) - ); - return old; -} - -u64 atomic64_cmpxchg(atomic64_t *ptr, u64 old_val, u64 new_val) -{ - return cmpxchg8b(&ptr->counter, old_val, new_val); -} -EXPORT_SYMBOL(atomic64_cmpxchg); - -/** - * atomic64_xchg - xchg atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign - * - * Atomically xchgs the value of @ptr to @new_val and returns - * the old value. - */ -u64 atomic64_xchg(atomic64_t *ptr, u64 new_val) -{ - /* - * Try first with a (possibly incorrect) assumption about - * what we have there. We'll do two loops most likely, - * but we'll get an ownership MESI transaction straight away - * instead of a read transaction followed by a - * flush-for-ownership transaction: - */ - u64 old_val, real_val = 0; - - do { - old_val = real_val; - - real_val = atomic64_cmpxchg(ptr, old_val, new_val); - - } while (real_val != old_val); - - return old_val; -} -EXPORT_SYMBOL(atomic64_xchg); - -/** - * atomic64_set - set atomic64 variable - * @ptr: pointer to type atomic64_t - * @new_val: value to assign - * - * Atomically sets the value of @ptr to @new_val. - */ -void atomic64_set(atomic64_t *ptr, u64 new_val) -{ - atomic64_xchg(ptr, new_val); -} -EXPORT_SYMBOL(atomic64_set); - -/** -EXPORT_SYMBOL(atomic64_read); - * atomic64_add_return - add and return - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr and returns @delta + *@ptr - */ -noinline u64 atomic64_add_return(u64 delta, atomic64_t *ptr) -{ - /* - * Try first with a (possibly incorrect) assumption about - * what we have there. We'll do two loops most likely, - * but we'll get an ownership MESI transaction straight away - * instead of a read transaction followed by a - * flush-for-ownership transaction: - */ - u64 old_val, new_val, real_val = 0; - - do { - old_val = real_val; - new_val = old_val + delta; - - real_val = atomic64_cmpxchg(ptr, old_val, new_val); - - } while (real_val != old_val); - - return new_val; -} -EXPORT_SYMBOL(atomic64_add_return); - -u64 atomic64_sub_return(u64 delta, atomic64_t *ptr) -{ - return atomic64_add_return(-delta, ptr); -} -EXPORT_SYMBOL(atomic64_sub_return); - -u64 atomic64_inc_return(atomic64_t *ptr) -{ - return atomic64_add_return(1, ptr); -} -EXPORT_SYMBOL(atomic64_inc_return); - -u64 atomic64_dec_return(atomic64_t *ptr) -{ - return atomic64_sub_return(1, ptr); -} -EXPORT_SYMBOL(atomic64_dec_return); - -/** - * atomic64_add - add integer to atomic64 variable - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr. - */ -void atomic64_add(u64 delta, atomic64_t *ptr) -{ - atomic64_add_return(delta, ptr); -} -EXPORT_SYMBOL(atomic64_add); - -/** - * atomic64_sub - subtract the atomic64 variable - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr. - */ -void atomic64_sub(u64 delta, atomic64_t *ptr) -{ - atomic64_add(-delta, ptr); -} -EXPORT_SYMBOL(atomic64_sub); - -/** - * atomic64_sub_and_test - subtract value from variable and test result - * @delta: integer value to subtract - * @ptr: pointer to type atomic64_t - * - * Atomically subtracts @delta from @ptr and returns - * true if the result is zero, or false for all - * other cases. - */ -int atomic64_sub_and_test(u64 delta, atomic64_t *ptr) -{ - u64 new_val = atomic64_sub_return(delta, ptr); - - return new_val == 0; -} -EXPORT_SYMBOL(atomic64_sub_and_test); - -/** - * atomic64_inc - increment atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically increments @ptr by 1. - */ -void atomic64_inc(atomic64_t *ptr) -{ - atomic64_add(1, ptr); -} -EXPORT_SYMBOL(atomic64_inc); - -/** - * atomic64_dec - decrement atomic64 variable - * @ptr: pointer to type atomic64_t - * - * Atomically decrements @ptr by 1. - */ -void atomic64_dec(atomic64_t *ptr) -{ - atomic64_sub(1, ptr); -} -EXPORT_SYMBOL(atomic64_dec); - -/** - * atomic64_dec_and_test - decrement and test - * @ptr: pointer to type atomic64_t - * - * Atomically decrements @ptr by 1 and - * returns true if the result is 0, or false for all other - * cases. - */ -int atomic64_dec_and_test(atomic64_t *ptr) -{ - return atomic64_sub_and_test(1, ptr); -} -EXPORT_SYMBOL(atomic64_dec_and_test); - -/** - * atomic64_inc_and_test - increment and test - * @ptr: pointer to type atomic64_t - * - * Atomically increments @ptr by 1 - * and returns true if the result is zero, or false for all - * other cases. - */ -int atomic64_inc_and_test(atomic64_t *ptr) -{ - return atomic64_sub_and_test(-1, ptr); -} -EXPORT_SYMBOL(atomic64_inc_and_test); - -/** - * atomic64_add_negative - add and test if negative - * @delta: integer value to add - * @ptr: pointer to type atomic64_t - * - * Atomically adds @delta to @ptr and returns true - * if the result is negative, or false when - * result is greater than or equal to zero. - */ -int atomic64_add_negative(u64 delta, atomic64_t *ptr) -{ - s64 new_val = atomic64_add_return(delta, ptr); - - return new_val < 0; -} -EXPORT_SYMBOL(atomic64_add_negative); +long long atomic64_read_cx8(long long, const atomic64_t *v); +EXPORT_SYMBOL(atomic64_read_cx8); +long long atomic64_set_cx8(long long, const atomic64_t *v); +EXPORT_SYMBOL(atomic64_set_cx8); +long long atomic64_xchg_cx8(long long, unsigned high); +EXPORT_SYMBOL(atomic64_xchg_cx8); +long long atomic64_add_return_cx8(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_add_return_cx8); +long long atomic64_sub_return_cx8(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_sub_return_cx8); +long long atomic64_inc_return_cx8(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_inc_return_cx8); +long long atomic64_dec_return_cx8(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_dec_return_cx8); +long long atomic64_dec_if_positive_cx8(atomic64_t *v); +EXPORT_SYMBOL(atomic64_dec_if_positive_cx8); +int atomic64_inc_not_zero_cx8(atomic64_t *v); +EXPORT_SYMBOL(atomic64_inc_not_zero_cx8); +int atomic64_add_unless_cx8(atomic64_t *v, long long a, long long u); +EXPORT_SYMBOL(atomic64_add_unless_cx8); + +#ifndef CONFIG_X86_CMPXCHG64 +long long atomic64_read_386(long long, const atomic64_t *v); +EXPORT_SYMBOL(atomic64_read_386); +long long atomic64_set_386(long long, const atomic64_t *v); +EXPORT_SYMBOL(atomic64_set_386); +long long atomic64_xchg_386(long long, unsigned high); +EXPORT_SYMBOL(atomic64_xchg_386); +long long atomic64_add_return_386(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_add_return_386); +long long atomic64_sub_return_386(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_sub_return_386); +long long atomic64_inc_return_386(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_inc_return_386); +long long atomic64_dec_return_386(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_dec_return_386); +long long atomic64_add_386(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_add_386); +long long atomic64_sub_386(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_sub_386); +long long atomic64_inc_386(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_inc_386); +long long atomic64_dec_386(long long a, atomic64_t *v); +EXPORT_SYMBOL(atomic64_dec_386); +long long atomic64_dec_if_positive_386(atomic64_t *v); +EXPORT_SYMBOL(atomic64_dec_if_positive_386); +int atomic64_inc_not_zero_386(atomic64_t *v); +EXPORT_SYMBOL(atomic64_inc_not_zero_386); +int atomic64_add_unless_386(atomic64_t *v, long long a, long long u); +EXPORT_SYMBOL(atomic64_add_unless_386); +#endif diff --git a/arch/x86/lib/atomic64_386_32.S b/arch/x86/lib/atomic64_386_32.S new file mode 100644 index 000000000000..4a5979aa6883 --- /dev/null +++ b/arch/x86/lib/atomic64_386_32.S @@ -0,0 +1,174 @@ +/* + * atomic64_t for 386/486 + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/linkage.h> +#include <asm/alternative-asm.h> +#include <asm/dwarf2.h> + +/* if you want SMP support, implement these with real spinlocks */ +.macro LOCK reg + pushfl + CFI_ADJUST_CFA_OFFSET 4 + cli +.endm + +.macro UNLOCK reg + popfl + CFI_ADJUST_CFA_OFFSET -4 +.endm + +.macro BEGIN func reg +$v = \reg + +ENTRY(atomic64_\func\()_386) + CFI_STARTPROC + LOCK $v + +.macro RETURN + UNLOCK $v + ret +.endm + +.macro END_ + CFI_ENDPROC +ENDPROC(atomic64_\func\()_386) +.purgem RETURN +.purgem END_ +.purgem END +.endm + +.macro END +RETURN +END_ +.endm +.endm + +BEGIN read %ecx + movl ($v), %eax + movl 4($v), %edx +END + +BEGIN set %esi + movl %ebx, ($v) + movl %ecx, 4($v) +END + +BEGIN xchg %esi + movl ($v), %eax + movl 4($v), %edx + movl %ebx, ($v) + movl %ecx, 4($v) +END + +BEGIN add %ecx + addl %eax, ($v) + adcl %edx, 4($v) +END + +BEGIN add_return %ecx + addl ($v), %eax + adcl 4($v), %edx + movl %eax, ($v) + movl %edx, 4($v) +END + +BEGIN sub %ecx + subl %eax, ($v) + sbbl %edx, 4($v) +END + +BEGIN sub_return %ecx + negl %edx + negl %eax + sbbl $0, %edx + addl ($v), %eax + adcl 4($v), %edx + movl %eax, ($v) + movl %edx, 4($v) +END + +BEGIN inc %esi + addl $1, ($v) + adcl $0, 4($v) +END + +BEGIN inc_return %esi + movl ($v), %eax + movl 4($v), %edx + addl $1, %eax + adcl $0, %edx + movl %eax, ($v) + movl %edx, 4($v) +END + +BEGIN dec %esi + subl $1, ($v) + sbbl $0, 4($v) +END + +BEGIN dec_return %esi + movl ($v), %eax + movl 4($v), %edx + subl $1, %eax + sbbl $0, %edx + movl %eax, ($v) + movl %edx, 4($v) +END + +BEGIN add_unless %ecx + addl %eax, %esi + adcl %edx, %edi + addl ($v), %eax + adcl 4($v), %edx + cmpl %eax, %esi + je 3f +1: + movl %eax, ($v) + movl %edx, 4($v) + movl $1, %eax +2: +RETURN +3: + cmpl %edx, %edi + jne 1b + xorl %eax, %eax + jmp 2b +END_ + +BEGIN inc_not_zero %esi + movl ($v), %eax + movl 4($v), %edx + testl %eax, %eax + je 3f +1: + addl $1, %eax + adcl $0, %edx + movl %eax, ($v) + movl %edx, 4($v) + movl $1, %eax +2: +RETURN +3: + testl %edx, %edx + jne 1b + jmp 2b +END_ + +BEGIN dec_if_positive %esi + movl ($v), %eax + movl 4($v), %edx + subl $1, %eax + sbbl $0, %edx + js 1f + movl %eax, ($v) + movl %edx, 4($v) +1: +END diff --git a/arch/x86/lib/atomic64_cx8_32.S b/arch/x86/lib/atomic64_cx8_32.S new file mode 100644 index 000000000000..71e080de3352 --- /dev/null +++ b/arch/x86/lib/atomic64_cx8_32.S @@ -0,0 +1,224 @@ +/* + * atomic64_t for 586+ + * + * Copyright © 2010 Luca Barbieri + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/linkage.h> +#include <asm/alternative-asm.h> +#include <asm/dwarf2.h> + +.macro SAVE reg + pushl %\reg + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET \reg, 0 +.endm + +.macro RESTORE reg + popl %\reg + CFI_ADJUST_CFA_OFFSET -4 + CFI_RESTORE \reg +.endm + +.macro read64 reg + movl %ebx, %eax + movl %ecx, %edx +/* we need LOCK_PREFIX since otherwise cmpxchg8b always does the write */ + LOCK_PREFIX + cmpxchg8b (\reg) +.endm + +ENTRY(atomic64_read_cx8) + CFI_STARTPROC + + read64 %ecx + ret + CFI_ENDPROC +ENDPROC(atomic64_read_cx8) + +ENTRY(atomic64_set_cx8) + CFI_STARTPROC + +1: +/* we don't need LOCK_PREFIX since aligned 64-bit writes + * are atomic on 586 and newer */ + cmpxchg8b (%esi) + jne 1b + + ret + CFI_ENDPROC +ENDPROC(atomic64_set_cx8) + +ENTRY(atomic64_xchg_cx8) + CFI_STARTPROC + + movl %ebx, %eax + movl %ecx, %edx +1: + LOCK_PREFIX + cmpxchg8b (%esi) + jne 1b + + ret + CFI_ENDPROC +ENDPROC(atomic64_xchg_cx8) + +.macro addsub_return func ins insc +ENTRY(atomic64_\func\()_return_cx8) + CFI_STARTPROC + SAVE ebp + SAVE ebx + SAVE esi + SAVE edi + + movl %eax, %esi + movl %edx, %edi + movl %ecx, %ebp + + read64 %ebp +1: + movl %eax, %ebx + movl %edx, %ecx + \ins\()l %esi, %ebx + \insc\()l %edi, %ecx + LOCK_PREFIX + cmpxchg8b (%ebp) + jne 1b + +10: + movl %ebx, %eax + movl %ecx, %edx + RESTORE edi + RESTORE esi + RESTORE ebx + RESTORE ebp + ret + CFI_ENDPROC +ENDPROC(atomic64_\func\()_return_cx8) +.endm + +addsub_return add add adc +addsub_return sub sub sbb + +.macro incdec_return func ins insc +ENTRY(atomic64_\func\()_return_cx8) + CFI_STARTPROC + SAVE ebx + + read64 %esi +1: + movl %eax, %ebx + movl %edx, %ecx + \ins\()l $1, %ebx + \insc\()l $0, %ecx + LOCK_PREFIX + cmpxchg8b (%esi) + jne 1b + +10: + movl %ebx, %eax + movl %ecx, %edx + RESTORE ebx + ret + CFI_ENDPROC +ENDPROC(atomic64_\func\()_return_cx8) +.endm + +incdec_return inc add adc +incdec_return dec sub sbb + +ENTRY(atomic64_dec_if_positive_cx8) + CFI_STARTPROC + SAVE ebx + + read64 %esi +1: + movl %eax, %ebx + movl %edx, %ecx + subl $1, %ebx + sbb $0, %ecx + js 2f + LOCK_PREFIX + cmpxchg8b (%esi) + jne 1b + +2: + movl %ebx, %eax + movl %ecx, %edx + RESTORE ebx + ret + CFI_ENDPROC +ENDPROC(atomic64_dec_if_positive_cx8) + +ENTRY(atomic64_add_unless_cx8) + CFI_STARTPROC + SAVE ebp + SAVE ebx +/* these just push these two parameters on the stack */ + SAVE edi + SAVE esi + + movl %ecx, %ebp + movl %eax, %esi + movl %edx, %edi + + read64 %ebp +1: + cmpl %eax, 0(%esp) + je 4f +2: + movl %eax, %ebx + movl %edx, %ecx + addl %esi, %ebx + adcl %edi, %ecx + LOCK_PREFIX + cmpxchg8b (%ebp) + jne 1b + + movl $1, %eax +3: + addl $8, %esp + CFI_ADJUST_CFA_OFFSET -8 + RESTORE ebx + RESTORE ebp + ret +4: + cmpl %edx, 4(%esp) + jne 2b + xorl %eax, %eax + jmp 3b + CFI_ENDPROC +ENDPROC(atomic64_add_unless_cx8) + +ENTRY(atomic64_inc_not_zero_cx8) + CFI_STARTPROC + SAVE ebx + + read64 %esi +1: + testl %eax, %eax + je 4f +2: + movl %eax, %ebx + movl %edx, %ecx + addl $1, %ebx + adcl $0, %ecx + LOCK_PREFIX + cmpxchg8b (%esi) + jne 1b + + movl $1, %eax +3: + RESTORE ebx + ret +4: + testl %edx, %edx + jne 2b + jmp 3b + CFI_ENDPROC +ENDPROC(atomic64_inc_not_zero_cx8) diff --git a/arch/x86/lib/cache-smp.c b/arch/x86/lib/cache-smp.c new file mode 100644 index 000000000000..a3c668875038 --- /dev/null +++ b/arch/x86/lib/cache-smp.c @@ -0,0 +1,19 @@ +#include <linux/smp.h> +#include <linux/module.h> + +static void __wbinvd(void *dummy) +{ + wbinvd(); +} + +void wbinvd_on_cpu(int cpu) +{ + smp_call_function_single(cpu, __wbinvd, NULL, 1); +} +EXPORT_SYMBOL(wbinvd_on_cpu); + +int wbinvd_on_all_cpus(void) +{ + return on_each_cpu(__wbinvd, NULL, 1); +} +EXPORT_SYMBOL(wbinvd_on_all_cpus); diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S index cf889d4e076a..71100c98e337 100644 --- a/arch/x86/lib/copy_user_64.S +++ b/arch/x86/lib/copy_user_64.S @@ -90,12 +90,6 @@ ENTRY(_copy_from_user) CFI_ENDPROC ENDPROC(_copy_from_user) -ENTRY(copy_user_generic) - CFI_STARTPROC - ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string - CFI_ENDPROC -ENDPROC(copy_user_generic) - .section .fixup,"ax" /* must zero dest */ ENTRY(bad_from_user) diff --git a/arch/x86/lib/io_64.c b/arch/x86/lib/io_64.c deleted file mode 100644 index 3f1eb59b5f08..000000000000 --- a/arch/x86/lib/io_64.c +++ /dev/null @@ -1,25 +0,0 @@ -#include <linux/string.h> -#include <linux/module.h> -#include <asm/io.h> - -void __memcpy_toio(unsigned long dst, const void *src, unsigned len) -{ - __inline_memcpy((void *)dst, src, len); -} -EXPORT_SYMBOL(__memcpy_toio); - -void __memcpy_fromio(void *dst, unsigned long src, unsigned len) -{ - __inline_memcpy(dst, (const void *)src, len); -} -EXPORT_SYMBOL(__memcpy_fromio); - -void memset_io(volatile void __iomem *a, int b, size_t c) -{ - /* - * TODO: memset can mangle the IO patterns quite a bit. - * perhaps it would be better to use a dumb one: - */ - memset((void *)a, b, c); -} -EXPORT_SYMBOL(memset_io); diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index ad5441ed1b57..f82e884928af 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -20,12 +20,11 @@ /* * memcpy_c() - fast string ops (REP MOVSQ) based variant. * - * Calls to this get patched into the kernel image via the + * This gets patched over the unrolled variant (below) via the * alternative instructions framework: */ - ALIGN -memcpy_c: - CFI_STARTPROC + .section .altinstr_replacement, "ax", @progbits +.Lmemcpy_c: movq %rdi, %rax movl %edx, %ecx @@ -35,8 +34,8 @@ memcpy_c: movl %edx, %ecx rep movsb ret - CFI_ENDPROC -ENDPROC(memcpy_c) +.Lmemcpy_e: + .previous ENTRY(__memcpy) ENTRY(memcpy) @@ -128,16 +127,10 @@ ENDPROC(__memcpy) * It is also a lot simpler. Use this when possible: */ - .section .altinstr_replacement, "ax" -1: .byte 0xeb /* jmp <disp8> */ - .byte (memcpy_c - memcpy) - (2f - 1b) /* offset */ -2: - .previous - .section .altinstructions, "a" .align 8 .quad memcpy - .quad 1b + .quad .Lmemcpy_c .byte X86_FEATURE_REP_GOOD /* @@ -145,6 +138,6 @@ ENDPROC(__memcpy) * so it is silly to overwrite itself with nops - reboot is the * only outcome... */ - .byte 2b - 1b - .byte 2b - 1b + .byte .Lmemcpy_e - .Lmemcpy_c + .byte .Lmemcpy_e - .Lmemcpy_c .previous diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index 2c5948116bd2..e88d3b81644a 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -12,9 +12,8 @@ * * rax original destination */ - ALIGN -memset_c: - CFI_STARTPROC + .section .altinstr_replacement, "ax", @progbits +.Lmemset_c: movq %rdi,%r9 movl %edx,%r8d andl $7,%r8d @@ -29,8 +28,8 @@ memset_c: rep stosb movq %r9,%rax ret - CFI_ENDPROC -ENDPROC(memset_c) +.Lmemset_e: + .previous ENTRY(memset) ENTRY(__memset) @@ -118,16 +117,11 @@ ENDPROC(__memset) #include <asm/cpufeature.h> - .section .altinstr_replacement,"ax" -1: .byte 0xeb /* jmp <disp8> */ - .byte (memset_c - memset) - (2f - 1b) /* offset */ -2: - .previous .section .altinstructions,"a" .align 8 .quad memset - .quad 1b + .quad .Lmemset_c .byte X86_FEATURE_REP_GOOD .byte .Lfinal - memset - .byte 2b - 1b + .byte .Lmemset_e - .Lmemset_c .previous diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S new file mode 100644 index 000000000000..41fcf00e49df --- /dev/null +++ b/arch/x86/lib/rwsem_64.S @@ -0,0 +1,81 @@ +/* + * x86-64 rwsem wrappers + * + * This interfaces the inline asm code to the slow-path + * C routines. We need to save the call-clobbered regs + * that the asm does not mark as clobbered, and move the + * argument from %rax to %rdi. + * + * NOTE! We don't need to save %rax, because the functions + * will always return the semaphore pointer in %rax (which + * is also the input argument to these helpers) + * + * The following can clobber %rdx because the asm clobbers it: + * call_rwsem_down_write_failed + * call_rwsem_wake + * but %rdi, %rsi, %rcx, %r8-r11 always need saving. + */ + +#include <linux/linkage.h> +#include <asm/rwlock.h> +#include <asm/alternative-asm.h> +#include <asm/frame.h> +#include <asm/dwarf2.h> + +#define save_common_regs \ + pushq %rdi; \ + pushq %rsi; \ + pushq %rcx; \ + pushq %r8; \ + pushq %r9; \ + pushq %r10; \ + pushq %r11 + +#define restore_common_regs \ + popq %r11; \ + popq %r10; \ + popq %r9; \ + popq %r8; \ + popq %rcx; \ + popq %rsi; \ + popq %rdi + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_down_read_failed) + save_common_regs + pushq %rdx + movq %rax,%rdi + call rwsem_down_read_failed + popq %rdx + restore_common_regs + ret + ENDPROC(call_rwsem_down_read_failed) + +ENTRY(call_rwsem_down_write_failed) + save_common_regs + movq %rax,%rdi + call rwsem_down_write_failed + restore_common_regs + ret + ENDPROC(call_rwsem_down_write_failed) + +ENTRY(call_rwsem_wake) + decl %edx /* do nothing if still outstanding active readers */ + jnz 1f + save_common_regs + movq %rax,%rdi + call rwsem_wake + restore_common_regs +1: ret + ENDPROC(call_rwsem_wake) + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_downgrade_wake) + save_common_regs + pushq %rdx + movq %rax,%rdi + call rwsem_downgrade_wake + popq %rdx + restore_common_regs + ret + ENDPROC(call_rwsem_downgrade_wake) |