diff options
Diffstat (limited to 'arch')
-rw-r--r-- | arch/x86/lib/csum-copy_64.S | 240 |
1 files changed, 120 insertions, 120 deletions
diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index ebf753e48ba9..fb903b758da8 100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -1,6 +1,6 @@ /* - * Copyright 2002,2003 Andi Kleen, SuSE Labs. - * + * Copyright 2002, 2003 Andi Kleen, SuSE Labs. + * * This file is subject to the terms and conditions of the GNU General Public * License. See the file COPYING in the main directory of this archive * for more details. No warranty for anything given at all. @@ -11,82 +11,82 @@ /* * Checksum copy with exception handling. - * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the + * On exceptions src_err_ptr or dst_err_ptr is set to -EFAULT and the * destination is zeroed. - * + * * Input * rdi source * rsi destination * edx len (32bit) - * ecx sum (32bit) + * ecx sum (32bit) * r8 src_err_ptr (int) * r9 dst_err_ptr (int) * * Output * eax 64bit sum. undefined in case of exception. - * - * Wrappers need to take care of valid exception sum and zeroing. + * + * Wrappers need to take care of valid exception sum and zeroing. * They also should align source or destination to 8 bytes. */ .macro source 10: - .section __ex_table,"a" + .section __ex_table, "a" .align 8 - .quad 10b,.Lbad_source + .quad 10b, .Lbad_source .previous .endm - + .macro dest 20: - .section __ex_table,"a" + .section __ex_table, "a" .align 8 - .quad 20b,.Lbad_dest + .quad 20b, .Lbad_dest .previous .endm - + .macro ignore L=.Lignore 30: - .section __ex_table,"a" + .section __ex_table, "a" .align 8 - .quad 30b,\L + .quad 30b, \L .previous .endm - - + + ENTRY(csum_partial_copy_generic) CFI_STARTPROC - cmpl $3*64,%edx - jle .Lignore + cmpl $3*64, %edx + jle .Lignore -.Lignore: - subq $7*8,%rsp +.Lignore: + subq $7*8, %rsp CFI_ADJUST_CFA_OFFSET 7*8 - movq %rbx,2*8(%rsp) + movq %rbx, 2*8(%rsp) CFI_REL_OFFSET rbx, 2*8 - movq %r12,3*8(%rsp) + movq %r12, 3*8(%rsp) CFI_REL_OFFSET r12, 3*8 - movq %r14,4*8(%rsp) + movq %r14, 4*8(%rsp) CFI_REL_OFFSET r14, 4*8 - movq %r13,5*8(%rsp) + movq %r13, 5*8(%rsp) CFI_REL_OFFSET r13, 5*8 - movq %rbp,6*8(%rsp) + movq %rbp, 6*8(%rsp) CFI_REL_OFFSET rbp, 6*8 - movq %r8,(%rsp) - movq %r9,1*8(%rsp) - - movl %ecx,%eax - movl %edx,%ecx + movq %r8, (%rsp) + movq %r9, 1*8(%rsp) - xorl %r9d,%r9d - movq %rcx,%r12 + movl %ecx, %eax + movl %edx, %ecx - shrq $6,%r12 - jz .Lhandle_tail /* < 64 */ + xorl %r9d, %r9d + movq %rcx, %r12 + + shrq $6, %r12 + jz .Lhandle_tail /* < 64 */ clc - + /* main loop. clear in 64 byte blocks */ /* r9: zero, r8: temp2, rbx: temp1, rax: sum, rcx: saved length */ /* r11: temp3, rdx: temp4, r12 loopcnt */ @@ -94,156 +94,156 @@ ENTRY(csum_partial_copy_generic) .p2align 4 .Lloop: source - movq (%rdi),%rbx + movq (%rdi), %rbx source - movq 8(%rdi),%r8 + movq 8(%rdi), %r8 source - movq 16(%rdi),%r11 + movq 16(%rdi), %r11 source - movq 24(%rdi),%rdx + movq 24(%rdi), %rdx source - movq 32(%rdi),%r10 + movq 32(%rdi), %r10 source - movq 40(%rdi),%rbp + movq 40(%rdi), %rbp source - movq 48(%rdi),%r14 + movq 48(%rdi), %r14 source - movq 56(%rdi),%r13 - + movq 56(%rdi), %r13 + ignore 2f prefetcht0 5*64(%rdi) -2: - adcq %rbx,%rax - adcq %r8,%rax - adcq %r11,%rax - adcq %rdx,%rax - adcq %r10,%rax - adcq %rbp,%rax - adcq %r14,%rax - adcq %r13,%rax +2: + adcq %rbx, %rax + adcq %r8, %rax + adcq %r11, %rax + adcq %rdx, %rax + adcq %r10, %rax + adcq %rbp, %rax + adcq %r14, %rax + adcq %r13, %rax decl %r12d - + dest - movq %rbx,(%rsi) + movq %rbx, (%rsi) dest - movq %r8,8(%rsi) + movq %r8, 8(%rsi) dest - movq %r11,16(%rsi) + movq %r11, 16(%rsi) dest - movq %rdx,24(%rsi) + movq %rdx, 24(%rsi) dest - movq %r10,32(%rsi) + movq %r10, 32(%rsi) dest - movq %rbp,40(%rsi) + movq %rbp, 40(%rsi) dest - movq %r14,48(%rsi) + movq %r14, 48(%rsi) dest - movq %r13,56(%rsi) - + movq %r13, 56(%rsi) + 3: - - leaq 64(%rdi),%rdi - leaq 64(%rsi),%rsi - jnz .Lloop + leaq 64(%rdi), %rdi + leaq 64(%rsi), %rsi - adcq %r9,%rax + jnz .Lloop + + adcq %r9, %rax /* do last up to 56 bytes */ .Lhandle_tail: /* ecx: count */ - movl %ecx,%r10d - andl $63,%ecx - shrl $3,%ecx - jz .Lfold + movl %ecx, %r10d + andl $63, %ecx + shrl $3, %ecx + jz .Lfold clc .p2align 4 -.Lloop_8: +.Lloop_8: source - movq (%rdi),%rbx - adcq %rbx,%rax + movq (%rdi), %rbx + adcq %rbx, %rax decl %ecx dest - movq %rbx,(%rsi) - leaq 8(%rsi),%rsi /* preserve carry */ - leaq 8(%rdi),%rdi + movq %rbx, (%rsi) + leaq 8(%rsi), %rsi /* preserve carry */ + leaq 8(%rdi), %rdi jnz .Lloop_8 - adcq %r9,%rax /* add in carry */ + adcq %r9, %rax /* add in carry */ .Lfold: /* reduce checksum to 32bits */ - movl %eax,%ebx - shrq $32,%rax - addl %ebx,%eax - adcl %r9d,%eax + movl %eax, %ebx + shrq $32, %rax + addl %ebx, %eax + adcl %r9d, %eax - /* do last up to 6 bytes */ + /* do last up to 6 bytes */ .Lhandle_7: - movl %r10d,%ecx - andl $7,%ecx - shrl $1,%ecx + movl %r10d, %ecx + andl $7, %ecx + shrl $1, %ecx jz .Lhandle_1 - movl $2,%edx - xorl %ebx,%ebx - clc + movl $2, %edx + xorl %ebx, %ebx + clc .p2align 4 -.Lloop_1: +.Lloop_1: source - movw (%rdi),%bx - adcl %ebx,%eax + movw (%rdi), %bx + adcl %ebx, %eax decl %ecx dest - movw %bx,(%rsi) - leaq 2(%rdi),%rdi - leaq 2(%rsi),%rsi + movw %bx, (%rsi) + leaq 2(%rdi), %rdi + leaq 2(%rsi), %rsi jnz .Lloop_1 - adcl %r9d,%eax /* add in carry */ - + adcl %r9d, %eax /* add in carry */ + /* handle last odd byte */ .Lhandle_1: - testl $1,%r10d + testl $1, %r10d jz .Lende - xorl %ebx,%ebx + xorl %ebx, %ebx source - movb (%rdi),%bl + movb (%rdi), %bl dest - movb %bl,(%rsi) - addl %ebx,%eax - adcl %r9d,%eax /* carry */ - + movb %bl, (%rsi) + addl %ebx, %eax + adcl %r9d, %eax /* carry */ + CFI_REMEMBER_STATE .Lende: - movq 2*8(%rsp),%rbx + movq 2*8(%rsp), %rbx CFI_RESTORE rbx - movq 3*8(%rsp),%r12 + movq 3*8(%rsp), %r12 CFI_RESTORE r12 - movq 4*8(%rsp),%r14 + movq 4*8(%rsp), %r14 CFI_RESTORE r14 - movq 5*8(%rsp),%r13 + movq 5*8(%rsp), %r13 CFI_RESTORE r13 - movq 6*8(%rsp),%rbp + movq 6*8(%rsp), %rbp CFI_RESTORE rbp - addq $7*8,%rsp + addq $7*8, %rsp CFI_ADJUST_CFA_OFFSET -7*8 ret CFI_RESTORE_STATE /* Exception handlers. Very simple, zeroing is done in the wrappers */ .Lbad_source: - movq (%rsp),%rax - testq %rax,%rax + movq (%rsp), %rax + testq %rax, %rax jz .Lende - movl $-EFAULT,(%rax) + movl $-EFAULT, (%rax) jmp .Lende - + .Lbad_dest: - movq 8(%rsp),%rax - testq %rax,%rax - jz .Lende - movl $-EFAULT,(%rax) + movq 8(%rsp), %rax + testq %rax, %rax + jz .Lende + movl $-EFAULT, (%rax) jmp .Lende CFI_ENDPROC ENDPROC(csum_partial_copy_generic) |