Merge with Linus' 2.6 tree

author: Russell King <rmk@dyn-67.arm.linux.org.uk> 2005-07-28 10:30:20 +0200
committer: Russell King <rmk+kernel@arm.linux.org.uk> 2005-07-28 10:30:20 +0200
commit: 661299d9d0437a0ff72240f3d60016ac3a361a6e (patch)
tree: 765512576314fc3612b503f182b9ae4e60fcf849 /arch/cris/arch-v32/lib/checksum.S
parent: [SERIAL] Convert parport_serial to use new 8250_pci interfaces (diff)
parent: [PATCH] new alpha syscalls (diff)
download: linux-661299d9d0437a0ff72240f3d60016ac3a361a6e.tar.xz
linux-661299d9d0437a0ff72240f3d60016ac3a361a6e.zip
1 files changed, 111 insertions, 0 deletions
diff --git a/arch/cris/arch-v32/lib/checksum.S b/arch/cris/arch-v32/lib/checksum.S
new file mode 100644
index 000000000000..32e66181b826
--- /dev/null
+++ b/arch/cris/arch-v32/lib/checksum.S
@@ -0,0 +1,111 @@
+/*
+ * A fast checksum routine using movem
+ * Copyright (c) 1998-2001, 2003 Axis Communications AB
+ *
+ * csum_partial(const unsigned char * buff, int len, unsigned int sum)
+ */
+
+	.globl	csum_partial
+csum_partial:
+
+	;; r10 - src: buffer pointer on entry, advanced as data is read
+	;; r11 - length: number of bytes still to be summed
+	;; r12 - checksum: incoming sum and running accumulator (copied to r10 on return)
+
+	;; lengths below 80 skip the movem loop: 80 is the breakeven used here
+	;; against the plain word loop, and the cutoff also ensures we never
+	;; sum a full 40-byte movem block when fewer than 40 bytes remain
+
+	cmpu.w	80,$r11		; length vs. the 80-byte cutoff
+	blo	_word_loop	; shorter: skip the movem loop entirely
+	nop			; fills the slot after the branch
+
+	;; need to save the registers we use below in the movem loop
+	;; this overhead is why we have a check above for breakeven length
+	;; only r0 - r8 have to be saved, the other ones are clobber-able
+	;; according to the ABI
+
+	subq	9*4,$sp		; make room for r0-r8 (9 longwords)
+	subq	10*4,$r11	; update length for the first loop (r11 = bytes left after it)
+	movem	$r8,[$sp]	; save r0-r8
+
+	;; do a movem checksum
+
+_mloop:	movem	[$r10+],$r9	; read 10 longwords into r0-r9, advancing src
+
+	;; perform dword checksumming on the 10 longwords
+
+	add.d	$r0,$r12	; plain add starts the carry chain
+	addc	$r1,$r12	; each addc below also adds the previous carry
+	addc	$r2,$r12
+	addc	$r3,$r12
+	addc	$r4,$r12
+	addc	$r5,$r12
+	addc	$r6,$r12
+	addc	$r7,$r12
+	addc	$r8,$r12
+	addc	$r9,$r12
+
+	;; fold the carry into the checksum, to avoid having to loop the carry
+	;; back into the top
+
+	addc	0,$r12
+	addc	0,$r12		; do it again, since we might have generated a carry
+
+	subq	10*4,$r11	; charge the next 40-byte block
+	bge	_mloop		; r11 >= 0: another full block remains
+	nop
+
+	addq	10*4,$r11	; compensate for last loop underflowing length
+
+	movem	[$sp+],$r8	; restore regs
+
+_word_loop:
+	;; only fold if there is anything to fold.
+
+	cmpq	0,$r12
+	beq	_no_fold	; no nop here: the moveq below sits in the slot and only writes scratch r9
+
+	;; fold 32-bit checksum into a 16-bit checksum, to avoid carries below.
+	;; r9 and r13 can be used as temporaries.
+
+	moveq	-1,$r9		; put 0xffff in r9, faster than move.d 0xffff,r9
+	lsrq	16,$r9
+
+	move.d	$r12,$r13
+	lsrq	16,$r13		; r13 = checksum >> 16
+	and.d	$r9,$r12		; checksum = checksum & 0xffff
+	add.d	$r13,$r12		; checksum += r13
+	move.d	$r12,$r13		; do the same again, maybe we got a carry last add
+	lsrq	16,$r13
+	and.d	$r9,$r12
+	add.d	$r13,$r12
+
+_no_fold:
+	cmpq	2,$r11		; is there at least one whole 16-bit word left?
+	blt	_no_words
+	nop
+
+	;; checksum the rest of the words
+
+	subq	2,$r11		; pre-bias: the add after bge also runs on the exit pass
+
+_wloop:	subq	2,$r11
+	bge	_wloop
+	addu.w	[$r10+],$r12	; in the branch slot: add one 16-bit word, advance src
+
+	addq	2,$r11		; undo the bias: r11 is now 0 or 1
+
+_no_words:
+	;; see if we have one odd byte more
+	cmpq	1,$r11
+	beq	_do_byte
+	nop
+	ret
+	move.d	$r12,$r10	; executes with the ret: hand the sum back in r10
+
+_do_byte:
+	;; checksum the last byte (nothing is copied in this routine)
+	addu.b	[$r10],$r12	; add the trailing byte to the sum
+	ret
+	move.d	$r12,$r10	; executes with the ret: hand the sum back in r10
author	Russell King <rmk@dyn-67.arm.linux.org.uk>	2005-07-28 10:30:20 +0200
committer	Russell King <rmk+kernel@arm.linux.org.uk>	2005-07-28 10:30:20 +0200
commit	661299d9d0437a0ff72240f3d60016ac3a361a6e (patch)
tree	765512576314fc3612b503f182b9ae4e60fcf849 /arch/cris/arch-v32/lib/checksum.S
parent	[SERIAL] Convert parport_serial to use new 8250_pci interfaces (diff)
parent	[PATCH] new alpha syscalls (diff)
download	linux-661299d9d0437a0ff72240f3d60016ac3a361a6e.tar.xz linux-661299d9d0437a0ff72240f3d60016ac3a361a6e.zip