diff options
author | Paul Mackerras <paulus@samba.org> | 2008-06-09 06:01:46 +0200 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2008-06-10 13:40:22 +0200 |
commit | 917f0af9e5a9ceecf9e72537fabb501254ba321d (patch) | |
tree | 1ef207755c6d83ce4af93ef2b5e4645eebd65886 /arch/ppc/lib | |
parent | powerpc: Improve (in|out)_[bl]eXX() asm code (diff) | |
download | linux-917f0af9e5a9ceecf9e72537fabb501254ba321d.tar.xz linux-917f0af9e5a9ceecf9e72537fabb501254ba321d.zip |
powerpc: Remove arch/ppc and include/asm-ppc
All the maintained platforms are now in arch/powerpc, so the old
arch/ppc stuff can now go away.
Acked-by: Adrian Bunk <bunk@kernel.org>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Becky Bruce <becky.bruce@freescale.com>
Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Acked-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Grant Likely <grant.likely@secretlab.ca>
Acked-by: Jochen Friedrich <jochen@scram.de>
Acked-by: John Linn <john.linn@xilinx.com>
Acked-by: Jon Loeliger <jdl@freescale.com>
Acked-by: Josh Boyer <jwboyer@linux.vnet.ibm.com>
Acked-by: Kumar Gala <galak@kernel.crashing.org>
Acked-by: Olof Johansson <olof@lixom.net>
Acked-by: Peter Korsgaard <jacmet@sunsite.dk>
Acked-by: Scott Wood <scottwood@freescale.com>
Acked-by: Sean MacLennan <smaclennan@pikatech.com>
Acked-by: Segher Boessenkool <segher@kernel.crashing.org>
Acked-by: Stefan Roese <sr@denx.de>
Acked-by: Stephen Neuendorffer <stephen.neuendorffer@xilinx.com>
Acked-by: Wolfgang Denk <wd@denx.de>
Signed-off-by: Paul Mackerras <paulus@samba.org>
Diffstat (limited to 'arch/ppc/lib')
-rw-r--r-- | arch/ppc/lib/Makefile | 5 | ||||
-rw-r--r-- | arch/ppc/lib/checksum.S | 225 | ||||
-rw-r--r-- | arch/ppc/lib/div64.S | 58 | ||||
-rw-r--r-- | arch/ppc/lib/locks.c | 189 | ||||
-rw-r--r-- | arch/ppc/lib/string.S | 732 |
5 files changed, 0 insertions, 1209 deletions
diff --git a/arch/ppc/lib/Makefile b/arch/ppc/lib/Makefile deleted file mode 100644 index 095e661e79dd..000000000000 --- a/arch/ppc/lib/Makefile +++ /dev/null @@ -1,5 +0,0 @@ -# -# Makefile for ppc-specific library files.. -# - -obj-y := checksum.o string.o div64.o diff --git a/arch/ppc/lib/checksum.S b/arch/ppc/lib/checksum.S deleted file mode 100644 index 7874e8a80455..000000000000 --- a/arch/ppc/lib/checksum.S +++ /dev/null @@ -1,225 +0,0 @@ -/* - * This file contains assembly-language implementations - * of IP-style 1's complement checksum routines. - * - * Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Severely hacked about by Paul Mackerras (paulus@cs.anu.edu.au). - */ - -#include <linux/sys.h> -#include <asm/processor.h> -#include <asm/errno.h> -#include <asm/ppc_asm.h> - - .text - -/* - * ip_fast_csum(buf, len) -- Optimized for IP header - * len is in words and is always >= 5. - */ -_GLOBAL(ip_fast_csum) - lwz r0,0(r3) - lwzu r5,4(r3) - addic. r4,r4,-2 - addc r0,r0,r5 - mtctr r4 - blelr- -1: lwzu r4,4(r3) - adde r0,r0,r4 - bdnz 1b - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* - * Compute checksum of TCP or UDP pseudo-header: - * csum_tcpudp_magic(saddr, daddr, len, proto, sum) - */ -_GLOBAL(csum_tcpudp_magic) - rlwimi r5,r6,16,0,15 /* put proto in upper half of len */ - addc r0,r3,r4 /* add 4 32-bit words together */ - adde r0,r0,r5 - adde r0,r0,r7 - addze r0,r0 /* add in final carry */ - rlwinm r3,r0,16,0,31 /* fold two halves together */ - add r3,r0,r3 - not r3,r3 - srwi r3,r3,16 - blr - -/* - * computes the checksum of a memory block at buff, length len, - * and adds in "sum" (32-bit) - * - * csum_partial(buff, len, sum) - */ -_GLOBAL(csum_partial) - addic r0,r5,0 - subi r3,r3,4 - srwi. r6,r4,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r5,r3,2 /* Align buffer to longword boundary */ - beq+ 1f - lhz r5,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r4,r4,2 - addc r0,r0,r5 - srwi. r6,r4,2 /* # words to do */ - beq 3f -1: mtctr r6 -2: lwzu r5,4(r3) /* the bdnz has zero overhead, so it should */ - adde r0,r0,r5 /* be unnecessary to unroll this loop */ - bdnz 2b - andi. r4,r4,3 -3: cmpwi 0,r4,2 - blt+ 4f - lhz r5,4(r3) - addi r3,r3,2 - subi r4,r4,2 - adde r0,r0,r5 -4: cmpwi 0,r4,1 - bne+ 5f - lbz r5,4(r3) - slwi r5,r5,8 /* Upper byte of word */ - adde r0,r0,r5 -5: addze r3,r0 /* add in final carry */ - blr - -/* - * Computes the checksum of a memory block at src, length len, - * and adds in "sum" (32-bit), while copying the block to dst. - * If an access exception occurs on src or dst, it stores -EFAULT - * to *src_err or *dst_err respectively, and (for an error on - * src) zeroes the rest of dst. - * - * csum_partial_copy_generic(src, dst, len, sum, src_err, dst_err) - */ -_GLOBAL(csum_partial_copy_generic) - addic r0,r6,0 - subi r3,r3,4 - subi r4,r4,4 - srwi. r6,r5,2 - beq 3f /* if we're doing < 4 bytes */ - andi. r9,r4,2 /* Align dst to longword boundary */ - beq+ 1f -81: lhz r6,4(r3) /* do 2 bytes to get aligned */ - addi r3,r3,2 - subi r5,r5,2 -91: sth r6,4(r4) - addi r4,r4,2 - addc r0,r0,r6 - srwi. r6,r5,2 /* # words to do */ - beq 3f -1: srwi. r6,r5,4 /* # groups of 4 words to do */ - beq 10f - mtctr r6 -71: lwz r6,4(r3) -72: lwz r9,8(r3) -73: lwz r10,12(r3) -74: lwzu r11,16(r3) - adde r0,r0,r6 -75: stw r6,4(r4) - adde r0,r0,r9 -76: stw r9,8(r4) - adde r0,r0,r10 -77: stw r10,12(r4) - adde r0,r0,r11 -78: stwu r11,16(r4) - bdnz 71b -10: rlwinm. r6,r5,30,30,31 /* # words left to do */ - beq 13f - mtctr r6 -82: lwzu r9,4(r3) -92: stwu r9,4(r4) - adde r0,r0,r9 - bdnz 82b -13: andi. r5,r5,3 -3: cmpwi 0,r5,2 - blt+ 4f -83: lhz r6,4(r3) - addi r3,r3,2 - subi r5,r5,2 -93: sth r6,4(r4) - addi r4,r4,2 - adde r0,r0,r6 -4: cmpwi 0,r5,1 - bne+ 5f -84: lbz r6,4(r3) -94: stb r6,4(r4) - slwi r6,r6,8 /* Upper byte of word */ - adde r0,r0,r6 -5: addze r3,r0 /* add in final carry */ - blr - -/* These shouldn't go in the fixup section, since that would - cause the ex_table addresses to get out of order. */ - -src_error_4: - mfctr r6 /* update # bytes remaining from ctr */ - rlwimi r5,r6,4,0,27 - b 79f -src_error_1: - li r6,0 - subi r5,r5,2 -95: sth r6,4(r4) - addi r4,r4,2 -79: srwi. r6,r5,2 - beq 3f - mtctr r6 -src_error_2: - li r6,0 -96: stwu r6,4(r4) - bdnz 96b -3: andi. r5,r5,3 - beq src_error -src_error_3: - li r6,0 - mtctr r5 - addi r4,r4,3 -97: stbu r6,1(r4) - bdnz 97b -src_error: - cmpwi 0,r7,0 - beq 1f - li r6,-EFAULT - stw r6,0(r7) -1: addze r3,r0 - blr - -dst_error: - cmpwi 0,r8,0 - beq 1f - li r6,-EFAULT - stw r6,0(r8) -1: addze r3,r0 - blr - -.section __ex_table,"a" - .long 81b,src_error_1 - .long 91b,dst_error - .long 71b,src_error_4 - .long 72b,src_error_4 - .long 73b,src_error_4 - .long 74b,src_error_4 - .long 75b,dst_error - .long 76b,dst_error - .long 77b,dst_error - .long 78b,dst_error - .long 82b,src_error_2 - .long 92b,dst_error - .long 83b,src_error_3 - .long 93b,dst_error - .long 84b,src_error_3 - .long 94b,dst_error - .long 95b,dst_error - .long 96b,dst_error - .long 97b,dst_error diff --git a/arch/ppc/lib/div64.S b/arch/ppc/lib/div64.S deleted file mode 100644 index 3527569e9926..000000000000 --- a/arch/ppc/lib/div64.S +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Divide a 64-bit unsigned number by a 32-bit unsigned number. - * This routine assumes that the top 32 bits of the dividend are - * non-zero to start with. - * On entry, r3 points to the dividend, which get overwritten with - * the 64-bit quotient, and r4 contains the divisor. - * On exit, r3 contains the remainder. - * - * Copyright (C) 2002 Paul Mackerras, IBM Corp. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <asm/ppc_asm.h> -#include <asm/processor.h> - -_GLOBAL(__div64_32) - lwz r5,0(r3) # get the dividend into r5/r6 - lwz r6,4(r3) - cmplw r5,r4 - li r7,0 - li r8,0 - blt 1f - divwu r7,r5,r4 # if dividend.hi >= divisor, - mullw r0,r7,r4 # quotient.hi = dividend.hi / divisor - subf. r5,r0,r5 # dividend.hi %= divisor - beq 3f -1: mr r11,r5 # here dividend.hi != 0 - andis. r0,r5,0xc000 - bne 2f - cntlzw r0,r5 # we are shifting the dividend right - li r10,-1 # to make it < 2^32, and shifting - srw r10,r10,r0 # the divisor right the same amount, - add r9,r4,r10 # rounding up (so the estimate cannot - andc r11,r6,r10 # ever be too large, only too small) - andc r9,r9,r10 - or r11,r5,r11 - rotlw r9,r9,r0 - rotlw r11,r11,r0 - divwu r11,r11,r9 # then we divide the shifted quantities -2: mullw r10,r11,r4 # to get an estimate of the quotient, - mulhwu r9,r11,r4 # multiply the estimate by the divisor, - subfc r6,r10,r6 # take the product from the divisor, - add r8,r8,r11 # and add the estimate to the accumulated - subfe. r5,r9,r5 # quotient - bne 1b -3: cmplw r6,r4 - blt 4f - divwu r0,r6,r4 # perform the remaining 32-bit division - mullw r10,r0,r4 # and get the remainder - add r8,r8,r0 - subf r6,r10,r6 -4: stw r7,0(r3) # return the quotient in *r3 - stw r8,4(r3) - mr r3,r6 # return the remainder in r3 - blr diff --git a/arch/ppc/lib/locks.c b/arch/ppc/lib/locks.c deleted file mode 100644 index ea4aee6b20e6..000000000000 --- a/arch/ppc/lib/locks.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - * Locks for smp ppc - * - * Written by Cort Dougan (cort@cs.nmt.edu) - */ - -#include <linux/sched.h> -#include <linux/spinlock.h> -#include <linux/module.h> -#include <asm/ppc_asm.h> -#include <asm/smp.h> - -#ifdef CONFIG_DEBUG_SPINLOCK - -#undef INIT_STUCK -#define INIT_STUCK 200000000 /*0xffffffff*/ - -/* - * Try to acquire a spinlock. - * Only does the stwcx. if the load returned 0 - the Programming - * Environments Manual suggests not doing unnecessary stcwx.'s - * since they may inhibit forward progress by other CPUs in getting - * a lock. - */ -static inline unsigned long __spin_trylock(volatile unsigned long *lock) -{ - unsigned long ret; - - __asm__ __volatile__ ("\n\ -1: lwarx %0,0,%1\n\ - cmpwi 0,%0,0\n\ - bne 2f\n" - PPC405_ERR77(0,%1) -" stwcx. %2,0,%1\n\ - bne- 1b\n\ - isync\n\ -2:" - : "=&r"(ret) - : "r"(lock), "r"(1) - : "cr0", "memory"); - - return ret; -} - -void _raw_spin_lock(spinlock_t *lock) -{ - int cpu = smp_processor_id(); - unsigned int stuck = INIT_STUCK; - while (__spin_trylock(&lock->lock)) { - while ((unsigned volatile long)lock->lock != 0) { - if (!--stuck) { - printk("_spin_lock(%p) CPU#%d NIP %p" - " holder: cpu %ld pc %08lX\n", - lock, cpu, __builtin_return_address(0), - lock->owner_cpu,lock->owner_pc); - stuck = INIT_STUCK; - /* steal the lock */ - /*xchg_u32((void *)&lock->lock,0);*/ - } - } - } - lock->owner_pc = (unsigned long)__builtin_return_address(0); - lock->owner_cpu = cpu; -} -EXPORT_SYMBOL(_raw_spin_lock); - -int _raw_spin_trylock(spinlock_t *lock) -{ - if (__spin_trylock(&lock->lock)) - return 0; - lock->owner_cpu = smp_processor_id(); - lock->owner_pc = (unsigned long)__builtin_return_address(0); - return 1; -} -EXPORT_SYMBOL(_raw_spin_trylock); - -void _raw_spin_unlock(spinlock_t *lp) -{ - if ( !lp->lock ) - printk("_spin_unlock(%p): no lock cpu %d curr PC %p %s/%d\n", - lp, smp_processor_id(), __builtin_return_address(0), - current->comm, current->pid); - if ( lp->owner_cpu != smp_processor_id() ) - printk("_spin_unlock(%p): cpu %d trying clear of cpu %d pc %lx val %lx\n", - lp, smp_processor_id(), (int)lp->owner_cpu, - lp->owner_pc,lp->lock); - lp->owner_pc = lp->owner_cpu = 0; - wmb(); - lp->lock = 0; -} -EXPORT_SYMBOL(_raw_spin_unlock); - -/* - * For rwlocks, zero is unlocked, -1 is write-locked, - * positive is read-locked. - */ -static __inline__ int __read_trylock(rwlock_t *rw) -{ - signed int tmp; - - __asm__ __volatile__( -"2: lwarx %0,0,%1 # __read_trylock\n\ - addic. %0,%0,1\n\ - ble- 1f\n" - PPC405_ERR77(0,%1) -" stwcx. %0,0,%1\n\ - bne- 2b\n\ - isync\n\ -1:" - : "=&r"(tmp) - : "r"(&rw->lock) - : "cr0", "memory"); - - return tmp; -} - -int _raw_read_trylock(rwlock_t *rw) -{ - return __read_trylock(rw) > 0; -} -EXPORT_SYMBOL(_raw_read_trylock); - -void _raw_read_lock(rwlock_t *rw) -{ - unsigned int stuck; - - while (__read_trylock(rw) <= 0) { - stuck = INIT_STUCK; - while (!read_can_lock(rw)) { - if (--stuck == 0) { - printk("_read_lock(%p) CPU#%d lock %d\n", - rw, raw_smp_processor_id(), rw->lock); - stuck = INIT_STUCK; - } - } - } -} -EXPORT_SYMBOL(_raw_read_lock); - -void _raw_read_unlock(rwlock_t *rw) -{ - if ( rw->lock == 0 ) - printk("_read_unlock(): %s/%d (nip %08lX) lock %d\n", - current->comm,current->pid,current->thread.regs->nip, - rw->lock); - wmb(); - atomic_dec((atomic_t *) &(rw)->lock); -} -EXPORT_SYMBOL(_raw_read_unlock); - -void _raw_write_lock(rwlock_t *rw) -{ - unsigned int stuck; - - while (cmpxchg(&rw->lock, 0, -1) != 0) { - stuck = INIT_STUCK; - while (!write_can_lock(rw)) { - if (--stuck == 0) { - printk("write_lock(%p) CPU#%d lock %d)\n", - rw, raw_smp_processor_id(), rw->lock); - stuck = INIT_STUCK; - } - } - } - wmb(); -} -EXPORT_SYMBOL(_raw_write_lock); - -int _raw_write_trylock(rwlock_t *rw) -{ - if (cmpxchg(&rw->lock, 0, -1) != 0) - return 0; - wmb(); - return 1; -} -EXPORT_SYMBOL(_raw_write_trylock); - -void _raw_write_unlock(rwlock_t *rw) -{ - if (rw->lock >= 0) - printk("_write_lock(): %s/%d (nip %08lX) lock %d\n", - current->comm,current->pid,current->thread.regs->nip, - rw->lock); - wmb(); - rw->lock = 0; -} -EXPORT_SYMBOL(_raw_write_unlock); - -#endif diff --git a/arch/ppc/lib/string.S b/arch/ppc/lib/string.S deleted file mode 100644 index 927253bfc826..000000000000 --- a/arch/ppc/lib/string.S +++ /dev/null @@ -1,732 +0,0 @@ -/* - * String handling functions for PowerPC. - * - * Copyright (C) 1996 Paul Mackerras. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -#include <asm/processor.h> -#include <asm/cache.h> -#include <asm/errno.h> -#include <asm/ppc_asm.h> - -#define COPY_16_BYTES \ - lwz r7,4(r4); \ - lwz r8,8(r4); \ - lwz r9,12(r4); \ - lwzu r10,16(r4); \ - stw r7,4(r6); \ - stw r8,8(r6); \ - stw r9,12(r6); \ - stwu r10,16(r6) - -#define COPY_16_BYTES_WITHEX(n) \ -8 ## n ## 0: \ - lwz r7,4(r4); \ -8 ## n ## 1: \ - lwz r8,8(r4); \ -8 ## n ## 2: \ - lwz r9,12(r4); \ -8 ## n ## 3: \ - lwzu r10,16(r4); \ -8 ## n ## 4: \ - stw r7,4(r6); \ -8 ## n ## 5: \ - stw r8,8(r6); \ -8 ## n ## 6: \ - stw r9,12(r6); \ -8 ## n ## 7: \ - stwu r10,16(r6) - -#define COPY_16_BYTES_EXCODE(n) \ -9 ## n ## 0: \ - addi r5,r5,-(16 * n); \ - b 104f; \ -9 ## n ## 1: \ - addi r5,r5,-(16 * n); \ - b 105f; \ -.section __ex_table,"a"; \ - .align 2; \ - .long 8 ## n ## 0b,9 ## n ## 0b; \ - .long 8 ## n ## 1b,9 ## n ## 0b; \ - .long 8 ## n ## 2b,9 ## n ## 0b; \ - .long 8 ## n ## 3b,9 ## n ## 0b; \ - .long 8 ## n ## 4b,9 ## n ## 1b; \ - .long 8 ## n ## 5b,9 ## n ## 1b; \ - .long 8 ## n ## 6b,9 ## n ## 1b; \ - .long 8 ## n ## 7b,9 ## n ## 1b; \ - .text - - .text - .stabs "arch/ppc/lib/",N_SO,0,0,0f - .stabs "string.S",N_SO,0,0,0f - -CACHELINE_BYTES = L1_CACHE_BYTES -LG_CACHELINE_BYTES = L1_CACHE_SHIFT -CACHELINE_MASK = (L1_CACHE_BYTES-1) - -_GLOBAL(strcpy) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r5) - bne 1b - blr - -/* This clears out any unused part of the destination buffer, - just as the libc version does. -- paulus */ -_GLOBAL(strncpy) - cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r6) - bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ - bnelr /* if we didn't hit a null char, we're done */ - mfctr r5 - cmpwi 0,r5,0 /* any space left in destination buffer? */ - beqlr /* we know r0 == 0 here */ -2: stbu r0,1(r6) /* clear it out if so */ - bdnz 2b - blr - -_GLOBAL(strcat) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r0,1(r5) - cmpwi 0,r0,0 - bne 1b - addi r5,r5,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r5) - bne 1b - blr - -_GLOBAL(strcmp) - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r3,1(r5) - cmpwi 1,r3,0 - lbzu r0,1(r4) - subf. r3,r0,r3 - beqlr 1 - beq 1b - blr - -_GLOBAL(strncmp) - PPC_LCMPI r5,0 - beqlr - mtctr r5 - addi r5,r3,-1 - addi r4,r4,-1 -1: lbzu r3,1(r5) - cmpwi 1,r3,0 - lbzu r0,1(r4) - subf. r3,r0,r3 - beqlr 1 - bdnzt eq,1b - blr - -_GLOBAL(strlen) - addi r4,r3,-1 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - bne 1b - subf r3,r3,r4 - blr - -/* - * Use dcbz on the complete cache lines in the destination - * to set them to zero. This requires that the destination - * area is cacheable. -- paulus - */ -_GLOBAL(cacheable_memzero) - mr r5,r4 - li r4,0 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subf r6,r0,r6 - clrlwi r7,r6,32-LG_CACHELINE_BYTES - add r8,r7,r5 - srwi r9,r8,LG_CACHELINE_BYTES - addic. r9,r9,-1 /* total number of complete cachelines */ - ble 2f - xori r0,r7,CACHELINE_MASK & ~3 - srwi. r0,r0,2 - beq 3f - mtctr r0 -4: stwu r4,4(r6) - bdnz 4b -3: mtctr r9 - li r7,4 -#if !defined(CONFIG_8xx) -10: dcbz r7,r6 -#else -10: stw r4, 4(r6) - stw r4, 8(r6) - stw r4, 12(r6) - stw r4, 16(r6) -#if CACHE_LINE_SIZE >= 32 - stw r4, 20(r6) - stw r4, 24(r6) - stw r4, 28(r6) - stw r4, 32(r6) -#endif /* CACHE_LINE_SIZE */ -#endif - addi r6,r6,CACHELINE_BYTES - bdnz 10b - clrlwi r5,r8,32-LG_CACHELINE_BYTES - addi r5,r5,4 -2: srwi r0,r5,2 - mtctr r0 - bdz 6f -1: stwu r4,4(r6) - bdnz 1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r6,3 -8: stbu r4,1(r6) - bdnz 8b - blr - -_GLOBAL(memset) - rlwimi r4,r4,8,16,23 - rlwimi r4,r4,16,0,15 - addi r6,r3,-4 - cmplwi 0,r5,4 - blt 7f - stwu r4,4(r6) - beqlr - andi. r0,r6,3 - add r5,r0,r5 - subf r6,r0,r6 - srwi r0,r5,2 - mtctr r0 - bdz 6f -1: stwu r4,4(r6) - bdnz 1b -6: andi. r5,r5,3 -7: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r6,r6,3 -8: stbu r4,1(r6) - bdnz 8b - blr - -/* - * This version uses dcbz on the complete cache lines in the - * destination area to reduce memory traffic. This requires that - * the destination area is cacheable. - * We only use this version if the source and dest don't overlap. - * -- paulus. - */ -_GLOBAL(cacheable_memcpy) - add r7,r3,r5 /* test if the src & dst overlap */ - add r8,r4,r5 - cmplw 0,r4,r7 - cmplw 1,r3,r8 - crand 0,0,4 /* cr0.lt &= cr1.lt */ - blt memcpy /* if regions overlap */ - - addi r4,r4,-4 - addi r6,r3,-4 - neg r0,r3 - andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ - beq 58f - - cmplw 0,r5,r0 /* is this more than total to do? */ - blt 63f /* if not much to do */ - andi. r8,r0,3 /* get it word-aligned first */ - subf r5,r0,r5 - mtctr r8 - beq+ 61f -70: lbz r9,4(r4) /* do some bytes */ - stb r9,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 70b -61: srwi. r0,r0,2 - mtctr r0 - beq 58f -72: lwzu r9,4(r4) /* do some words */ - stwu r9,4(r6) - bdnz 72b - -58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ - clrlwi r5,r5,32-LG_CACHELINE_BYTES - li r11,4 - mtctr r0 - beq 63f -53: -#if !defined(CONFIG_8xx) - dcbz r11,r6 -#endif - COPY_16_BYTES -#if L1_CACHE_BYTES >= 32 - COPY_16_BYTES -#if L1_CACHE_BYTES >= 64 - COPY_16_BYTES - COPY_16_BYTES -#if L1_CACHE_BYTES >= 128 - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES - COPY_16_BYTES -#endif -#endif -#endif - bdnz 53b - -63: srwi. r0,r5,2 - mtctr r0 - beq 64f -30: lwzu r0,4(r4) - stwu r0,4(r6) - bdnz 30b - -64: andi. r0,r5,3 - mtctr r0 - beq+ 65f -40: lbz r0,4(r4) - stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 40b -65: blr - -_GLOBAL(memmove) - cmplw 0,r3,r4 - bgt backwards_memcpy - /* fall through */ - -_GLOBAL(memcpy) - srwi. r7,r5,3 - addi r6,r3,-4 - addi r4,r4,-4 - beq 2f /* if less than 8 bytes to do */ - andi. r0,r6,3 /* get dest word aligned */ - mtctr r7 - bne 5f -1: lwz r7,4(r4) - lwzu r8,8(r4) - stw r7,4(r6) - stwu r8,8(r6) - bdnz 1b - andi. r5,r5,7 -2: cmplwi 0,r5,4 - blt 3f - lwzu r0,4(r4) - addi r5,r5,-4 - stwu r0,4(r6) -3: cmpwi 0,r5,0 - beqlr - mtctr r5 - addi r4,r4,3 - addi r6,r6,3 -4: lbzu r0,1(r4) - stbu r0,1(r6) - bdnz 4b - blr -5: subfic r0,r0,4 - mtctr r0 -6: lbz r7,4(r4) - addi r4,r4,1 - stb r7,4(r6) - addi r6,r6,1 - bdnz 6b - subf r5,r0,r5 - rlwinm. r7,r5,32-3,3,31 - beq 2b - mtctr r7 - b 1b - -_GLOBAL(backwards_memcpy) - rlwinm. r7,r5,32-3,3,31 /* r0 = r5 >> 3 */ - add r6,r3,r5 - add r4,r4,r5 - beq 2f - andi. r0,r6,3 - mtctr r7 - bne 5f -1: lwz r7,-4(r4) - lwzu r8,-8(r4) - stw r7,-4(r6) - stwu r8,-8(r6) - bdnz 1b - andi. r5,r5,7 -2: cmplwi 0,r5,4 - blt 3f - lwzu r0,-4(r4) - subi r5,r5,4 - stwu r0,-4(r6) -3: cmpwi 0,r5,0 - beqlr - mtctr r5 -4: lbzu r0,-1(r4) - stbu r0,-1(r6) - bdnz 4b - blr -5: mtctr r0 -6: lbzu r7,-1(r4) - stbu r7,-1(r6) - bdnz 6b - subf r5,r0,r5 - rlwinm. r7,r5,32-3,3,31 - beq 2b - mtctr r7 - b 1b - -_GLOBAL(memcmp) - cmpwi 0,r5,0 - ble- 2f - mtctr r5 - addi r6,r3,-1 - addi r4,r4,-1 -1: lbzu r3,1(r6) - lbzu r0,1(r4) - subf. r3,r0,r3 - bdnzt 2,1b - blr -2: li r3,0 - blr - -_GLOBAL(memchr) - cmpwi 0,r5,0 - ble- 2f - mtctr r5 - addi r3,r3,-1 -1: lbzu r0,1(r3) - cmpw 0,r0,r4 - bdnzf 2,1b - beqlr -2: li r3,0 - blr - -_GLOBAL(__copy_tofrom_user) - addi r4,r4,-4 - addi r6,r3,-4 - neg r0,r3 - andi. r0,r0,CACHELINE_MASK /* # bytes to start of cache line */ - beq 58f - - cmplw 0,r5,r0 /* is this more than total to do? */ - blt 63f /* if not much to do */ - andi. r8,r0,3 /* get it word-aligned first */ - mtctr r8 - beq+ 61f -70: lbz r9,4(r4) /* do some bytes */ -71: stb r9,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 70b -61: subf r5,r0,r5 - srwi. r0,r0,2 - mtctr r0 - beq 58f -72: lwzu r9,4(r4) /* do some words */ -73: stwu r9,4(r6) - bdnz 72b - - .section __ex_table,"a" - .align 2 - .long 70b,100f - .long 71b,101f - .long 72b,102f - .long 73b,103f - .text - -58: srwi. r0,r5,LG_CACHELINE_BYTES /* # complete cachelines */ - clrlwi r5,r5,32-LG_CACHELINE_BYTES - li r11,4 - beq 63f - -#ifdef CONFIG_8xx - /* Don't use prefetch on 8xx */ - mtctr r0 - li r0,0 -53: COPY_16_BYTES_WITHEX(0) - bdnz 53b - -#else /* not CONFIG_8xx */ - /* Here we decide how far ahead to prefetch the source */ - li r3,4 - cmpwi r0,1 - li r7,0 - ble 114f - li r7,1 -#if MAX_COPY_PREFETCH > 1 - /* Heuristically, for large transfers we prefetch - MAX_COPY_PREFETCH cachelines ahead. For small transfers - we prefetch 1 cacheline ahead. */ - cmpwi r0,MAX_COPY_PREFETCH - ble 112f - li r7,MAX_COPY_PREFETCH -112: mtctr r7 -111: dcbt r3,r4 - addi r3,r3,CACHELINE_BYTES - bdnz 111b -#else - dcbt r3,r4 - addi r3,r3,CACHELINE_BYTES -#endif /* MAX_COPY_PREFETCH > 1 */ - -114: subf r8,r7,r0 - mr r0,r7 - mtctr r8 - -53: dcbt r3,r4 -54: dcbz r11,r6 - .section __ex_table,"a" - .align 2 - .long 54b,105f - .text -/* the main body of the cacheline loop */ - COPY_16_BYTES_WITHEX(0) -#if L1_CACHE_BYTES >= 32 - COPY_16_BYTES_WITHEX(1) -#if L1_CACHE_BYTES >= 64 - COPY_16_BYTES_WITHEX(2) - COPY_16_BYTES_WITHEX(3) -#if L1_CACHE_BYTES >= 128 - COPY_16_BYTES_WITHEX(4) - COPY_16_BYTES_WITHEX(5) - COPY_16_BYTES_WITHEX(6) - COPY_16_BYTES_WITHEX(7) -#endif -#endif -#endif - bdnz 53b - cmpwi r0,0 - li r3,4 - li r7,0 - bne 114b -#endif /* CONFIG_8xx */ - -63: srwi. r0,r5,2 - mtctr r0 - beq 64f -30: lwzu r0,4(r4) -31: stwu r0,4(r6) - bdnz 30b - -64: andi. r0,r5,3 - mtctr r0 - beq+ 65f -40: lbz r0,4(r4) -41: stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 40b -65: li r3,0 - blr - -/* read fault, initial single-byte copy */ -100: li r9,0 - b 90f -/* write fault, initial single-byte copy */ -101: li r9,1 -90: subf r5,r8,r5 - li r3,0 - b 99f -/* read fault, initial word copy */ -102: li r9,0 - b 91f -/* write fault, initial word copy */ -103: li r9,1 -91: li r3,2 - b 99f - -/* - * this stuff handles faults in the cacheline loop and branches to either - * 104f (if in read part) or 105f (if in write part), after updating r5 - */ - COPY_16_BYTES_EXCODE(0) -#if L1_CACHE_BYTES >= 32 - COPY_16_BYTES_EXCODE(1) -#if L1_CACHE_BYTES >= 64 - COPY_16_BYTES_EXCODE(2) - COPY_16_BYTES_EXCODE(3) -#if L1_CACHE_BYTES >= 128 - COPY_16_BYTES_EXCODE(4) - COPY_16_BYTES_EXCODE(5) - COPY_16_BYTES_EXCODE(6) - COPY_16_BYTES_EXCODE(7) -#endif -#endif -#endif - -/* read fault in cacheline loop */ -104: li r9,0 - b 92f -/* fault on dcbz (effectively a write fault) */ -/* or write fault in cacheline loop */ -105: li r9,1 -92: li r3,LG_CACHELINE_BYTES - mfctr r8 - add r0,r0,r8 - b 106f -/* read fault in final word loop */ -108: li r9,0 - b 93f -/* write fault in final word loop */ -109: li r9,1 -93: andi. r5,r5,3 - li r3,2 - b 99f -/* read fault in final byte loop */ -110: li r9,0 - b 94f -/* write fault in final byte loop */ -111: li r9,1 -94: li r5,0 - li r3,0 -/* - * At this stage the number of bytes not copied is - * r5 + (ctr << r3), and r9 is 0 for read or 1 for write. - */ -99: mfctr r0 -106: slw r3,r0,r3 - add. r3,r3,r5 - beq 120f /* shouldn't happen */ - cmpwi 0,r9,0 - bne 120f -/* for a read fault, first try to continue the copy one byte at a time */ - mtctr r3 -130: lbz r0,4(r4) -131: stb r0,4(r6) - addi r4,r4,1 - addi r6,r6,1 - bdnz 130b -/* then clear out the destination: r3 bytes starting at 4(r6) */ -132: mfctr r3 - srwi. r0,r3,2 - li r9,0 - mtctr r0 - beq 113f -112: stwu r9,4(r6) - bdnz 112b -113: andi. r0,r3,3 - mtctr r0 - beq 120f -114: stb r9,4(r6) - addi r6,r6,1 - bdnz 114b -120: blr - - .section __ex_table,"a" - .align 2 - .long 30b,108b - .long 31b,109b - .long 40b,110b - .long 41b,111b - .long 130b,132b - .long 131b,120b - .long 112b,120b - .long 114b,120b - .text - -_GLOBAL(__clear_user) - addi r6,r3,-4 - li r3,0 - li r5,0 - cmplwi 0,r4,4 - blt 7f - /* clear a single word */ -11: stwu r5,4(r6) - beqlr - /* clear word sized chunks */ - andi. r0,r6,3 - add r4,r0,r4 - subf r6,r0,r6 - srwi r0,r4,2 - andi. r4,r4,3 - mtctr r0 - bdz 7f -1: stwu r5,4(r6) - bdnz 1b - /* clear byte sized chunks */ -7: cmpwi 0,r4,0 - beqlr - mtctr r4 - addi r6,r6,3 -8: stbu r5,1(r6) - bdnz 8b - blr -90: mr r3,r4 - blr -91: mfctr r3 - slwi r3,r3,2 - add r3,r3,r4 - blr -92: mfctr r3 - blr - - .section __ex_table,"a" - .align 2 - .long 11b,90b - .long 1b,91b - .long 8b,92b - .text - -_GLOBAL(__strncpy_from_user) - addi r6,r3,-1 - addi r4,r4,-1 - cmpwi 0,r5,0 - beq 2f - mtctr r5 -1: lbzu r0,1(r4) - cmpwi 0,r0,0 - stbu r0,1(r6) - bdnzf 2,1b /* dec ctr, branch if ctr != 0 && !cr0.eq */ - beq 3f -2: addi r6,r6,1 -3: subf r3,r3,r6 - blr -99: li r3,-EFAULT - blr - - .section __ex_table,"a" - .align 2 - .long 1b,99b - .text - -/* r3 = str, r4 = len (> 0), r5 = top (highest addr) */ -_GLOBAL(__strnlen_user) - addi r7,r3,-1 - subf r6,r7,r5 /* top+1 - str */ - cmplw 0,r4,r6 - bge 0f - mr r6,r4 -0: mtctr r6 /* ctr = min(len, top - str) */ -1: lbzu r0,1(r7) /* get next byte */ - cmpwi 0,r0,0 - bdnzf 2,1b /* loop if --ctr != 0 && byte != 0 */ - addi r7,r7,1 - subf r3,r3,r7 /* number of bytes we have looked at */ - beqlr /* return if we found a 0 byte */ - cmpw 0,r3,r4 /* did we look at all len bytes? */ - blt 99f /* if not, must have hit top */ - addi r3,r4,1 /* return len + 1 to indicate no null found */ - blr -99: li r3,0 /* bad address, return 0 */ - blr - - .section __ex_table,"a" - .align 2 - .long 1b,99b |