diff options
author | Nicolas Pitre <nico@cam.org> | 2008-04-12 03:04:28 +0200 |
---|---|---|
committer | Lennert Buytenhek <buytenh@marvell.com> | 2008-06-22 22:44:39 +0200 |
commit | f91a8dcc25398c5d708056de081d6cebf3f2023e (patch) | |
tree | 1cd31c180bb1600de415bee67f8fcce5bf128413 /arch/arm/lib/memset.S | |
parent | [ARM] cache align destination pointer when copying memory for some processors (diff) | |
download | linux-f91a8dcc25398c5d708056de081d6cebf3f2023e.tar.xz linux-f91a8dcc25398c5d708056de081d6cebf3f2023e.zip |
[ARM] cache align memset and memzero
This is a natural extension following the previous patch.
Non Feroceon based targets are unchanged.
Signed-off-by: Nicolas Pitre <nico@marvell.com>
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Diffstat (limited to 'arch/arm/lib/memset.S')
-rw-r--r-- | arch/arm/lib/memset.S | 46 |
1 files changed, 46 insertions, 0 deletions
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S index 95b110b07a89..b477d4ac88ef 100644 --- a/arch/arm/lib/memset.S +++ b/arch/arm/lib/memset.S @@ -39,6 +39,9 @@ ENTRY(memset) mov r3, r1 cmp r2, #16 blt 4f + +#if ! CALGN(1)+0 + /* * We need an extra register for this loop - save the return address and * use the LR @@ -64,6 +67,49 @@ ENTRY(memset) stmneia r0!, {r1, r3, ip, lr} ldr lr, [sp], #4 +#else + +/* + * This version aligns the destination pointer in order to write + * whole cache lines at once. + */ + + stmfd sp!, {r4-r7, lr} + mov r4, r1 + mov r5, r1 + mov r6, r1 + mov r7, r1 + mov ip, r1 + mov lr, r1 + + cmp r2, #96 + tstgt r0, #31 + ble 3f + + and ip, r0, #31 + rsb ip, ip, #32 + sub r2, r2, ip + movs ip, ip, lsl #(32 - 4) + stmcsia r0!, {r4, r5, r6, r7} + stmmiia r0!, {r4, r5} + tst ip, #(1 << 30) + mov ip, r1 + strne r1, [r0], #4 + +3: subs r2, r2, #64 + stmgeia r0!, {r1, r3-r7, ip, lr} + stmgeia r0!, {r1, r3-r7, ip, lr} + bgt 3b + ldmeqfd sp!, {r4-r7, pc} + + tst r2, #32 + stmneia r0!, {r1, r3-r7, ip, lr} + tst r2, #16 + stmneia r0!, {r4-r7} + ldmfd sp!, {r4-r7, lr} + +#endif + 4: tst r2, #8 stmneia r0!, {r1, r3} tst r2, #4 |