From a583158c9ce822c96a718fbf877cec1e5f9ad75d Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto Date: Mon, 18 Dec 2006 00:07:40 +0900 Subject: [MIPS] Unify memset.S The 32-bit version and 64-bit version are almost equal. Unify them. This makes further improvements (for example, supporting CDEX, etc.) easier. Signed-off-by: Atsushi Nemoto Signed-off-by: Ralf Baechle --- arch/mips/lib-32/Makefile | 2 +- arch/mips/lib-32/memset.S | 145 ---------------------------------------- arch/mips/lib-64/Makefile | 2 +- arch/mips/lib-64/memset.S | 142 --------------------------------------- arch/mips/lib/Makefile | 2 +- arch/mips/lib/memset.S | 166 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 169 insertions(+), 290 deletions(-) delete mode 100644 arch/mips/lib-32/memset.S delete mode 100644 arch/mips/lib-64/memset.S create mode 100644 arch/mips/lib/memset.S (limited to 'arch') diff --git a/arch/mips/lib-32/Makefile b/arch/mips/lib-32/Makefile index dcd4d2ed2ac4..2036cf5e6857 100644 --- a/arch/mips/lib-32/Makefile +++ b/arch/mips/lib-32/Makefile @@ -2,7 +2,7 @@ # Makefile for MIPS-specific library files.. # -lib-y += memset.o watch.o +lib-y += watch.o obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o diff --git a/arch/mips/lib-32/memset.S b/arch/mips/lib-32/memset.S deleted file mode 100644 index 1981485bd48b..000000000000 --- a/arch/mips/lib-32/memset.S +++ /dev/null @@ -1,145 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998, 1999, 2000 by Ralf Baechle - * Copyright (C) 1999, 2000 Silicon Graphics, Inc. - */ -#include -#include -#include - -#define EX(insn,reg,addr,handler) \ -9: insn reg, addr; \ - .section __ex_table,"a"; \ - PTR 9b, handler; \ - .previous - - .macro f_fill64 dst, offset, val, fixup - EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup) - .endm - -/* - * memset(void *s, int c, size_t n) - * - * a0: start of area to clear - * a1: char to fill with - * a2: size of area to clear - */ - .set noreorder - .align 5 -LEAF(memset) - beqz a1, 1f - move v0, a0 /* result */ - - andi a1, 0xff /* spread fillword */ - sll t1, a1, 8 - or a1, t1 - sll t1, a1, 16 - or a1, t1 -1: - -FEXPORT(__bzero) - sltiu t0, a2, LONGSIZE /* very small region? */ - bnez t0, small_memset - andi t0, a0, LONGMASK /* aligned? */ - - beqz t0, 1f - PTR_SUBU t0, LONGSIZE /* alignment in bytes */ - -#ifdef __MIPSEB__ - EX(swl, a1, (a0), first_fixup) /* make word aligned */ -#endif -#ifdef __MIPSEL__ - EX(swr, a1, (a0), first_fixup) /* make word aligned */ -#endif - PTR_SUBU a0, t0 /* long align ptr */ - PTR_ADDU a2, t0 /* correct size */ - -1: ori t1, a2, 0x3f /* # of full blocks */ - xori t1, 0x3f - beqz t1, memset_partial /* no block to fill */ - andi t0, a2, 0x3c - - PTR_ADDU t1, a0 /* end address */ - .set reorder -1: PTR_ADDIU a0, 64 - f_fill64 a0, -64, a1, fwd_fixup - bne t1, a0, 1b - .set noreorder - -memset_partial: - PTR_LA t1, 2f /* where to start */ - PTR_SUBU t1, t0 - jr t1 - PTR_ADDU a0, t0 /* dest ptr */ - - .set push - .set noreorder - .set nomacro - f_fill64 a0, -64, a1, partial_fixup /* ... but first do longs ... */ -2: .set pop - andi a2, LONGMASK /* At most one long to go */ - - beqz a2, 1f - PTR_ADDU a0, a2 /* What's left */ -#ifdef __MIPSEB__ - EX(swr, a1, -1(a0), last_fixup) -#endif -#ifdef __MIPSEL__ - EX(swl, a1, -1(a0), last_fixup) -#endif -1: jr ra - move a2, zero - -small_memset: - beqz a2, 2f - PTR_ADDU t1, a0, a2 - -1: PTR_ADDIU a0, 1 /* fill bytewise */ - bne t1, a0, 1b - sb a1, -1(a0) - -2: jr ra /* done */ - move a2, zero - END(memset) - -first_fixup: - jr ra - nop - -fwd_fixup: - PTR_L t0, TI_TASK($28) - LONG_L t0, THREAD_BUADDR(t0) - andi a2, 0x3f - LONG_ADDU a2, t1 - jr ra - LONG_SUBU a2, t0 - -partial_fixup: - PTR_L t0, TI_TASK($28) - LONG_L t0, THREAD_BUADDR(t0) - andi a2, LONGMASK - LONG_ADDU a2, t1 - jr ra - LONG_SUBU a2, t0 - -last_fixup: - jr ra - andi v1, a2, LONGMASK diff --git a/arch/mips/lib-64/Makefile b/arch/mips/lib-64/Makefile index dcd4d2ed2ac4..2036cf5e6857 100644 --- a/arch/mips/lib-64/Makefile +++ b/arch/mips/lib-64/Makefile @@ -2,7 +2,7 @@ # Makefile for MIPS-specific library files.. # -lib-y += memset.o watch.o +lib-y += watch.o obj-$(CONFIG_CPU_MIPS32) += dump_tlb.o obj-$(CONFIG_CPU_MIPS64) += dump_tlb.o diff --git a/arch/mips/lib-64/memset.S b/arch/mips/lib-64/memset.S deleted file mode 100644 index e2c42c85113b..000000000000 --- a/arch/mips/lib-64/memset.S +++ /dev/null @@ -1,142 +0,0 @@ -/* - * This file is subject to the terms and conditions of the GNU General Public - * License. See the file "COPYING" in the main directory of this archive - * for more details. - * - * Copyright (C) 1998, 1999, 2000 by Ralf Baechle - * Copyright (C) 1999, 2000 Silicon Graphics, Inc. - */ -#include -#include -#include - -#define EX(insn,reg,addr,handler) \ -9: insn reg, addr; \ - .section __ex_table,"a"; \ - PTR 9b, handler; \ - .previous - - .macro f_fill64 dst, offset, val, fixup - EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup) - EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup) - .endm - -/* - * memset(void *s, int c, size_t n) - * - * a0: start of area to clear - * a1: char to fill with - * a2: size of area to clear - */ - .set noreorder - .align 5 -LEAF(memset) - beqz a1, 1f - move v0, a0 /* result */ - - andi a1, 0xff /* spread fillword */ - dsll t1, a1, 8 - or a1, t1 - dsll t1, a1, 16 - or a1, t1 - dsll t1, a1, 32 - or a1, t1 -1: - -FEXPORT(__bzero) - sltiu t0, a2, LONGSIZE /* very small region? */ - bnez t0, small_memset - andi t0, a0, LONGMASK /* aligned? */ - - beqz t0, 1f - PTR_SUBU t0, LONGSIZE /* alignment in bytes */ - -#ifdef __MIPSEB__ - EX(sdl, a1, (a0), first_fixup) /* make dword aligned */ -#endif -#ifdef __MIPSEL__ - EX(sdr, a1, (a0), first_fixup) /* make dword aligned */ -#endif - PTR_SUBU a0, t0 /* long align ptr */ - PTR_ADDU a2, t0 /* correct size */ - -1: ori t1, a2, 0x3f /* # of full blocks */ - xori t1, 0x3f - beqz t1, memset_partial /* no block to fill */ - andi t0, a2, 0x38 - - PTR_ADDU t1, a0 /* end address */ - .set reorder -1: PTR_ADDIU a0, 64 - f_fill64 a0, -64, a1, fwd_fixup - bne t1, a0, 1b - .set noreorder - -memset_partial: - PTR_LA t1, 2f /* where to start */ - .set noat - dsrl AT, t0, 1 - PTR_SUBU t1, AT - .set noat - jr t1 - PTR_ADDU a0, t0 /* dest ptr */ - - .set push - .set noreorder - .set nomacro - f_fill64 a0, -64, a1, partial_fixup /* ... but first do longs ... */ -2: .set pop - andi a2, LONGMASK /* At most one long to go */ - - beqz a2, 1f - PTR_ADDU a0, a2 /* What's left */ -#ifdef __MIPSEB__ - EX(sdr, a1, -1(a0), last_fixup) -#endif -#ifdef __MIPSEL__ - EX(sdl, a1, -1(a0), last_fixup) -#endif -1: jr ra - move a2, zero - -small_memset: - beqz a2, 2f - PTR_ADDU t1, a0, a2 - -1: PTR_ADDIU a0, 1 /* fill bytewise */ - bne t1, a0, 1b - sb a1, -1(a0) - -2: jr ra /* done */ - move a2, zero - END(memset) - -first_fixup: - jr ra - nop - -fwd_fixup: - PTR_L t0, TI_TASK($28) - LONG_L t0, THREAD_BUADDR(t0) - andi a2, 0x3f - LONG_ADDU a2, t1 - jr ra - LONG_SUBU a2, t0 - -partial_fixup: - PTR_L t0, TI_TASK($28) - LONG_L t0, THREAD_BUADDR(t0) - andi a2, LONGMASK - LONG_ADDU a2, t1 - jr ra - LONG_SUBU a2, t0 - -last_fixup: - jr ra - andi v1, a2, LONGMASK diff --git a/arch/mips/lib/Makefile b/arch/mips/lib/Makefile index 989c900b8b14..5ad501b30b43 100644 --- a/arch/mips/lib/Makefile +++ b/arch/mips/lib/Makefile @@ -2,7 +2,7 @@ # Makefile for MIPS-specific library files.. # -lib-y += csum_partial.o memcpy.o promlib.o \ +lib-y += csum_partial.o memcpy.o memset.o promlib.o \ strlen_user.o strncpy_user.o strnlen_user.o uncached.o obj-y += iomap.o diff --git a/arch/mips/lib/memset.S b/arch/mips/lib/memset.S new file mode 100644 index 000000000000..3f8b8b3d0b23 --- /dev/null +++ b/arch/mips/lib/memset.S @@ -0,0 +1,166 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998, 1999, 2000 by Ralf Baechle + * Copyright (C) 1999, 2000 Silicon Graphics, Inc. + */ +#include +#include +#include + +#if LONGSIZE == 4 +#define LONG_S_L swl +#define LONG_S_R swr +#else +#define LONG_S_L sdl +#define LONG_S_R sdr +#endif + +#define EX(insn,reg,addr,handler) \ +9: insn reg, addr; \ + .section __ex_table,"a"; \ + PTR 9b, handler; \ + .previous + + .macro f_fill64 dst, offset, val, fixup + EX(LONG_S, \val, (\offset + 0 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 1 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 2 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 3 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 4 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 5 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 6 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 7 * LONGSIZE)(\dst), \fixup) +#if LONGSIZE == 4 + EX(LONG_S, \val, (\offset + 8 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 9 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup) + EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup) +#endif + .endm + +/* + * memset(void *s, int c, size_t n) + * + * a0: start of area to clear + * a1: char to fill with + * a2: size of area to clear + */ + .set noreorder + .align 5 +LEAF(memset) + beqz a1, 1f + move v0, a0 /* result */ + + andi a1, 0xff /* spread fillword */ + LONG_SLL t1, a1, 8 + or a1, t1 + LONG_SLL t1, a1, 16 +#if LONGSIZE == 8 + or a1, t1 + LONG_SLL t1, a1, 32 +#endif + or a1, t1 +1: + +FEXPORT(__bzero) + sltiu t0, a2, LONGSIZE /* very small region? */ + bnez t0, small_memset + andi t0, a0, LONGMASK /* aligned? */ + + beqz t0, 1f + PTR_SUBU t0, LONGSIZE /* alignment in bytes */ + +#ifdef __MIPSEB__ + EX(LONG_S_L, a1, (a0), first_fixup) /* make word/dword aligned */ +#endif +#ifdef __MIPSEL__ + EX(LONG_S_R, a1, (a0), first_fixup) /* make word/dword aligned */ +#endif + PTR_SUBU a0, t0 /* long align ptr */ + PTR_ADDU a2, t0 /* correct size */ + +1: ori t1, a2, 0x3f /* # of full blocks */ + xori t1, 0x3f + beqz t1, memset_partial /* no block to fill */ + andi t0, a2, 0x40-LONGSIZE + + PTR_ADDU t1, a0 /* end address */ + .set reorder +1: PTR_ADDIU a0, 64 + f_fill64 a0, -64, a1, fwd_fixup + bne t1, a0, 1b + .set noreorder + +memset_partial: + PTR_LA t1, 2f /* where to start */ +#if LONGSIZE == 4 + PTR_SUBU t1, t0 +#else + .set noat + LONG_SRL AT, t0, 1 + PTR_SUBU t1, AT + .set noat +#endif + jr t1 + PTR_ADDU a0, t0 /* dest ptr */ + + .set push + .set noreorder + .set nomacro + f_fill64 a0, -64, a1, partial_fixup /* ... but first do longs ... */ +2: .set pop + andi a2, LONGMASK /* At most one long to go */ + + beqz a2, 1f + PTR_ADDU a0, a2 /* What's left */ +#ifdef __MIPSEB__ + EX(LONG_S_R, a1, -1(a0), last_fixup) +#endif +#ifdef __MIPSEL__ + EX(LONG_S_L, a1, -1(a0), last_fixup) +#endif +1: jr ra + move a2, zero + +small_memset: + beqz a2, 2f + PTR_ADDU t1, a0, a2 + +1: PTR_ADDIU a0, 1 /* fill bytewise */ + bne t1, a0, 1b + sb a1, -1(a0) + +2: jr ra /* done */ + move a2, zero + END(memset) + +first_fixup: + jr ra + nop + +fwd_fixup: + PTR_L t0, TI_TASK($28) + LONG_L t0, THREAD_BUADDR(t0) + andi a2, 0x3f + LONG_ADDU a2, t1 + jr ra + LONG_SUBU a2, t0 + +partial_fixup: + PTR_L t0, TI_TASK($28) + LONG_L t0, THREAD_BUADDR(t0) + andi a2, LONGMASK + LONG_ADDU a2, t1 + jr ra + LONG_SUBU a2, t0 + +last_fixup: + jr ra + andi v1, a2, LONGMASK -- cgit v1.2.3