summaryrefslogtreecommitdiffstats
path: root/arch/blackfin/lib
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-01-10 12:04:41 +0100
committerIngo Molnar <mingo@elte.hu>2009-01-10 12:04:41 +0100
commitb17304245f0db0ac69b795c411407808f3f2796d (patch)
tree63ed3915d9295bd08f640bf25c322064ba787fad /arch/blackfin/lib
parentbzip2/lzma: centralize format detection (diff)
parentMerge git://git.kernel.org/pub/scm/linux/kernel/git/arjan/linux-2.6-async-2 (diff)
downloadlinux-b17304245f0db0ac69b795c411407808f3f2796d.tar.xz
linux-b17304245f0db0ac69b795c411407808f3f2796d.zip
Merge branch 'linus' into x86/setup-lzma
Conflicts: init/do_mounts_rd.c
Diffstat (limited to 'arch/blackfin/lib')
-rw-r--r--arch/blackfin/lib/checksum.c4
-rw-r--r--arch/blackfin/lib/ins.S272
-rw-r--r--arch/blackfin/lib/muldi3.S68
-rw-r--r--arch/blackfin/lib/muldi3.c99
4 files changed, 156 insertions, 287 deletions
diff --git a/arch/blackfin/lib/checksum.c b/arch/blackfin/lib/checksum.c
index 5c87505165d3..762a7f02970a 100644
--- a/arch/blackfin/lib/checksum.c
+++ b/arch/blackfin/lib/checksum.c
@@ -29,6 +29,7 @@
* 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
+#include <linux/module.h>
#include <net/checksum.h>
#include <asm/checksum.h>
@@ -76,6 +77,7 @@ __sum16 ip_fast_csum(unsigned char *iph, unsigned int ihl)
{
return (__force __sum16)~do_csum(iph, ihl * 4);
}
+EXPORT_SYMBOL(ip_fast_csum);
/*
* computes the checksum of a memory block at buff, length len,
@@ -104,6 +106,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
return sum;
}
+EXPORT_SYMBOL(csum_partial);
/*
* this routine is used for miscellaneous IP-like checksums, mainly
@@ -137,3 +140,4 @@ __wsum csum_partial_copy(const void *src, void *dst, int len, __wsum sum)
memcpy(dst, src, len);
return csum_partial(dst, len, sum);
}
+EXPORT_SYMBOL(csum_partial_copy);
diff --git a/arch/blackfin/lib/ins.S b/arch/blackfin/lib/ins.S
index d60554dce87b..1863a6ba507c 100644
--- a/arch/blackfin/lib/ins.S
+++ b/arch/blackfin/lib/ins.S
@@ -1,31 +1,9 @@
/*
- * File: arch/blackfin/lib/ins.S
- * Based on:
- * Author: Bas Vermeulen <bas@buyways.nl>
+ * arch/blackfin/lib/ins.S - ins{bwl} using hardware loops
*
- * Created: Tue Mar 22 15:27:24 CEST 2005
- * Description: Implementation of ins{bwl} for BlackFin processors using zero overhead loops.
- *
- * Modified:
- * Copyright 2004-2008 Analog Devices Inc.
- * Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl>
- *
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see the file COPYING, or write
- * to the Free Software Foundation, Inc.,
- * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ * Copyright 2004-2008 Analog Devices Inc.
+ * Copyright (C) 2005 Bas Vermeulen, BuyWays BV <bas@buyways.nl>
+ * Licensed under the GPL-2 or later.
*/
#include <linux/linkage.h>
@@ -33,6 +11,46 @@
.align 2
+#ifdef CONFIG_IPIPE
+# define DO_CLI \
+ [--sp] = rets; \
+ [--sp] = (P5:0); \
+ sp += -12; \
+ call ___ipipe_stall_root_raw; \
+ sp += 12; \
+ (P5:0) = [sp++];
+# define CLI_INNER_NOP
+#else
+# define DO_CLI cli R3;
+# define CLI_INNER_NOP nop; nop; nop;
+#endif
+
+#ifdef CONFIG_IPIPE
+# define DO_STI \
+ sp += -12; \
+ call ___ipipe_unstall_root_raw; \
+ sp += 12; \
+2: rets = [sp++];
+#else
+# define DO_STI 2: sti R3;
+#endif
+
+#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
+# define CLI_OUTER DO_CLI;
+# define STI_OUTER DO_STI;
+# define CLI_INNER 1:
+# if ANOMALY_05000416
+# define STI_INNER nop; 2: nop;
+# else
+# define STI_INNER 2:
+# endif
+#else
+# define CLI_OUTER
+# define STI_OUTER
+# define CLI_INNER 1: DO_CLI; CLI_INNER_NOP;
+# define STI_INNER DO_STI;
+#endif
+
/*
* Reads on the Blackfin are speculative. In Blackfin terms, this means they
* can be interrupted at any time (even after they have been issued on to the
@@ -53,170 +71,48 @@
* buffers in/out of FIFOs.
*/
-ENTRY(_insl)
-#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
- P0 = R0; /* P0 = port */
- cli R3;
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
-.Llong_loop_s: R0 = [P0];
- [P1++] = R0;
- NOP;
-.Llong_loop_e: NOP;
- sti R3;
- RTS;
-#else
- P0 = R0; /* P0 = port */
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Llong_loop_s, .Llong_loop_e) LC0 = P2;
-.Llong_loop_s:
- CLI R3;
- NOP; NOP; NOP;
- R0 = [P0];
- [P1++] = R0;
-.Llong_loop_e:
- STI R3;
+#define COMMON_INS(func, ops) \
+ENTRY(_ins##func) \
+ P0 = R0; /* P0 = port */ \
+ CLI_OUTER; /* 3 instructions before first read access */ \
+ P1 = R1; /* P1 = address */ \
+ P2 = R2; /* P2 = count */ \
+ SSYNC; \
+ \
+ LSETUP(1f, 2f) LC0 = P2; \
+ CLI_INNER; \
+ ops; \
+ STI_INNER; \
+ \
+ STI_OUTER; \
+ RTS; \
+ENDPROC(_ins##func)
- RTS;
-#endif
-ENDPROC(_insl)
-
-ENTRY(_insw)
-#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
- P0 = R0; /* P0 = port */
- cli R3;
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
-.Lword_loop_s: R0 = W[P0];
- W[P1++] = R0;
- NOP;
-.Lword_loop_e: NOP;
- sti R3;
- RTS;
-#else
- P0 = R0; /* P0 = port */
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Lword_loop_s, .Lword_loop_e) LC0 = P2;
-.Lword_loop_s:
- CLI R3;
- NOP; NOP; NOP;
- R0 = W[P0];
- W[P1++] = R0;
-.Lword_loop_e:
- STI R3;
- RTS;
-
-#endif
-ENDPROC(_insw)
-
-ENTRY(_insw_8)
-#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
- P0 = R0; /* P0 = port */
- cli R3;
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2;
-.Lword8_loop_s: R0 = W[P0];
- B[P1++] = R0;
- R0 = R0 >> 8;
- B[P1++] = R0;
- NOP;
-.Lword8_loop_e: NOP;
- sti R3;
- RTS;
-#else
- P0 = R0; /* P0 = port */
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Lword8_loop_s, .Lword8_loop_e) LC0 = P2;
-.Lword8_loop_s:
- CLI R3;
- NOP; NOP; NOP;
- R0 = W[P0];
- B[P1++] = R0;
- R0 = R0 >> 8;
- B[P1++] = R0;
- NOP;
-.Lword8_loop_e:
- STI R3;
+COMMON_INS(l, \
+ R0 = [P0]; \
+ [P1++] = R0; \
+)
- RTS;
-#endif
-ENDPROC(_insw_8)
+COMMON_INS(w, \
+ R0 = W[P0]; \
+ W[P1++] = R0; \
+)
-ENTRY(_insb)
-#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
- P0 = R0; /* P0 = port */
- cli R3;
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
-.Lbyte_loop_s: R0 = B[P0];
- B[P1++] = R0;
- NOP;
-.Lbyte_loop_e: NOP;
- sti R3;
- RTS;
-#else
- P0 = R0; /* P0 = port */
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Lbyte_loop_s, .Lbyte_loop_e) LC0 = P2;
-.Lbyte_loop_s:
- CLI R3;
- NOP; NOP; NOP;
- R0 = B[P0];
- B[P1++] = R0;
-.Lbyte_loop_e:
- STI R3;
+COMMON_INS(w_8, \
+ R0 = W[P0]; \
+ B[P1++] = R0; \
+ R0 = R0 >> 8; \
+ B[P1++] = R0; \
+)
- RTS;
-#endif
-ENDPROC(_insb)
+COMMON_INS(b, \
+ R0 = B[P0]; \
+ B[P1++] = R0; \
+)
-ENTRY(_insl_16)
-#ifdef CONFIG_BFIN_INS_LOWOVERHEAD
- P0 = R0; /* P0 = port */
- cli R3;
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2;
-.Llong16_loop_s: R0 = [P0];
- W[P1++] = R0;
- R0 = R0 >> 16;
- W[P1++] = R0;
- NOP;
-.Llong16_loop_e: NOP;
- sti R3;
- RTS;
-#else
- P0 = R0; /* P0 = port */
- P1 = R1; /* P1 = address */
- P2 = R2; /* P2 = count */
- SSYNC;
- LSETUP( .Llong16_loop_s, .Llong16_loop_e) LC0 = P2;
-.Llong16_loop_s:
- CLI R3;
- NOP; NOP; NOP;
- R0 = [P0];
- W[P1++] = R0;
- R0 = R0 >> 16;
- W[P1++] = R0;
-.Llong16_loop_e:
- STI R3;
- RTS;
-#endif
-ENDPROC(_insl_16)
+COMMON_INS(l_16, \
+ R0 = [P0]; \
+ W[P1++] = R0; \
+ R0 = R0 >> 16; \
+ W[P1++] = R0; \
+)
diff --git a/arch/blackfin/lib/muldi3.S b/arch/blackfin/lib/muldi3.S
new file mode 100644
index 000000000000..abde120ee230
--- /dev/null
+++ b/arch/blackfin/lib/muldi3.S
@@ -0,0 +1,68 @@
+.align 2
+.global ___muldi3;
+.type ___muldi3, STT_FUNC;
+
+#ifdef CONFIG_ARITHMETIC_OPS_L1
+.section .l1.text
+#else
+.text
+#endif
+
+/*
+ R1:R0 * R3:R2
+ = R1.h:R1.l:R0.h:R0.l * R3.h:R3.l:R2.h:R2.l
+[X] = (R1.h * R3.h) * 2^96
+[X] + (R1.h * R3.l + R1.l * R3.h) * 2^80
+[X] + (R1.h * R2.h + R1.l * R3.l + R3.h * R0.h) * 2^64
+[T1] + (R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h) * 2^48
+[T2] + (R1.l * R2.l + R3.l * R0.l + R0.h * R2.h) * 2^32
+[T3] + (R0.l * R2.h + R2.l * R0.h) * 2^16
+[T4] + (R0.l * R2.l)
+
+ We can discard the first three lines marked "X" since we produce
+ only a 64 bit result. So, we need ten 16-bit multiplies.
+
+ Individual mul-acc results:
+[E1] = R1.h * R2.l + R3.h * R0.l + R1.l * R2.h + R3.l * R0.h
+[E2] = R1.l * R2.l + R3.l * R0.l + R0.h * R2.h
+[E3] = R0.l * R2.h + R2.l * R0.h
+[E4] = R0.l * R2.l
+
+ We also need to add high parts from lower-level results to higher ones:
+ E[n]c = E[n] + (E[n+1]c >> 16), where E4c := E4
+
+ One interesting property is that all parts of the result that depend
+ on the sign of the multiplication are discarded. Those would be the
+ multiplications involving R1.h and R3.h, but only the top 16 bit of
+ the 32 bit result depend on the sign, and since R1.h and R3.h only
+ occur in E1, the top half of these results is cut off.
+ So, we can just use FU mode for all of the 16-bit multiplies, and
+ ignore questions of when to use mixed mode. */
+
+___muldi3:
+ /* [SP] technically is part of the caller's frame, but we can
+ use it as scratch space. */
+ A0 = R2.H * R1.L, A1 = R2.L * R1.H (FU) || R3 = [SP + 12]; /* E1 */
+ A0 += R3.H * R0.L, A1 += R3.L * R0.H (FU) || [SP] = R4; /* E1 */
+ A0 += A1; /* E1 */
+ R4 = A0.w;
+ A0 = R0.l * R3.l (FU); /* E2 */
+ A0 += R2.l * R1.l (FU); /* E2 */
+
+ A1 = R2.L * R0.L (FU); /* E4 */
+ R3 = A1.w;
+ A1 = A1 >> 16; /* E3c */
+ A0 += R2.H * R0.H, A1 += R2.L * R0.H (FU); /* E2, E3c */
+ A1 += R0.L * R2.H (FU); /* E3c */
+ R0 = A1.w;
+ A1 = A1 >> 16; /* E2c */
+ A0 += A1; /* E2c */
+ R1 = A0.w;
+
+ /* low(result) = low(E3c):low(E4) */
+ R0 = PACK (R0.l, R3.l);
+ /* high(result) = E2c + (E1 << 16) */
+ R1.h = R1.h + R4.l (NS) || R4 = [SP];
+ RTS;
+
+.size ___muldi3, .-___muldi3
diff --git a/arch/blackfin/lib/muldi3.c b/arch/blackfin/lib/muldi3.c
deleted file mode 100644
index 303d0c6a6dba..000000000000
--- a/arch/blackfin/lib/muldi3.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * File: arch/blackfin/lib/muldi3.c
- * Based on:
- * Author:
- *
- * Created:
- * Description:
- *
- * Modified:
- * Copyright 2004-2006 Analog Devices Inc.
- *
- * Bugs: Enter bugs at http://blackfin.uclinux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see the file COPYING, or write
- * to the Free Software Foundation, Inc.,
- * 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifndef SI_TYPE_SIZE
-#define SI_TYPE_SIZE 32
-#endif
-#define __ll_b (1L << (SI_TYPE_SIZE / 2))
-#define __ll_lowpart(t) ((usitype) (t) % __ll_b)
-#define __ll_highpart(t) ((usitype) (t) / __ll_b)
-#define BITS_PER_UNIT 8
-
-#if !defined(umul_ppmm)
-#define umul_ppmm(w1, w0, u, v) \
- do { \
- usitype __x0, __x1, __x2, __x3; \
- usitype __ul, __vl, __uh, __vh; \
- \
- __ul = __ll_lowpart (u); \
- __uh = __ll_highpart (u); \
- __vl = __ll_lowpart (v); \
- __vh = __ll_highpart (v); \
- \
- __x0 = (usitype) __ul * __vl; \
- __x1 = (usitype) __ul * __vh; \
- __x2 = (usitype) __uh * __vl; \
- __x3 = (usitype) __uh * __vh; \
- \
- __x1 += __ll_highpart (__x0);/* this can't give carry */ \
- __x1 += __x2; /* but this indeed can */ \
- if (__x1 < __x2) /* did we get it? */ \
- __x3 += __ll_b; /* yes, add it in the proper pos. */ \
- \
- (w1) = __x3 + __ll_highpart (__x1); \
- (w0) = __ll_lowpart (__x1) * __ll_b + __ll_lowpart (__x0); \
- } while (0)
-#endif
-
-#if !defined(__umulsidi3)
-#define __umulsidi3(u, v) \
- ({diunion __w; \
- umul_ppmm (__w.s.high, __w.s.low, u, v); \
- __w.ll; })
-#endif
-
-typedef unsigned int usitype __attribute__ ((mode(SI)));
-typedef int sitype __attribute__ ((mode(SI)));
-typedef int ditype __attribute__ ((mode(DI)));
-typedef int word_type __attribute__ ((mode(__word__)));
-
-struct distruct {
- sitype low, high;
-};
-typedef union {
- struct distruct s;
- ditype ll;
-} diunion;
-
-#ifdef CONFIG_ARITHMETIC_OPS_L1
-ditype __muldi3(ditype u, ditype v)__attribute__((l1_text));
-#endif
-
-ditype __muldi3(ditype u, ditype v)
-{
- diunion w;
- diunion uu, vv;
-
- uu.ll = u, vv.ll = v;
- w.ll = __umulsidi3(uu.s.low, vv.s.low);
- w.s.high += ((usitype) uu.s.low * (usitype) vv.s.high
- + (usitype) uu.s.high * (usitype) vv.s.low);
-
- return w.ll;
-}