summaryrefslogtreecommitdiffstats
path: root/mpi
diff options
context:
space:
mode:
authorWerner Koch <wk@gnupg.org>1997-12-23 18:30:18 +0100
committerWerner Koch <wk@gnupg.org>1997-12-23 18:30:18 +0100
commitc351df1dc5294dfd81619fea3c1ff6a7e25ba774 (patch)
treeb0fa86a4d1455e53c3a51d696eac51abe0308b0d /mpi
parentNow created by config.links (diff)
downloadgnupg2-c351df1dc5294dfd81619fea3c1ff6a7e25ba774.tar.xz
gnupg2-c351df1dc5294dfd81619fea3c1ff6a7e25ba774.zip
changed configuration stuff, replaced some Makefile.am by distfiles.
Diffstat (limited to 'mpi')
-rw-r--r--mpi/Makefile.am3
-rw-r--r--mpi/Makefile.in112
-rw-r--r--mpi/alpha/distfiles3
-rw-r--r--mpi/alpha/udiv-qrnnd.S161
-rw-r--r--mpi/config.links68
-rw-r--r--mpi/generic/distfiles7
-rw-r--r--mpi/hppa/distfiles4
-rw-r--r--mpi/hppa/mpih-add1.S70
-rw-r--r--mpi/hppa/udiv-qrnnd.S297
-rw-r--r--mpi/i386/distfiles8
-rw-r--r--mpi/mpi-inline.h1
-rw-r--r--mpi/mpi-internal.h18
-rw-r--r--mpi/mpi-pow.c21
-rw-r--r--mpi/mpih-mul.c27
14 files changed, 689 insertions, 111 deletions
diff --git a/mpi/Makefile.am b/mpi/Makefile.am
index 33e1ac456..0daf86042 100644
--- a/mpi/Makefile.am
+++ b/mpi/Makefile.am
@@ -5,12 +5,11 @@ CFLAGS += -O2
SUFFIXES = .S .s
-SUBDIRS = generic i386
EXTRA_DIST = config.links
noinst_LIBRARIES = mpi
-noinst_HEADERS = sysdep.h
+# noinst_HEADERS =
mpi_SOURCES = longlong.h \
mpi-add.c \
diff --git a/mpi/Makefile.in b/mpi/Makefile.in
index 382a222cd..c179f53f9 100644
--- a/mpi/Makefile.in
+++ b/mpi/Makefile.in
@@ -42,11 +42,10 @@ INCLUDES = -I$(top_srcdir)/include
SUFFIXES = .S .s
-SUBDIRS = generic i386
EXTRA_DIST = config.links
noinst_LIBRARIES = mpi
-noinst_HEADERS = sysdep.h
+# noinst_HEADERS =
mpi_SOURCES = longlong.h \
mpi-add.c \
@@ -99,8 +98,6 @@ EXTRA_mpi_SOURCES =
LIBFILES = libmpi.a
AR = ar
RANLIB = @RANLIB@
-HEADERS = $(noinst_HEADERS)
-
DIST_COMMON = Makefile.am Makefile.in
@@ -161,45 +158,13 @@ libmpi.a: $(mpi_OBJECTS) $(mpi_LIBADD)
$(AR) cru libmpi.a $(mpi_OBJECTS) $(mpi_LIBADD)
$(RANLIB) libmpi.a
-# This directory's subdirectories are mostly independent; you can cd
-# into them and run `make' without going through this Makefile.
-# To change the values of `make' variables: instead of editing Makefiles,
-# (1) if the variable is set in `config.status', edit `config.status'
-# (which will cause the Makefiles to be regenerated when you run `make');
-# (2) otherwise, pass the desired values on the `make' command line.
-
-@SET_MAKE@
-
-all-recursive install-data-recursive install-exec-recursive \
-installdirs-recursive install-recursive uninstall-recursive \
-check-recursive installcheck-recursive info-recursive dvi-recursive \
-mostlyclean-recursive clean-recursive distclean-recursive \
-maintainer-clean-recursive:
- for subdir in $(SUBDIRS); do \
- target=`echo $@ | sed s/-recursive//`; \
- echo making $$target in $$subdir; \
- (cd $$subdir && $(MAKE) $$target) \
- || case "$(MFLAGS)" in *k*) fail=yes;; *) exit 1;; esac; \
- done && test -z "$$fail"
+ID: $(HEADERS) $(SOURCES)
+ here=`pwd` && cd $(srcdir) && mkid -f$$here/ID $(SOURCES) $(HEADERS)
tags: TAGS
-tags-recursive:
- list="$(SUBDIRS)"; for subdir in $$list; do \
- (cd $$subdir && $(MAKE) tags); \
- done
-
-TAGS: tags-recursive $(HEADERS) $(SOURCES) $(CONFIG_HEADER) \
- $(TAGS_DEPENDENCIES)
- tags=; \
- here=`pwd`; \
- for subdir in $(SUBDIRS); do \
- test -f $$subdir/TAGS && { \
- tags="$$tags -i $$here/$$subdir/TAGS"; \
- }; \
- done; \
- test -z "$(ETAGS_ARGS)$(CONFIG_HEADER)$(SOURCES)$(HEADERS)$$tags" \
- || etags $(ETAGS_ARGS) $$tags $(CONFIG_HEADER) $(SOURCES) $(HEADERS)
+TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES)
+ here=`pwd` && cd $(srcdir) && etags $(ETAGS_ARGS) $(SOURCES) $(HEADERS) -o $$here/TAGS
mostlyclean-tags:
@@ -218,14 +183,6 @@ distdir: $(DEP_DISTFILES)
|| ln $(srcdir)/$$file $(distdir)/$$file 2> /dev/null \
|| cp -p $(srcdir)/$$file $(distdir)/$$file; \
done
- for subdir in $(SUBDIRS); do \
- test -d $(distdir)/$$subdir \
- || mkdir $(distdir)/$$subdir \
- || exit 1; \
- chmod 777 $(distdir)/$$subdir; \
- (cd $$subdir && $(MAKE) distdir=../$(distdir)/$$subdir distdir) \
- || exit 1; \
- done
# This fragment is probably only useful for maintainers. It relies on
# GNU make and gcc. It is only included in the generated Makefile.in
@@ -253,30 +210,28 @@ $(srcdir)/.deps/%.P: $(srcdir)/%.c
fi
# End of maintainer-only section
-info: info-recursive
-
-dvi: dvi-recursive
+info:
-check: all check-recursive
+dvi:
-installcheck: installcheck-recursive
+check: all
-all-am: $(LIBFILES) $(HEADERS) Makefile
+installcheck:
-install-exec: install-exec-recursive
+install-exec:
-install-data: install-data-recursive
+install-data:
-install: install-recursive
+install: install-exec install-data all
@:
-uninstall: uninstall-recursive
+uninstall:
-all: all-recursive all-am
+all: $(LIBFILES) Makefile
install-strip:
$(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install
-installdirs: installdirs-recursive
+installdirs:
mostlyclean-generic:
@@ -292,42 +247,29 @@ distclean-generic:
maintainer-clean-generic:
test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES)
test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES)
-mostlyclean-am: mostlyclean-noinstLIBRARIES mostlyclean-compile \
+mostlyclean: mostlyclean-noinstLIBRARIES mostlyclean-compile \
mostlyclean-tags mostlyclean-generic
-clean-am: clean-noinstLIBRARIES clean-compile clean-tags clean-generic \
- mostlyclean-am
-
-distclean-am: distclean-noinstLIBRARIES distclean-compile \
- distclean-tags distclean-generic clean-am
+clean: clean-noinstLIBRARIES clean-compile clean-tags clean-generic \
+ mostlyclean
-maintainer-clean-am: maintainer-clean-noinstLIBRARIES \
- maintainer-clean-compile maintainer-clean-tags \
- maintainer-clean-generic distclean-am
-
-mostlyclean: mostlyclean-am mostlyclean-recursive
-
-clean: clean-am clean-recursive
-
-distclean: distclean-am distclean-recursive
+distclean: distclean-noinstLIBRARIES distclean-compile distclean-tags \
+ distclean-generic clean
rm -f config.status
-maintainer-clean: maintainer-clean-am maintainer-clean-recursive
+maintainer-clean: maintainer-clean-noinstLIBRARIES \
+ maintainer-clean-compile maintainer-clean-tags \
+ maintainer-clean-generic distclean
@echo "This command is intended for maintainers to use;"
@echo "it deletes files that may require special tools to rebuild."
.PHONY: default mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \
clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \
mostlyclean-compile distclean-compile clean-compile \
-maintainer-clean-compile install-data-recursive \
-uninstall-data-recursive install-exec-recursive \
-uninstall-exec-recursive installdirs-recursive uninstalldirs-recursive \
-all-recursive check-recursive installcheck-recursive info-recursive \
-dvi-recursive mostlyclean-recursive distclean-recursive clean-recursive \
-maintainer-clean-recursive tags tags-recursive mostlyclean-tags \
-distclean-tags clean-tags maintainer-clean-tags distdir info dvi check \
-installcheck all-am install-exec install-data install uninstall all \
-installdirs mostlyclean-generic distclean-generic clean-generic \
+maintainer-clean-compile tags mostlyclean-tags distclean-tags \
+clean-tags maintainer-clean-tags distdir info dvi check installcheck \
+install-exec install-data install uninstall all installdirs \
+mostlyclean-generic distclean-generic clean-generic \
maintainer-clean-generic clean mostlyclean distclean maintainer-clean
CFLAGS += -O2
diff --git a/mpi/alpha/distfiles b/mpi/alpha/distfiles
new file mode 100644
index 000000000..4dd0ffe3a
--- /dev/null
+++ b/mpi/alpha/distfiles
@@ -0,0 +1,3 @@
+
+udiv-qrnnd.S
+
diff --git a/mpi/alpha/udiv-qrnnd.S b/mpi/alpha/udiv-qrnnd.S
new file mode 100644
index 000000000..487991cd8
--- /dev/null
+++ b/mpi/alpha/udiv-qrnnd.S
@@ -0,0 +1,161 @@
+/* Alpha 21064 __udiv_qrnnd
+ *
+ * Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc.
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+
+ .set noreorder
+ .set noat
+.text
+ .align 3
+ .globl __udiv_qrnnd
+ .ent __udiv_qrnnd
+__udiv_qrnnd:
+ .frame $30,0,$26,0
+ .prologue 0
+#define cnt $2
+#define tmp $3
+#define rem_ptr $16
+#define n1 $17
+#define n0 $18
+#define d $19
+#define qb $20
+
+ ldiq cnt,16
+ blt d,.Largedivisor
+
+.Loop1: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule d,n1,qb
+ subq n1,d,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,.Loop1
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+.Largedivisor:
+ and n0,1,$4
+
+ srl n0,1,n0
+ sll n1,63,tmp
+ or tmp,n0,n0
+ srl n1,1,n1
+
+ and d,1,$6
+ srl d,1,$5
+ addq $5,$6,$5
+
+.Loop2: cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ cmplt n0,0,tmp
+ addq n1,n1,n1
+ bis n1,tmp,n1
+ addq n0,n0,n0
+ cmpule $5,n1,qb
+ subq n1,$5,tmp
+ cmovne qb,tmp,n1
+ bis n0,qb,n0
+ subq cnt,1,cnt
+ bgt cnt,.Loop2
+
+ addq n1,n1,n1
+ addq $4,n1,n1
+ bne $6,.LOdd
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+.LOdd:
+ /* q' in n0. r' in n1 */
+ /* Reached through "bne $6,.LOdd", where $6 holds d & 1, i.e. only
+  * when d is odd.  Loop2 divided by $5 = (d >> 1) + 1 in that case;
+  * the code below adds q' into the remainder and then corrects the
+  * quotient/remainder pair by at most two steps. */
+ addq n1,n0,n1
+ cmpult n1,n0,tmp # tmp := carry from addq
+ beq tmp,.LLp6
+ /* the addition wrapped: bump the quotient, take d off the remainder */
+ addq n0,1,n0
+ subq n1,d,n1
+.LLp6: cmpult n1,d,tmp
+ bne tmp,.LLp7
+ /* remainder is not below d (unsigned compare): one more correction */
+ addq n0,1,n0
+ subq n1,d,n1
+.LLp7:
+ /* store n1 through rem_ptr, place n0 in $0 ($31 is the Alpha zero
+  * register) and return via $26 */
+ stq n1,0(rem_ptr)
+ bis $31,n0,$0
+ ret $31,($26),1
+
+ .end __udiv_qrnnd
diff --git a/mpi/config.links b/mpi/config.links
index e48cf7a08..0560b7de5 100644
--- a/mpi/config.links
+++ b/mpi/config.links
@@ -2,9 +2,22 @@
# this should set $mpi_ln_src and $mpi_ln_dst.
# Note: this is called from the parent directory.
-echo '# created by config.links - do not edit' >./mpi/asm-syntax.h
+
+mpi_extra_modules=
+
+echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h
case "${target}" in
+    # a.out Linux and BSD targets get BSD_SYNTAX.  The shell's case takes
+    # the first matching pattern, so this branch must not claim i586:
+    # with i[345]86 here the i[56]86 branch below was only ever reached
+    # for i686 and i586 never saw the "i586 i386" search path.
+    i[34]86*-*-linuxaout* | i[34]86*-*-linuxoldld* | i[34]86*-*-*bsd*)
+        echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
+        echo '#include "./i386/syntax.h"' >>./mpi/asm-syntax.h
+        path="i386"
+        ;;
+    # Same syntax header, but i586-specific modules are looked up first.
+    i[56]86*-*-linuxaout* | i[56]86*-*-linuxoldld* | i[56]86*-*-*bsd*)
+        echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h
+        echo '#include "./i386/syntax.h"' >>./mpi/asm-syntax.h
+        path="i586 i386"
+        ;;
i[3456]86*-*-*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
echo '#include "./i386/syntax.h"' >>./mpi/asm-syntax.h
@@ -13,7 +26,27 @@ case "${target}" in
i[56]86*-*-* | pentium-*-* | pentiumpro-*-*)
echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h
echo '#include "./i586/syntax.h"' >>./mpi/asm-syntax.h
- path="i586"
+ path="i586 i386"
+ ;;
+ alpha*-*-*)
+ echo '/* configured for alpha */' >>./mpi/asm-syntax.h
+ path="alpha"
+ mpi_extra_modules="udiv-qrnnd"
+ ;;
+ hppa7000*-*-*)
+ echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h
+ path="hppa1_1 hppa"
+ mpi_extra_modules="udiv-qrnnd"
+ ;;
+ hppa1.0*-*-*)
+ echo '/* configured for HPPA 1.0 */' >>./mpi/asm-syntax.h
+ path="hppa"
+ mpi_extra_modules="udiv-qrnnd"
+ ;;
+ hppa*-*-*) # assume pa7100
+ echo '/* configured for HPPA (pa7100) */' >>./mpi/asm-syntax.h
+ path="pa7100 hppa1_1 hppa"
+ mpi_extra_modules="udiv-qrnnd"
;;
*)
echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h
@@ -21,9 +54,38 @@ case "${target}" in
;;
esac
+# Decide whether C symbols get a leading underscore in the assembler
+# modules.  "y" is the default; only Linux (other than a.out/oldld),
+# SysV and Solaris targets switch it off.  The a.out patterns are listed
+# first so the broader *-*-linux* pattern cannot claim them.
+needs_underscore="y"
+case "${target}" in
+    *-*-linuxaout* | *-*-linuxoldld*)
+        ;;
+    *-*-linux* | *-sysv* | *-solaris*)
+        needs_underscore="n"
+        ;;
+esac
+
+
+# Generate mpi/sysdep.h.  It defines C_SYMBOL_NAME(name), which either
+# prefixes the name with an underscore or passes it through unchanged,
+# depending on $needs_underscore.
+{
+    echo '/* created by config.links - do not edit */'
+    case "$needs_underscore" in
+        y)
+            cat <<'EOF'
+#if __STDC__
+#define C_SYMBOL_NAME(name) _##name
+#else
+#define C_SYMBOL_NAME(name) _/**/name
+#endif
+EOF
+            ;;
+        *)
+            cat <<'EOF'
+#define C_SYMBOL_NAME(name) name
+EOF
+            ;;
+    esac
+} >./mpi/sysdep.h
+
# fixme: grep these modules from Makefile.in
-mpi_ln_modules="mpih-add1 mpih-mul1 mpih-mul2 mpih-mul3 \
+mpi_ln_modules="${mpi_extra_modules} mpih-add1 mpih-mul1 mpih-mul2 mpih-mul3 \
mpih-shift mpih-sub1"
mpi_ln_objects=
diff --git a/mpi/generic/distfiles b/mpi/generic/distfiles
new file mode 100644
index 000000000..1febb49dd
--- /dev/null
+++ b/mpi/generic/distfiles
@@ -0,0 +1,7 @@
+mpih-add1.c
+mpih-mul1.c
+mpih-mul2.c
+mpih-mul3.c
+mpih-shift.c
+mpih-sub1.c
+
diff --git a/mpi/hppa/distfiles b/mpi/hppa/distfiles
new file mode 100644
index 000000000..7ca77f801
--- /dev/null
+++ b/mpi/hppa/distfiles
@@ -0,0 +1,4 @@
+
+mpih-add1.S
+udiv-qrnnd.S
+
diff --git a/mpi/hppa/mpih-add1.S b/mpi/hppa/mpih-add1.S
new file mode 100644
index 000000000..a30e80b83
--- /dev/null
+++ b/mpi/hppa/mpih-add1.S
@@ -0,0 +1,70 @@
+/* hppa add_n -- Add two limb vectors of the same length > 0 and store
+ * sum in a third limb vector.
+ *
+ * Copyright (C) 1992, 1994 Free Software Foundation, Inc.
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+
+/*******************
+ * mpi_limb_t
+ * mpihelp_add_n( mpi_ptr_t res_ptr, (gr26)
+ * mpi_ptr_t s1_ptr, (gr25)
+ * mpi_ptr_t s2_ptr, (gr24)
+ * mpi_size_t size) (gr23)
+ *
+ * One might want to unroll this as for other processors, but it turns
+ * out that the data cache contention after a store makes such
+ * unrolling useless. We can't come under 5 cycles/limb anyway.
+ */
+
+ .code
+ .export __mpihelp_add_n
+__mpihelp_add_n
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+; Registers, per the header comment: %r26 = res_ptr, %r25 = s1_ptr,
+; %r24 = s2_ptr, %r23 = size.
+; NOTE(review): the instruction following each branch below is assumed
+; to sit in the branch delay slot and run whether or not the branch is
+; taken -- confirm against the PA-RISC manual.
+
+; Fetch the first limb of each source operand.
+; NOTE(review): ",ma 4" presumably advances the pointer by 4 after the
+; access -- confirm.
+ ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+
+ addib,= -1,%r23,L$end ; check for (SIZE == 1)
+ add %r20,%r19,%r28 ; add first limbs ignoring cy
+
+; Per limb: fetch the next source limbs, store the previous sum limb to
+; the result, count %r23 down, and add the new limbs with carry.
+L$loop ldws,ma 4(0,%r25),%r20
+ ldws,ma 4(0,%r24),%r19
+ stws,ma %r28,4(0,%r26)
+ addib,<> -1,%r23,L$loop
+ addc %r20,%r19,%r28
+
+; Store the last sum limb and return through %r2; %r28 receives
+; %r0 + %r0 + carry, presumably the carry-out returned as mpi_limb_t.
+L$end stws %r28,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r0,%r28
+
+ .exit
+ .procend
diff --git a/mpi/hppa/udiv-qrnnd.S b/mpi/hppa/udiv-qrnnd.S
new file mode 100644
index 000000000..849238349
--- /dev/null
+++ b/mpi/hppa/udiv-qrnnd.S
@@ -0,0 +1,297 @@
+/* HP-PA __udiv_qrnnd division support, used from longlong.h.
+ * This version runs fast on pre-PA7000 CPUs.
+ *
+ * Copyright (C) 1993, 1994 Free Software Foundation, Inc.
+ * Copyright (c) 1997 by Werner Koch (dd9jn)
+ *
+ * This file is part of G10.
+ *
+ * G10 is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * G10 is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ * Actually it's the same code with only minor changes in the
+ * way the data is stored; this is to support the abstraction
+ * of an optional secure memory allocation which may be used
+ * to avoid revealing of sensitive data due to paging etc.
+ * The GNU MP Library itself is published under the LGPL;
+ * however I decided to publish this code under the plain GPL.
+ */
+
+
+
+/* INPUT PARAMETERS
+ * rem_ptr gr26
+ * n1 gr25
+ * n0 gr24
+ * d gr23
+ *
+ * The code size is a bit excessive. We could merge the last two ds;addc
+ * sequences by simply moving the "bb,< Odd" instruction down. The only
+ * trouble is the FFFFFFFF code that would need some hacking.
+ */
+
+ .code
+ .export __udiv_qrnnd
+__udiv_qrnnd
+ .proc
+ .callinfo frame=0,no_calls
+ .entry
+
+ comb,< %r23,0,L$largedivisor
+ sub %r0,%r23,%r1 ; clear cy as side-effect
+ ds %r0,%r1,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r23,%r25
+ addc %r24,%r24,%r28
+ ds %r25,%r23,%r25
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r23,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r28,%r28,%r28
+
+L$largedivisor
+ extru %r24,31,1,%r19 ; r19 = n0 & 1
+ bb,< %r23,31,L$odd
+ extru %r23,30,31,%r22 ; r22 = d >> 1
+ shd %r25,%r24,1,%r24 ; r24 = new n0
+ extru %r25,30,31,%r25 ; r25 = new n1
+ sub %r0,%r22,%r21
+ ds %r0,%r21,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r22,%r25
+ sh1addl %r25,%r19,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r24,%r24,%r28
+
+L$odd addib,sv,n 1,%r22,L$FF.. ; r22 = (d / 2 + 1)
+ shd %r25,%r24,1,%r24 ; r24 = new n0
+ extru %r25,30,31,%r25 ; r25 = new n1
+ sub %r0,%r22,%r21
+ ds %r0,%r21,%r0
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r24
+ ds %r25,%r22,%r25
+ addc %r24,%r24,%r28
+ comclr,>= %r25,%r0,%r0
+ addl %r25,%r22,%r25
+ sh1addl %r25,%r19,%r25
+; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25
+ add,nuv %r28,%r25,%r25
+ addl %r25,%r1,%r25
+ addc %r0,%r28,%r28
+ sub,<< %r25,%r23,%r0
+ addl %r25,%r1,%r25
+ stws %r25,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r28,%r28
+
+; This is just a special case of the code above.
+; We come here when d == 0xFFFFFFFF
+L$FF.. add,uv %r25,%r24,%r24
+ sub,<< %r24,%r23,%r0
+ ldo 1(%r24),%r24
+ stws %r24,0(0,%r26)
+ bv 0(%r2)
+ addc %r0,%r25,%r28
+
+ .exit
+ .procend
diff --git a/mpi/i386/distfiles b/mpi/i386/distfiles
new file mode 100644
index 000000000..34de91574
--- /dev/null
+++ b/mpi/i386/distfiles
@@ -0,0 +1,8 @@
+mpih-add1.S
+mpih-mul1.S
+mpih-mul2.S
+mpih-mul3.S
+mpih-shift.S
+mpih-sub1.S
+syntax.h
+
diff --git a/mpi/mpi-inline.h b/mpi/mpi-inline.h
index 4d19942ad..03b5fbc86 100644
--- a/mpi/mpi-inline.h
+++ b/mpi/mpi-inline.h
@@ -123,5 +123,4 @@ mpihelp_sub( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
}
-
#endif /*G10_MPI_INLINE_H*/
diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h
index 93ed688ae..f084c7e8a 100644
--- a/mpi/mpi-internal.h
+++ b/mpi/mpi-internal.h
@@ -32,6 +32,21 @@
#include "mpi.h"
+/* KARATSUBA_THRESHOLD is the operand size below which the basecase
+ * routines are used instead of the recursive ones.  It may be predefined
+ * by the build; otherwise a value which is good on most machines is
+ * chosen here.
+ *
+ * Tested 4, 16, 32 and 64, where 16 gave the best performance when
+ * checking a 768 and a 1024 bit ElGamal signature.
+ * (wk 22.12.97) */
+#if !defined(KARATSUBA_THRESHOLD)
+  #define KARATSUBA_THRESHOLD 16
+#elif KARATSUBA_THRESHOLD < 2
+  /* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
+  #undef KARATSUBA_THRESHOLD
+  #define KARATSUBA_THRESHOLD 2
+#endif
typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */
@@ -174,6 +189,9 @@ void mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
mpi_size_t size);
mpi_limb_t mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
mpi_ptr_t vp, mpi_size_t vsize);
+void mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size );
+void mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
+ mpi_ptr_t tspace);
/*-- mpihelp-mul_1.c (or xxx/cpu/*.S) --*/
mpi_limb_t mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
diff --git a/mpi/mpi-pow.c b/mpi/mpi-pow.c
index 43514567b..fcf500c36 100644
--- a/mpi/mpi-pow.c
+++ b/mpi/mpi-pow.c
@@ -51,6 +51,8 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
mpi_ptr_t mp_marker=NULL, bp_marker=NULL, ep_marker=NULL;
mpi_ptr_t xp_marker=NULL;
int assign_rp=0;
+ mpi_ptr_t tspace = NULL;
+ mpi_size_t tsize;
esize = exp->nlimbs;
msize = mod->nlimbs;
@@ -179,7 +181,23 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
mpi_ptr_t tp;
mpi_size_t xsize;
- mpihelp_mul_n(xp, rp, rp, rsize);
+ /*mpihelp_mul_n(xp, rp, rp, rsize);*/
+ if( rsize < KARATSUBA_THRESHOLD )
+ mpih_sqr_n_basecase( xp, rp, rsize );
+ else {
+ if( !tspace ) {
+ tsize = 2 * rsize;
+ tspace = mpi_alloc_limb_space( tsize, 0 );
+ }
+ else if( tsize < (2*rsize) ) {
+ mpi_free_limb_space( tspace );
+ tsize = 2 * rsize;
+ tspace = mpi_alloc_limb_space( tsize, 0 );
+
+ }
+ mpih_sqr_n( xp, rp, rsize, tspace );
+ }
+
xsize = 2 * rsize;
if( xsize > msize ) {
mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize);
@@ -258,5 +276,6 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod)
if( bp_marker ) mpi_free_limb_space( bp_marker );
if( ep_marker ) mpi_free_limb_space( ep_marker );
if( xp_marker ) mpi_free_limb_space( xp_marker );
+ if( tspace ) mpi_free_limb_space( tspace );
}
diff --git a/mpi/mpih-mul.c b/mpi/mpih-mul.c
index 0e52488ec..3b6b732b6 100644
--- a/mpi/mpih-mul.c
+++ b/mpi/mpih-mul.c
@@ -33,17 +33,6 @@
#include "mpi-internal.h"
#include "longlong.h"
-/* If KARATSUBA_THRESHOLD is not already defined, define it to a
- * value which is good on most machines. */
-#ifndef KARATSUBA_THRESHOLD
- #define KARATSUBA_THRESHOLD 32
-#endif
-
-/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */
-#if KARATSUBA_THRESHOLD < 2
- #undef KARATSUBA_THRESHOLD
- #define KARATSUBA_THRESHOLD 2
-#endif
#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
@@ -57,9 +46,9 @@
#define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \
do { \
if ((size) < KARATSUBA_THRESHOLD) \
- sqr_n_basecase (prodp, up, size); \
+ mpih_sqr_n_basecase (prodp, up, size); \
else \
- sqr_n (prodp, up, size, tspace); \
+ mpih_sqr_n (prodp, up, size, tspace); \
} while (0);
@@ -235,8 +224,8 @@ mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
}
-static void
-sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
+void
+mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
{
mpi_size_t i;
mpi_limb_t cy_limb;
@@ -276,8 +265,8 @@ sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size )
}
-static void
-sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
+void
+mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
{
if( size & 1 ) {
/* The size is odd, the code below doesn't handle that.
@@ -361,11 +350,11 @@ mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
if( up == vp ) {
if( size < KARATSUBA_THRESHOLD )
- sqr_n_basecase( prodp, up, size );
+ mpih_sqr_n_basecase( prodp, up, size );
else {
mpi_ptr_t tspace;
tspace = mpi_alloc_limb_space( 2 * size, 0 );
- sqr_n( prodp, up, size, tspace );
+ mpih_sqr_n( prodp, up, size, tspace );
mpi_free_limb_space( tspace );
}
}