diff options
author | Werner Koch <wk@gnupg.org> | 1997-12-23 18:30:18 +0100 |
---|---|---|
committer | Werner Koch <wk@gnupg.org> | 1997-12-23 18:30:18 +0100 |
commit | c351df1dc5294dfd81619fea3c1ff6a7e25ba774 (patch) | |
tree | b0fa86a4d1455e53c3a51d696eac51abe0308b0d /mpi | |
parent | Now created by config.links (diff) | |
download | gnupg2-c351df1dc5294dfd81619fea3c1ff6a7e25ba774.tar.xz gnupg2-c351df1dc5294dfd81619fea3c1ff6a7e25ba774.zip |
changed configuration stuff, replaced some Makefile.am by distfiles.
Diffstat (limited to 'mpi')
-rw-r--r-- | mpi/Makefile.am | 3 | ||||
-rw-r--r-- | mpi/Makefile.in | 112 | ||||
-rw-r--r-- | mpi/alpha/distfiles | 3 | ||||
-rw-r--r-- | mpi/alpha/udiv-qrnnd.S | 161 | ||||
-rw-r--r-- | mpi/config.links | 68 | ||||
-rw-r--r-- | mpi/generic/distfiles | 7 | ||||
-rw-r--r-- | mpi/hppa/distfiles | 4 | ||||
-rw-r--r-- | mpi/hppa/mpih-add1.S | 70 | ||||
-rw-r--r-- | mpi/hppa/udiv-qrnnd.S | 297 | ||||
-rw-r--r-- | mpi/i386/distfiles | 8 | ||||
-rw-r--r-- | mpi/mpi-inline.h | 1 | ||||
-rw-r--r-- | mpi/mpi-internal.h | 18 | ||||
-rw-r--r-- | mpi/mpi-pow.c | 21 | ||||
-rw-r--r-- | mpi/mpih-mul.c | 27 |
14 files changed, 689 insertions, 111 deletions
diff --git a/mpi/Makefile.am b/mpi/Makefile.am index 33e1ac456..0daf86042 100644 --- a/mpi/Makefile.am +++ b/mpi/Makefile.am @@ -5,12 +5,11 @@ CFLAGS += -O2 SUFFIXES = .S .s -SUBDIRS = generic i386 EXTRA_DIST = config.links noinst_LIBRARIES = mpi -noinst_HEADERS = sysdep.h +# noinst_HEADERS = mpi_SOURCES = longlong.h \ mpi-add.c \ diff --git a/mpi/Makefile.in b/mpi/Makefile.in index 382a222cd..c179f53f9 100644 --- a/mpi/Makefile.in +++ b/mpi/Makefile.in @@ -42,11 +42,10 @@ INCLUDES = -I$(top_srcdir)/include SUFFIXES = .S .s -SUBDIRS = generic i386 EXTRA_DIST = config.links noinst_LIBRARIES = mpi -noinst_HEADERS = sysdep.h +# noinst_HEADERS = mpi_SOURCES = longlong.h \ mpi-add.c \ @@ -99,8 +98,6 @@ EXTRA_mpi_SOURCES = LIBFILES = libmpi.a AR = ar RANLIB = @RANLIB@ -HEADERS = $(noinst_HEADERS) - DIST_COMMON = Makefile.am Makefile.in @@ -161,45 +158,13 @@ libmpi.a: $(mpi_OBJECTS) $(mpi_LIBADD) $(AR) cru libmpi.a $(mpi_OBJECTS) $(mpi_LIBADD) $(RANLIB) libmpi.a -# This directory's subdirectories are mostly independent; you can cd -# into them and run `make' without going through this Makefile. -# To change the values of `make' variables: instead of editing Makefiles, -# (1) if the variable is set in `config.status', edit `config.status' -# (which will cause the Makefiles to be regenerated when you run `make'); -# (2) otherwise, pass the desired values on the `make' command line. - -@SET_MAKE@ - -all-recursive install-data-recursive install-exec-recursive \ -installdirs-recursive install-recursive uninstall-recursive \ -check-recursive installcheck-recursive info-recursive dvi-recursive \ -mostlyclean-recursive clean-recursive distclean-recursive \ -maintainer-clean-recursive: - for subdir in $(SUBDIRS); do \ - target=`echo $@ | sed s/-recursive//`; \ - echo making $$target in $$subdir; \ - (cd $$subdir && $(MAKE) $$target) \ - || case "$(MFLAGS)" in *k*) fail=yes;; *) exit 1;; esac; \ - done && test -z "$$fail" +ID: $(HEADERS) $(SOURCES) + here=`pwd` && cd $(srcdir) && mkid -f$$here/ID $(SOURCES) $(HEADERS) tags: TAGS -tags-recursive: - list="$(SUBDIRS)"; for subdir in $$list; do \ - (cd $$subdir && $(MAKE) tags); \ - done - -TAGS: tags-recursive $(HEADERS) $(SOURCES) $(CONFIG_HEADER) \ - $(TAGS_DEPENDENCIES) - tags=; \ - here=`pwd`; \ - for subdir in $(SUBDIRS); do \ - test -f $$subdir/TAGS && { \ - tags="$$tags -i $$here/$$subdir/TAGS"; \ - }; \ - done; \ - test -z "$(ETAGS_ARGS)$(CONFIG_HEADER)$(SOURCES)$(HEADERS)$$tags" \ - || etags $(ETAGS_ARGS) $$tags $(CONFIG_HEADER) $(SOURCES) $(HEADERS) +TAGS: $(HEADERS) $(SOURCES) $(TAGS_DEPENDENCIES) + here=`pwd` && cd $(srcdir) && etags $(ETAGS_ARGS) $(SOURCES) $(HEADERS) -o $$here/TAGS mostlyclean-tags: @@ -218,14 +183,6 @@ distdir: $(DEP_DISTFILES) || ln $(srcdir)/$$file $(distdir)/$$file 2> /dev/null \ || cp -p $(srcdir)/$$file $(distdir)/$$file; \ done - for subdir in $(SUBDIRS); do \ - test -d $(distdir)/$$subdir \ - || mkdir $(distdir)/$$subdir \ - || exit 1; \ - chmod 777 $(distdir)/$$subdir; \ - (cd $$subdir && $(MAKE) distdir=../$(distdir)/$$subdir distdir) \ - || exit 1; \ - done # This fragment is probably only useful for maintainers. It relies on # GNU make and gcc. It is only included in the generated Makefile.in @@ -253,30 +210,28 @@ $(srcdir)/.deps/%.P: $(srcdir)/%.c fi # End of maintainer-only section -info: info-recursive - -dvi: dvi-recursive +info: -check: all check-recursive +dvi: -installcheck: installcheck-recursive +check: all -all-am: $(LIBFILES) $(HEADERS) Makefile +installcheck: -install-exec: install-exec-recursive +install-exec: -install-data: install-data-recursive +install-data: -install: install-recursive +install: install-exec install-data all @: -uninstall: uninstall-recursive +uninstall: -all: all-recursive all-am +all: $(LIBFILES) Makefile install-strip: $(MAKE) INSTALL_PROGRAM='$(INSTALL_PROGRAM) -s' install -installdirs: installdirs-recursive +installdirs: mostlyclean-generic: @@ -292,42 +247,29 @@ distclean-generic: maintainer-clean-generic: test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) -mostlyclean-am: mostlyclean-noinstLIBRARIES mostlyclean-compile \ +mostlyclean: mostlyclean-noinstLIBRARIES mostlyclean-compile \ mostlyclean-tags mostlyclean-generic -clean-am: clean-noinstLIBRARIES clean-compile clean-tags clean-generic \ - mostlyclean-am - -distclean-am: distclean-noinstLIBRARIES distclean-compile \ - distclean-tags distclean-generic clean-am +clean: clean-noinstLIBRARIES clean-compile clean-tags clean-generic \ + mostlyclean -maintainer-clean-am: maintainer-clean-noinstLIBRARIES \ - maintainer-clean-compile maintainer-clean-tags \ - maintainer-clean-generic distclean-am - -mostlyclean: mostlyclean-am mostlyclean-recursive - -clean: clean-am clean-recursive - -distclean: distclean-am distclean-recursive +distclean: distclean-noinstLIBRARIES distclean-compile distclean-tags \ + distclean-generic clean rm -f config.status -maintainer-clean: maintainer-clean-am maintainer-clean-recursive +maintainer-clean: maintainer-clean-noinstLIBRARIES \ + maintainer-clean-compile maintainer-clean-tags \ + maintainer-clean-generic distclean @echo "This command is intended for maintainers to use;" @echo "it deletes files that may require special tools to rebuild." .PHONY: default mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \ clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \ mostlyclean-compile distclean-compile clean-compile \ -maintainer-clean-compile install-data-recursive \ -uninstall-data-recursive install-exec-recursive \ -uninstall-exec-recursive installdirs-recursive uninstalldirs-recursive \ -all-recursive check-recursive installcheck-recursive info-recursive \ -dvi-recursive mostlyclean-recursive distclean-recursive clean-recursive \ -maintainer-clean-recursive tags tags-recursive mostlyclean-tags \ -distclean-tags clean-tags maintainer-clean-tags distdir info dvi check \ -installcheck all-am install-exec install-data install uninstall all \ -installdirs mostlyclean-generic distclean-generic clean-generic \ +maintainer-clean-compile tags mostlyclean-tags distclean-tags \ +clean-tags maintainer-clean-tags distdir info dvi check installcheck \ +install-exec install-data install uninstall all installdirs \ +mostlyclean-generic distclean-generic clean-generic \ maintainer-clean-generic clean mostlyclean distclean maintainer-clean CFLAGS += -O2 diff --git a/mpi/alpha/distfiles b/mpi/alpha/distfiles new file mode 100644 index 000000000..4dd0ffe3a --- /dev/null +++ b/mpi/alpha/distfiles @@ -0,0 +1,3 @@ + +udiv-qrnnd.S + diff --git a/mpi/alpha/udiv-qrnnd.S b/mpi/alpha/udiv-qrnnd.S new file mode 100644 index 000000000..487991cd8 --- /dev/null +++ b/mpi/alpha/udiv-qrnnd.S @@ -0,0 +1,161 @@ +/* Alpha 21064 __udiv_qrnnd + * + * Copyright (C) 1992, 1994, 1995 Free Software Foundation, Inc. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + + + .set noreorder + .set noat +.text + .align 3 + .globl __udiv_qrnnd + .ent __udiv_qrnnd +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 + + ldiq cnt,16 + blt d,.Largedivisor + +.Loop1: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule d,n1,qb + subq n1,d,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,.Loop1 + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +.Largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addq $5,$6,$5 + +.Loop2: cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addq n1,n1,n1 + bis n1,tmp,n1 + addq n0,n0,n0 + cmpule $5,n1,qb + subq n1,$5,tmp + cmovne qb,tmp,n1 + bis n0,qb,n0 + subq cnt,1,cnt + bgt cnt,.Loop2 + + addq n1,n1,n1 + addq $4,n1,n1 + bne $6,.LOdd + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +.LOdd: + /* q' in n0. r' in n1 */ + addq n1,n0,n1 + cmpult n1,n0,tmp # tmp := carry from addq + beq tmp,.LLp6 + addq n0,1,n0 + subq n1,d,n1 +.LLp6: cmpult n1,d,tmp + bne tmp,.LLp7 + addq n0,1,n0 + subq n1,d,n1 +.LLp7: + stq n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + + .end __udiv_qrnnd diff --git a/mpi/config.links b/mpi/config.links index e48cf7a08..0560b7de5 100644 --- a/mpi/config.links +++ b/mpi/config.links @@ -2,9 +2,22 @@ # this should set $mpi_ln_src and mpi_ln_dst. # Note: this is called from the above directory. -echo '# created by config.links - do not edit' >./mpi/asm-syntax.h + +mpi_extra_modules= + +echo '/* created by config.links - do not edit */' >./mpi/asm-syntax.h case "${target}" in + i[345]86*-*-linuxaout* | i[345]86*-*-linuxoldld* | i[345]86*-*-*bsd*) + echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h + echo '#include "./i386/syntax.h"' >>./mpi/asm-syntax.h + path="i386" + ;; + i[56]86*-*-linuxaout* | i[56]86*-*-linuxoldld* | i[56]86*-*-*bsd*) + echo '#define BSD_SYNTAX' >>./mpi/asm-syntax.h + echo '#include "./i386/syntax.h"' >>./mpi/asm-syntax.h + path="i586 i386" + ;; i[3456]86*-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h echo '#include "./i386/syntax.h"' >>./mpi/asm-syntax.h @@ -13,7 +26,27 @@ case "${target}" in i[56]86*-*-* | pentium-*-* | pentiumpro-*-*) echo '#define ELF_SYNTAX' >>./mpi/asm-syntax.h echo '#include "./i586/syntax.h"' >>./mpi/asm-syntax.h - path="i586" + path="i586 i386" + ;; + alpha*-*-*) + echo '/* configured for alpha */' >>./mpi/asm-syntax.h + path="alpha" + mpi_extra_modules="udiv-qrnnd" + ;; + hppa7000*-*-*) + echo '/* configured for HPPA (pa7000) */' >>./mpi/asm-syntax.h + path="hppa1_1 hppa" + mpi_extra_modules="udiv-qrnnd" + ;; + hppa1.0*-*-*) + echo '/* configured for HPPA 1.0 */' >>./mpi/asm-syntax.h + path="hppa" + mpi_extra_modules="udiv-qrnnd" + ;; + hppa*-*-*) # assume pa7100 + echo '/* configured for HPPA (pa7100) */' >>./mpi/asm-syntax.h + path="pa7100 hppa1_1 hppa" + mpi_extra_modules="udiv-qrnnd" ;; *) echo '/* No assembler modules configured */' >>./mpi/asm-syntax.h @@ -21,9 +54,38 @@ case "${target}" in ;; esac +case "${target}" in + *-*-linuxaout* | *-*-linuxoldld*) + needs_underscore="y" + ;; + *-*-linux* | *-sysv* | *-solaris*) + needs_underscore="n" + ;; + *) + needs_underscore="y" + ;; +esac + + +# Make sysdep.h +echo '/* created by config.links - do not edit */' >./mpi/sysdep.h +if test "$needs_underscore" = "y" ; then + cat <<EOF >>./mpi/sysdep.h +#if __STDC__ +#define C_SYMBOL_NAME(name) _##name +#else +#define C_SYMBOL_NAME(name) _/**/name +#endif +EOF +else + cat <<EOF >>./mpi/sysdep.h +#define C_SYMBOL_NAME(name) name +EOF +fi + # fixme: grep these modules from Makefile.in -mpi_ln_modules="mpih-add1 mpih-mul1 mpih-mul2 mpih-mul3 \ +mpi_ln_modules="${mpi_extra_modules} mpih-add1 mpih-mul1 mpih-mul2 mpih-mul3 \ mpih-shift mpih-sub1" mpi_ln_objects= diff --git a/mpi/generic/distfiles b/mpi/generic/distfiles new file mode 100644 index 000000000..1febb49dd --- /dev/null +++ b/mpi/generic/distfiles @@ -0,0 +1,7 @@ +mpih-add1.c +mpih-mul1.c +mpih-mul2.c +mpih-mul3.c +mpih-shift.c +mpih-sub1.c + diff --git a/mpi/hppa/distfiles b/mpi/hppa/distfiles new file mode 100644 index 000000000..7ca77f801 --- /dev/null +++ b/mpi/hppa/distfiles @@ -0,0 +1,4 @@ + +mpih-add1.S +udiv-qrnnd.S + diff --git a/mpi/hppa/mpih-add1.S b/mpi/hppa/mpih-add1.S new file mode 100644 index 000000000..a30e80b83 --- /dev/null +++ b/mpi/hppa/mpih-add1.S @@ -0,0 +1,70 @@ +/* hppa add_n -- Add two limb vectors of the same length > 0 and store + * sum in a third limb vector. + * + * Copyright (C) 1992, 1994 Free Software Foundation, Inc. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + + +/******************* + * mpi_limb_t + * mpihelp_add_n( mpi_ptr_t res_ptr, (gr26) + * mpi_ptr_t s1_ptr, (gr25) + * mpi_ptr_t s2_ptr, (gr24) + * mpi_size_t size) (gr23) + * + * One might want to unroll this as for other processors, but it turns + * out that the data cache contention after a store makes such + * unrolling useless. We can't come under 5 cycles/limb anyway. + */ + + .code + .export __mpihelp_add_n +__mpihelp_add_n + .proc + .callinfo frame=0,no_calls + .entry + + ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + + addib,= -1,%r23,L$end ; check for (SIZE == 1) + add %r20,%r19,%r28 ; add first limbs ignoring cy + +L$loop ldws,ma 4(0,%r25),%r20 + ldws,ma 4(0,%r24),%r19 + stws,ma %r28,4(0,%r26) + addib,<> -1,%r23,L$loop + addc %r20,%r19,%r28 + +L$end stws %r28,0(0,%r26) + bv 0(%r2) + addc %r0,%r0,%r28 + + .exit + .procend diff --git a/mpi/hppa/udiv-qrnnd.S b/mpi/hppa/udiv-qrnnd.S new file mode 100644 index 000000000..849238349 --- /dev/null +++ b/mpi/hppa/udiv-qrnnd.S @@ -0,0 +1,297 @@ +/* HP-PA __udiv_qrnnd division support, used from longlong.h. + * This version runs fast on pre-PA7000 CPUs. + * + * Copyright (C) 1993, 1994 Free Software Foundation, Inc. + * Copyright (c) 1997 by Werner Koch (dd9jn) + * + * This file is part of G10. + * + * G10 is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * G10 is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Note: This code is heavily based on the GNU MP Library. + * Actually it's the same code with only minor changes in the + * way the data is stored; this is to support the abstraction + * of an optional secure memory allocation which may be used + * to avoid revealing of sensitive data due to paging etc. + * The GNU MP Library itself is published under the LGPL; + * however I decided to publish this code under the plain GPL. + */ + + + +/* INPUT PARAMETERS + * rem_ptr gr26 + * n1 gr25 + * n0 gr24 + * d gr23 + * + * The code size is a bit excessive. We could merge the last two ds;addc + * sequences by simply moving the "bb,< Odd" instruction down. The only + * trouble is the FFFFFFFF code that would need some hacking. + */ + + .code + .export __udiv_qrnnd +__udiv_qrnnd + .proc + .callinfo frame=0,no_calls + .entry + + comb,< %r23,0,L$largedivisor + sub %r0,%r23,%r1 ; clear cy as side-effect + ds %r0,%r1,%r0 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r24 + ds %r25,%r23,%r25 + addc %r24,%r24,%r28 + ds %r25,%r23,%r25 + comclr,>= %r25,%r0,%r0 + addl %r25,%r23,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r28,%r28,%r28 + +L$largedivisor + extru %r24,31,1,%r19 ; r19 = n0 & 1 + bb,< %r23,31,L$odd + extru %r23,30,31,%r22 ; r22 = d >> 1 + shd %r25,%r24,1,%r24 ; r24 = new n0 + extru %r25,30,31,%r25 ; r25 = new n1 + sub %r0,%r22,%r21 + ds %r0,%r21,%r0 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + comclr,>= %r25,%r0,%r0 + addl %r25,%r22,%r25 + sh1addl %r25,%r19,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r24,%r24,%r28 + +L$odd addib,sv,n 1,%r22,L$FF.. ; r22 = (d / 2 + 1) + shd %r25,%r24,1,%r24 ; r24 = new n0 + extru %r25,30,31,%r25 ; r25 = new n1 + sub %r0,%r22,%r21 + ds %r0,%r21,%r0 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r24 + ds %r25,%r22,%r25 + addc %r24,%r24,%r28 + comclr,>= %r25,%r0,%r0 + addl %r25,%r22,%r25 + sh1addl %r25,%r19,%r25 +; We have computed (n1,,n0) / (d + 1), q' = r28, r' = r25 + add,nuv %r28,%r25,%r25 + addl %r25,%r1,%r25 + addc %r0,%r28,%r28 + sub,<< %r25,%r23,%r0 + addl %r25,%r1,%r25 + stws %r25,0(0,%r26) + bv 0(%r2) + addc %r0,%r28,%r28 + +; This is just a special case of the code above. +; We come here when d == 0xFFFFFFFF +L$FF.. add,uv %r25,%r24,%r24 + sub,<< %r24,%r23,%r0 + ldo 1(%r24),%r24 + stws %r24,0(0,%r26) + bv 0(%r2) + addc %r0,%r25,%r28 + + .exit + .procend diff --git a/mpi/i386/distfiles b/mpi/i386/distfiles new file mode 100644 index 000000000..34de91574 --- /dev/null +++ b/mpi/i386/distfiles @@ -0,0 +1,8 @@ +mpih-add1.S +mpih-mul1.S +mpih-mul2.S +mpih-mul3.S +mpih-shift.S +mpih-sub1.S +syntax.h + diff --git a/mpi/mpi-inline.h b/mpi/mpi-inline.h index 4d19942ad..03b5fbc86 100644 --- a/mpi/mpi-inline.h +++ b/mpi/mpi-inline.h @@ -123,5 +123,4 @@ mpihelp_sub( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size, } - #endif /*G10_MPI_INLINE_H*/ diff --git a/mpi/mpi-internal.h b/mpi/mpi-internal.h index 93ed688ae..f084c7e8a 100644 --- a/mpi/mpi-internal.h +++ b/mpi/mpi-internal.h @@ -32,6 +32,21 @@ #include "mpi.h" +/* If KARATSUBA_THRESHOLD is not already defined, define it to a + * value which is good on most machines. */ + +/* tested 4, 16, 32 and 64, where 16 gave the best performance when + * checking a 768 and a 1024 bit ElGamal signature. + * (wk 22.12.97) */ +#ifndef KARATSUBA_THRESHOLD + #define KARATSUBA_THRESHOLD 16 +#endif + +/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ +#if KARATSUBA_THRESHOLD < 2 + #undef KARATSUBA_THRESHOLD + #define KARATSUBA_THRESHOLD 2 +#endif typedef mpi_limb_t *mpi_ptr_t; /* pointer to a limb */ @@ -174,6 +189,9 @@ void mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size); mpi_limb_t mpihelp_mul( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, mpi_ptr_t vp, mpi_size_t vsize); +void mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ); +void mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, + mpi_ptr_t tspace); /*-- mpihelp-mul_1.c (or xxx/cpu/*.S) --*/ mpi_limb_t mpihelp_mul_1( mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, diff --git a/mpi/mpi-pow.c b/mpi/mpi-pow.c index 43514567b..fcf500c36 100644 --- a/mpi/mpi-pow.c +++ b/mpi/mpi-pow.c @@ -51,6 +51,8 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod) mpi_ptr_t mp_marker=NULL, bp_marker=NULL, ep_marker=NULL; mpi_ptr_t xp_marker=NULL; int assign_rp=0; + mpi_ptr_t tspace = NULL; + mpi_size_t tsize; esize = exp->nlimbs; msize = mod->nlimbs; @@ -179,7 +181,23 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod) mpi_ptr_t tp; mpi_size_t xsize; - mpihelp_mul_n(xp, rp, rp, rsize); + /*mpihelp_mul_n(xp, rp, rp, rsize);*/ + if( rsize < KARATSUBA_THRESHOLD ) + mpih_sqr_n_basecase( xp, rp, rsize ); + else { + if( !tspace ) { + tsize = 2 * rsize; + tspace = mpi_alloc_limb_space( tsize, 0 ); + } + else if( tsize < (2*rsize) ) { + mpi_free_limb_space( tspace ); + tsize = 2 * rsize; + tspace = mpi_alloc_limb_space( tsize, 0 ); + + } + mpih_sqr_n( xp, rp, rsize, tspace ); + } + xsize = 2 * rsize; if( xsize > msize ) { mpihelp_divrem(xp + msize, 0, xp, xsize, mp, msize); @@ -258,5 +276,6 @@ mpi_powm( MPI res, MPI base, MPI exp, MPI mod) if( bp_marker ) mpi_free_limb_space( bp_marker ); if( ep_marker ) mpi_free_limb_space( ep_marker ); if( xp_marker ) mpi_free_limb_space( xp_marker ); + if( tspace ) mpi_free_limb_space( tspace ); } diff --git a/mpi/mpih-mul.c b/mpi/mpih-mul.c index 0e52488ec..3b6b732b6 100644 --- a/mpi/mpih-mul.c +++ b/mpi/mpih-mul.c @@ -33,17 +33,6 @@ #include "mpi-internal.h" #include "longlong.h" -/* If KARATSUBA_THRESHOLD is not already defined, define it to a - * value which is good on most machines. */ -#ifndef KARATSUBA_THRESHOLD - #define KARATSUBA_THRESHOLD 32 -#endif - -/* The code can't handle KARATSUBA_THRESHOLD smaller than 2. */ -#if KARATSUBA_THRESHOLD < 2 - #undef KARATSUBA_THRESHOLD - #define KARATSUBA_THRESHOLD 2 -#endif #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ @@ -57,9 +46,9 @@ #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ do { \ if ((size) < KARATSUBA_THRESHOLD) \ - sqr_n_basecase (prodp, up, size); \ + mpih_sqr_n_basecase (prodp, up, size); \ else \ - sqr_n (prodp, up, size, tspace); \ + mpih_sqr_n (prodp, up, size, tspace); \ } while (0); @@ -235,8 +224,8 @@ mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, } -static void -sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ) +void +mpih_sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ) { mpi_size_t i; mpi_limb_t cy_limb; @@ -276,8 +265,8 @@ sqr_n_basecase( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size ) } -static void -sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) +void +mpih_sqr_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) { if( size & 1 ) { /* The size is odd, the code code below doesn't handle that. @@ -361,11 +350,11 @@ mpihelp_mul_n( mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) if( up == vp ) { if( size < KARATSUBA_THRESHOLD ) - sqr_n_basecase( prodp, up, size ); + mpih_sqr_n_basecase( prodp, up, size ); else { mpi_ptr_t tspace; tspace = mpi_alloc_limb_space( 2 * size, 0 ); - sqr_n( prodp, up, size, tspace ); + mpih_sqr_n( prodp, up, size, tspace ); mpi_free_limb_space( tspace ); } } |