diff options
author | Andy Polyakov <appro@openssl.org> | 2005-10-14 17:22:27 +0200 |
---|---|---|
committer | Andy Polyakov <appro@openssl.org> | 2005-10-14 17:22:27 +0200 |
commit | df94f187b9a302adeffcc14bdaeef55085e4beec (patch) | |
tree | 4a907ae6dab47dfb3e95eaa77d2549b91990b464 /crypto/bn/bn_asm.c | |
parent | Broaden compatibility amount Win32 headers even further [some don't have (diff) | |
download | openssl-df94f187b9a302adeffcc14bdaeef55085e4beec.tar.xz openssl-df94f187b9a302adeffcc14bdaeef55085e4beec.zip |
Fix bug in SMALL_FOOTPRINT path and clarify comment.
Diffstat (limited to 'crypto/bn/bn_asm.c')
-rw-r--r-- | crypto/bn/bn_asm.c | 12 |
1 files changed, 8 insertions, 4 deletions
diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c
index 9b8e380c4f..cd50b182b7 100644
--- a/crypto/bn/bn_asm.c
+++ b/crypto/bn/bn_asm.c
@@ -835,8 +835,12 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a)
  * observed to give 40% faster rsa1024 private key operations and 10%
  * faster rsa4096 ones, while on AMD64 it improves rsa1024 sign only
  * by 10% and *worsens* rsa4096 sign by 15%. Once again, it's a
- * reference implementation, one to be used as start-point for
- * platform-specific assembler.
+ * reference implementation, one to be used as starting point for
+ * platform-specific assembler. Mentioned numbers apply to compiler
+ * generated code compiled with and without -DOPENSSL_BN_ASM_MONT and
+ * can vary not only from platform to platform, but even for compiler
+ * versions. Assembler vs. assembler improvement coefficients can
+ * [and are known to] differ and are to be documented elsewhere.
  */
 int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num)
     {
@@ -963,12 +967,12 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
     for(i=0;i<num;i++)
         {
         c0 = bn_mul_add_words(tp,ap,num,bp[i]);
-        c1 = tp[num] + c0;
+        c1 = (tp[num] + c0)&BN_MASK2;
         tp[num] = c1;
         tp[num+1] = (c1<c0?1:0);
 
         c0 = bn_mul_add_words(tp,np,num,tp[0]*n0);
-        c1 = tp[num] + c0;
+        c1 = (tp[num] + c0)&BN_MASK2;
         tp[num] = c1;
         tp[num+1] += (c1<c0?1:0);
         for(j=0;j<=num;j++) tp[j]=tp[j+1];