diff options
Diffstat (limited to 'crypto/bn/bn_exp2.c')
-rw-r--r-- | crypto/bn/bn_exp2.c | 202 |
1 files changed, 202 insertions, 0 deletions
diff --git a/crypto/bn/bn_exp2.c b/crypto/bn/bn_exp2.c new file mode 100644 index 0000000000..eface739b3 --- /dev/null +++ b/crypto/bn/bn_exp2.c @@ -0,0 +1,202 @@ +#include <stdio.h> +#include "cryptlib.h" +#include "bn_lcl.h" + +/* I've done some timing with different table sizes. + * The main hassle is that even with bits set at 3, this requires + * 63 BIGNUMs to store the pre-calculated values. + * 512 1024 + * bits=1 75.4% 79.4% + * bits=2 61.2% 62.4% + * bits=3 61.3% 59.3% + * The lack of speed improvment is also a function of the pre-calculation + * which could be removed. + */ +#define EXP2_TABLE_BITS 2 /* 1 2 3 4 5 */ +#define EXP2_TABLE_SIZE 4 /* 2 4 8 16 32 */ + +int BN_mod_exp2_mont(rr,a1,p1,a2,p2,m,ctx,in_mont) +BIGNUM *rr; +BIGNUM *a1; +BIGNUM *p1; +BIGNUM *a2; +BIGNUM *p2; +BIGNUM *m; +BN_CTX *ctx; +BN_MONT_CTX *in_mont; + { + int i,j,k,bits,bits1,bits2,ret=0,wstart,wend,window,xvalue,yvalue; + int start=1,ts=0,x,y; + BIGNUM *d,*aa1,*aa2,*r; + BIGNUM val[EXP2_TABLE_SIZE][EXP2_TABLE_SIZE]; + BN_MONT_CTX *mont=NULL; + + bn_check_top(a1); + bn_check_top(p1); + bn_check_top(a2); + bn_check_top(p2); + bn_check_top(m); + + if (!(m->d[0] & 1)) + { + BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS); + return(0); + } + d= &(ctx->bn[ctx->tos++]); + r= &(ctx->bn[ctx->tos++]); + bits1=BN_num_bits(p1); + bits2=BN_num_bits(p2); + if ((bits1 == 0) && (bits2 == 0)) + { + BN_one(r); + return(1); + } + bits=(bits1 > bits2)?bits1:bits2; + + /* If this is not done, things will break in the montgomery + * part */ + + if (in_mont != NULL) + mont=in_mont; + else + { + if ((mont=BN_MONT_CTX_new()) == NULL) goto err; + if (!BN_MONT_CTX_set(mont,m,ctx)) goto err; + } + + BN_init(&(val[0][0])); + BN_init(&(val[1][1])); + BN_init(&(val[0][1])); + BN_init(&(val[1][0])); + ts=1; + if (BN_ucmp(a1,m) >= 0) + { + BN_mod(&(val[1][0]),a1,m,ctx); + aa1= &(val[1][0]); + } + else + aa1=a1; + if (BN_ucmp(a2,m) >= 0) + { + BN_mod(&(val[0][1]),a2,m,ctx); + aa2= &(val[0][1]); + } + else + aa2=a2; + if (!BN_to_montgomery(&(val[1][0]),aa1,mont,ctx)) goto err; + if (!BN_to_montgomery(&(val[0][1]),aa2,mont,ctx)) goto err; + if (!BN_mod_mul_montgomery(&(val[1][1]), + &(val[1][0]),&(val[0][1]),mont,ctx)) + goto err; + +#if 0 + if (bits <= 20) /* This is probably 3 or 0x10001, so just do singles */ + window=1; + else if (bits > 250) + window=5; /* max size of window */ + else if (bits >= 120) + window=4; + else + window=3; +#else + window=EXP2_TABLE_BITS; +#endif + + k=1<<window; + for (x=0; x<k; x++) + { + if (x >= 2) + { + BN_init(&(val[x][0])); + BN_init(&(val[x][1])); + if (!BN_mod_mul_montgomery(&(val[x][0]), + &(val[1][0]),&(val[x-1][0]),mont,ctx)) goto err; + if (!BN_mod_mul_montgomery(&(val[x][1]), + &(val[1][0]),&(val[x-1][1]),mont,ctx)) goto err; + } + for (y=2; y<k; y++) + { + BN_init(&(val[x][y])); + if (!BN_mod_mul_montgomery(&(val[x][y]), + &(val[x][y-1]),&(val[0][1]),mont,ctx)) + goto err; + } + } + ts=k; + + start=1; /* This is used to avoid multiplication etc + * when there is only the value '1' in the + * buffer. */ + xvalue=0; /* The 'x value' of the window */ + yvalue=0; /* The 'y value' of the window */ + wstart=bits-1; /* The top bit of the window */ + wend=0; /* The bottom bit of the window */ + + if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err; + for (;;) + { + xvalue=BN_is_bit_set(p1,wstart); + yvalue=BN_is_bit_set(p2,wstart); + if (!(xvalue || yvalue)) + { + if (!start) + { + if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) + goto err; + } + wstart--; + if (wstart < 0) break; + continue; + } + /* We now have wstart on a 'set' bit, we now need to work out + * how bit a window to do. To do this we need to scan + * forward until the last set bit before the end of the + * window */ + j=wstart; + /* xvalue=BN_is_bit_set(p1,wstart); already set */ + /* yvalue=BN_is_bit_set(p1,wstart); already set */ + wend=0; + for (i=1; i<window; i++) + { + if (wstart-i < 0) break; + xvalue+=xvalue; + xvalue|=BN_is_bit_set(p1,wstart-i); + yvalue+=yvalue; + yvalue|=BN_is_bit_set(p2,wstart-i); + } + + /* i is the size of the current window */ + /* add the 'bytes above' */ + if (!start) + for (j=0; j<i; j++) + { + if (!BN_mod_mul_montgomery(r,r,r,mont,ctx)) + goto err; + } + + /* wvalue will be an odd number < 2^window */ + if (xvalue || yvalue) + { + if (!BN_mod_mul_montgomery(r,r,&(val[xvalue][yvalue]), + mont,ctx)) goto err; + } + + /* move the 'window' down further */ + wstart-=i; + start=0; + if (wstart < 0) break; + } + BN_from_montgomery(rr,r,mont,ctx); + ret=1; +err: + if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont); + ctx->tos-=2; + for (i=0; i<ts; i++) + { + for (j=0; j<ts; j++) + { + BN_clear_free(&(val[i][j])); + } + } + return(ret); + } |