author | Andy Polyakov <appro@openssl.org> | 2006-10-11 13:55:11 +0200
---|---|---
committer | Andy Polyakov <appro@openssl.org> | 2006-10-11 13:55:11 +0200
commit | c69ed6ea396b184fd56327b475a441081fcade8e (patch) |
tree | 934b6a730dd5eebcf82ec3b1015f6c2ee96ada1f /crypto |
parent | Typo. (diff) |
download | openssl-c69ed6ea396b184fd56327b475a441081fcade8e.tar.xz, openssl-c69ed6ea396b184fd56327b475a441081fcade8e.zip |
Re-implement md32_common.h [make it simpler!] and eliminate code rendered
redundant as a result.
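The change collapses md32_common.h's former dual-entry scheme (a mandatory host-order block function for aligned input, plus optional data-order and aligned-data-order variants) into a single externally supplied HASH_BLOCK_DATA_ORDER that must accept unaligned input in wire byte order. For orientation, here is the consumer-side contract after the rewrite, paraphrased from the MD5 example in the header's own comment. This is a sketch, not a buildable unit: it presupposes `<openssl/md5.h>` and an external `md5_block_data_order` implementation.

```c
/* Sketch: what a digest implementation defines before including the
 * simplified md32_common.h (names taken from the MD5 example in the
 * header's comment; HOST_l2c is supplied by md32_common.h itself). */
#define DATA_ORDER_IS_LITTLE_ENDIAN

#define HASH_LONG             MD5_LONG
#define HASH_CTX              MD5_CTX
#define HASH_CBLOCK           MD5_CBLOCK
#define HASH_UPDATE           MD5_Update
#define HASH_TRANSFORM        MD5_Transform
#define HASH_FINAL            MD5_Final
/* One block function, implemented externally; it must handle
 * *unaligned* input in data (little-endian) byte order. */
#define HASH_BLOCK_DATA_ORDER md5_block_data_order
#define HASH_MAKE_STRING(c,s) do {           \
        unsigned long ll;                    \
        ll=(c)->A; HOST_l2c(ll,(s));         \
        ll=(c)->B; HOST_l2c(ll,(s));         \
        ll=(c)->C; HOST_l2c(ll,(s));         \
        ll=(c)->D; HOST_l2c(ll,(s));         \
        } while (0)

#include "md32_common.h"  /* emits MD5_Update, MD5_Transform, MD5_Final */
```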
Diffstat (limited to 'crypto')
-rw-r--r-- | crypto/md32_common.h | 343
-rw-r--r-- | crypto/md4/md4_dgst.c | 93
-rw-r--r-- | crypto/md4/md4_locl.h | 45
-rw-r--r-- | crypto/md5/asm/md5-586.pl | 2
-rwxr-xr-x | crypto/md5/asm/md5-x86_64.pl | 8
-rw-r--r-- | crypto/md5/md5_dgst.c | 110
-rw-r--r-- | crypto/md5/md5_locl.h | 51
-rw-r--r-- | crypto/ripemd/asm/rmd-586.pl | 4
-rw-r--r-- | crypto/ripemd/rmd_dgst.c | 205
-rw-r--r-- | crypto/ripemd/rmd_locl.h | 15
-rw-r--r-- | crypto/sha/sha256.c | 61
-rw-r--r-- | crypto/sha/sha_locl.h | 309
12 files changed, 156 insertions, 1090 deletions
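Before the diff itself, it may help to see what the headline simplification amounts to: HASH_UPDATE no longer assembles input into host-order words with the HOST_p_c2l* macro family; it buffers raw bytes with memcpy and hands whole blocks directly to the one data-order block function. Below is a compilable toy model of that driver loop; all `toy_*` names are hypothetical stand-ins, not OpenSSL's, and the block function is a stub.

```c
/* Toy model of the rewritten HASH_UPDATE: byte buffering via memcpy,
 * whole blocks passed straight through.  Illustration only. */
#include <string.h>
#include <stddef.h>

#define TOY_CBLOCK 64                    /* block size, as in MD4/MD5/SHA-1 */

typedef struct {
    unsigned long Nl, Nh;                /* 64-bit message bit count, split */
    unsigned char data[TOY_CBLOCK];      /* byte buffer, kept zeroed */
    unsigned int num;                    /* bytes currently buffered */
} toy_ctx;

/* stand-in for HASH_BLOCK_DATA_ORDER: consumes 'num' full blocks */
static void toy_block(toy_ctx *c, const unsigned char *p, size_t num)
{
    (void)c; (void)p; (void)num;         /* a real digest compresses here */
}

static int toy_update(toy_ctx *c, const void *data_, size_t len)
{
    const unsigned char *data = data_;
    unsigned long l;
    size_t n;

    if (len == 0)
        return 1;

    /* bump the bit counter, propagating carry into the high word */
    l = (c->Nl + (((unsigned long)len) << 3)) & 0xffffffffUL;
    if (l < c->Nl)
        c->Nh++;
    c->Nh += (unsigned long)(len >> 29);
    c->Nl = l;

    n = c->num;
    if (n != 0) {                        /* top up a partial block */
        if (n + len >= TOY_CBLOCK) {
            memcpy(c->data + n, data, TOY_CBLOCK - n);
            toy_block(c, c->data, 1);
            n = TOY_CBLOCK - n;          /* bytes just consumed */
            data += n;
            len -= n;
            c->num = 0;
            memset(c->data, 0, TOY_CBLOCK);  /* keep the buffer zeroed */
        } else {
            memcpy(c->data + n, data, len);
            c->num += (unsigned int)len;
            return 1;
        }
    }

    n = len / TOY_CBLOCK;                /* whole blocks, no staging copy */
    if (n > 0) {
        toy_block(c, data, n);
        data += n * TOY_CBLOCK;
        len -= n * TOY_CBLOCK;
    }

    if (len != 0) {                      /* stash the tail for next time */
        memcpy(c->data, data, len);
        c->num = (unsigned int)len;
    }
    return 1;
}

int main(void)
{
    toy_ctx c = {0};
    return toy_update(&c, "abc", 3) ? 0 : 1;
}
```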
```diff
diff --git a/crypto/md32_common.h b/crypto/md32_common.h
index 3ed16f380f..ce956df382 100644
--- a/crypto/md32_common.h
+++ b/crypto/md32_common.h
@@ -1,6 +1,6 @@
 /* crypto/md32_common.h */
 /* ====================================================================
- * Copyright (c) 1999-2002 The OpenSSL Project.  All rights reserved.
+ * Copyright (c) 1999-2006 The OpenSSL Project.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
@@ -47,10 +47,6 @@
 * OF THE POSSIBILITY OF SUCH DAMAGE.
 * ====================================================================
 *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com). This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com).
- *
 */

 /*
@@ -76,40 +72,27 @@
 * typedef struct {
 *	...
 *	HASH_LONG	Nl,Nh;
+ *	either {
 *	HASH_LONG	data[HASH_LBLOCK];
+ *	unsigned char	data[HASH_CBLOCK];
+ *	};
 *	unsigned int	num;
 *	...
 *	} HASH_CTX;
+ * data[] vector is expected to be zeroed upon first call to
+ * HASH_UPDATE.
 * HASH_UPDATE
 *	name of "Update" function, implemented here.
 * HASH_TRANSFORM
 *	name of "Transform" function, implemented here.
 * HASH_FINAL
 *	name of "Final" function, implemented here.
- * HASH_BLOCK_HOST_ORDER
- *	name of "block" function treating *aligned* input message
- *	in host byte order, implemented externally.
 * HASH_BLOCK_DATA_ORDER
- *	name of "block" function treating *unaligned* input message
- *	in original (data) byte order, implemented externally (it
- *	actually is optional if data and host are of the same
- *	"endianess").
+ *	name of "block" function capable of treating *unaligned* input
+ *	message in original (data) byte order, implemented externally.
 * HASH_MAKE_STRING
 *	macro convering context variables to an ASCII hash string.
 *
- * Optional macros:
- *
- * B_ENDIAN or L_ENDIAN
- *	defines host byte-order.
- * HASH_LONG_LOG2
- *	defaults to 2 if not states otherwise.
- * HASH_LBLOCK
- *	assumed to be HASH_CBLOCK/4 if not stated otherwise.
- * HASH_BLOCK_DATA_ORDER_ALIGNED
- *	alternative "block" function capable of treating
- *	aligned input message in original (data) order,
- *	implemented externally.
- *
 * MD5 example:
 *
 *	#define DATA_ORDER_IS_LITTLE_ENDIAN
@@ -118,11 +101,9 @@
 *	#define HASH_LONG_LOG2	MD5_LONG_LOG2
 *	#define HASH_CTX	MD5_CTX
 *	#define HASH_CBLOCK	MD5_CBLOCK
- *	#define HASH_LBLOCK	MD5_LBLOCK
 *	#define HASH_UPDATE	MD5_Update
 *	#define HASH_TRANSFORM	MD5_Transform
 *	#define HASH_FINAL	MD5_Final
- *	#define HASH_BLOCK_HOST_ORDER	md5_block_host_order
 *	#define HASH_BLOCK_DATA_ORDER	md5_block_data_order
 *
 * <appro@fy.chalmers.se>
@@ -152,27 +133,9 @@
 #error "HASH_FINAL must be defined!"
 #endif

-#ifndef HASH_BLOCK_HOST_ORDER
-#error "HASH_BLOCK_HOST_ORDER must be defined!"
-#endif
-
-#if 0
-/*
- * Moved below as it's required only if HASH_BLOCK_DATA_ORDER_ALIGNED
- * isn't defined.
- */
 #ifndef HASH_BLOCK_DATA_ORDER
 #error "HASH_BLOCK_DATA_ORDER must be defined!"
 #endif
-#endif
-
-#ifndef HASH_LBLOCK
-#define HASH_LBLOCK (HASH_CBLOCK/4)
-#endif
-
-#ifndef HASH_LONG_LOG2
-#define HASH_LONG_LOG2 2
-#endif

 /*
  * Engage compiler specific rotate intrinsic function if available.
@@ -219,70 +182,10 @@
 # endif
 #endif /* PEDANTIC */

-#if HASH_LONG_LOG2==2 /* Engage only if sizeof(HASH_LONG)== 4 */
-/* A nice byte order reversal from Wei Dai <weidai@eskimo.com> */
-#ifdef ROTATE
-/* 5 instructions with rotate instruction, else 9 */
-#define REVERSE_FETCH32(a,l) ( \
-		l=*(const HASH_LONG *)(a), \
-		((ROTATE(l,8)&0x00FF00FF)|(ROTATE((l&0x00FF00FF),24))) \
-	)
-#else
-/* 6 instructions with rotate instruction, else 8 */
-#define REVERSE_FETCH32(a,l) ( \
-		l=*(const HASH_LONG *)(a), \
-		l=(((l>>8)&0x00FF00FF)|((l&0x00FF00FF)<<8)), \
-		ROTATE(l,16) \
-	)
-/*
- * Originally the middle line started with l=(((l&0xFF00FF00)>>8)|...
- * It's rewritten as above for two reasons:
- *	- RISCs aren't good at long constants and have to explicitely
- *	  compose 'em with several (well, usually 2) instructions in a
- *	  register before performing the actual operation and (as you
- *	  already realized:-) having same constant should inspire the
- *	  compiler to permanently allocate the only register for it;
- *	- most modern CPUs have two ALUs, but usually only one has
- *	  circuitry for shifts:-( this minor tweak inspires compiler
- *	  to schedule shift instructions in a better way...
- *
- *				<appro@fy.chalmers.se>
- */
-#endif
-#endif
-
 #ifndef ROTATE
 #define ROTATE(a,n) (((a)<<(n))|(((a)&0xffffffff)>>(32-(n))))
 #endif

-/*
- * Make some obvious choices. E.g., HASH_BLOCK_DATA_ORDER_ALIGNED
- * and HASH_BLOCK_HOST_ORDER ought to be the same if input data
- * and host are of the same "endianess". It's possible to mask
- * this with blank #define HASH_BLOCK_DATA_ORDER though...
- *
- *				<appro@fy.chalmers.se>
- */
-#if defined(B_ENDIAN)
-# if defined(DATA_ORDER_IS_BIG_ENDIAN)
-#  if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
-#   define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
-#  endif
-# endif
-#elif defined(L_ENDIAN)
-# if defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-#  if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED) && HASH_LONG_LOG2==2
-#   define HASH_BLOCK_DATA_ORDER_ALIGNED HASH_BLOCK_HOST_ORDER
-#  endif
-# endif
-#endif
-
-#if !defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-#ifndef HASH_BLOCK_DATA_ORDER
-#error "HASH_BLOCK_DATA_ORDER must be defined!"
-#endif
-#endif
-
 #if defined(DATA_ORDER_IS_BIG_ENDIAN)

 #ifndef PEDANTIC
@@ -314,29 +217,6 @@
 		l|=(((unsigned long)(*((c)++)))    ), \
 		l)
 #endif
-#define HOST_p_c2l(c,l,n) { \
-		switch (n) { \
-		case 0: l =((unsigned long)(*((c)++)))<<24; \
-		case 1: l|=((unsigned long)(*((c)++)))<<16; \
-		case 2: l|=((unsigned long)(*((c)++)))<< 8; \
-		case 3: l|=((unsigned long)(*((c)++))); \
-			} }
-#define HOST_p_c2l_p(c,l,sc,len) { \
-		switch (sc) { \
-		case 0: l =((unsigned long)(*((c)++)))<<24; \
-			if (--len == 0) break; \
-		case 1: l|=((unsigned long)(*((c)++)))<<16; \
-			if (--len == 0) break; \
-		case 2: l|=((unsigned long)(*((c)++)))<< 8; \
-			} }
-/* NOTE the pointer is not incremented at the end of this */
-#define HOST_c2l_p(c,l,n) { \
-		l=0; (c)+=n; \
-		switch (n) { \
-		case 3: l =((unsigned long)(*(--(c))))<< 8; \
-		case 2: l|=((unsigned long)(*(--(c))))<<16; \
-		case 1: l|=((unsigned long)(*(--(c))))<<24; \
-			} }
 #ifndef HOST_l2c
 #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)>>24)&0xff), \
 		       *((c)++)=(unsigned char)(((l)>>16)&0xff), \
@@ -362,29 +242,6 @@
 		l|=(((unsigned long)(*((c)++)))<<24), \
 		l)
 #endif
-#define HOST_p_c2l(c,l,n) { \
-		switch (n) { \
-		case 0: l =((unsigned long)(*((c)++))); \
-		case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-		case 2: l|=((unsigned long)(*((c)++)))<<16; \
-		case 3: l|=((unsigned long)(*((c)++)))<<24; \
-			} }
-#define HOST_p_c2l_p(c,l,sc,len) { \
-		switch (sc) { \
-		case 0: l =((unsigned long)(*((c)++))); \
-			if (--len == 0) break; \
-		case 1: l|=((unsigned long)(*((c)++)))<< 8; \
-			if (--len == 0) break; \
-		case 2: l|=((unsigned long)(*((c)++)))<<16; \
-			} }
-/* NOTE the pointer is not incremented at the end of this */
-#define HOST_c2l_p(c,l,n) { \
-		l=0; (c)+=n; \
-		switch (n) { \
-		case 3: l =((unsigned long)(*(--(c))))<<16; \
-		case 2: l|=((unsigned long)(*(--(c))))<< 8; \
-		case 1: l|=((unsigned long)(*(--(c)))); \
-			} }
 #ifndef HOST_l2c
 #define HOST_l2c(l,c) (*((c)++)=(unsigned char)(((l)    )&0xff), \
 		       *((c)++)=(unsigned char)(((l)>> 8)&0xff), \
@@ -402,9 +259,9 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
 	{
 	const unsigned char *data=data_;
-	register HASH_LONG * p;
-	register HASH_LONG l;
-	size_t sw,sc,ew,ec;
+	unsigned char *p;
+	HASH_LONG l;
+	size_t n;

 	if (len==0) return 1;
@@ -416,101 +273,43 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)
 	c->Nh+=(len>>29);	/* might cause compiler warning on 16-bit */
 	c->Nl=l;

-	if (c->num != 0)
+	n = c->num;
+	if (n != 0)
 		{
-		p=c->data;
-		sw=c->num>>2;
-		sc=c->num&0x03;
+		p=(unsigned char *)c->data;

-		if ((c->num+len) >= HASH_CBLOCK)
+		if ((n+len) >= HASH_CBLOCK)
 			{
-			l=p[sw]; HOST_p_c2l(data,l,sc); p[sw++]=l;
-			for (; sw<HASH_LBLOCK; sw++)
-				{
-				HOST_c2l(data,l); p[sw]=l;
-				}
-			HASH_BLOCK_HOST_ORDER (c,p,1);
-			len-=(HASH_CBLOCK-c->num);
-			c->num=0;
-			/* drop through and do the rest */
+			memcpy (p+n,data,HASH_CBLOCK-n);
+			HASH_BLOCK_DATA_ORDER (c,p,1);
+			n      = HASH_CBLOCK-n;
+			data  += n;
+			len   -= n;
+			c->num = 0;
+			memset (p,0,HASH_CBLOCK);	/* keep it zeroed */
 			}
 		else
 			{
-			c->num+=(unsigned int)len;
-			if ((sc+len) < 4) /* ugly, add char's to a word */
-				{
-				l=p[sw]; HOST_p_c2l_p(data,l,sc,len); p[sw]=l;
-				}
-			else
-				{
-				ew=(c->num>>2);
-				ec=(c->num&0x03);
-				if (sc)
-					l=p[sw];
-				HOST_p_c2l(data,l,sc);
-				p[sw++]=l;
-				for (; sw < ew; sw++)
-					{
-					HOST_c2l(data,l); p[sw]=l;
-					}
-				if (ec)
-					{
-					HOST_c2l_p(data,l,ec); p[sw]=l;
-					}
-				}
+			memcpy (p+n,data,len);
+			c->num += (unsigned int)len;
 			return 1;
 			}
 		}

-	sw=len/HASH_CBLOCK;
-	if (sw > 0)
+	n = len/HASH_CBLOCK;
+	if (n > 0)
 		{
-#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-		/*
-		 * Note that HASH_BLOCK_DATA_ORDER_ALIGNED gets defined
-		 * only if sizeof(HASH_LONG)==4.
-		 */
-		if ((((size_t)data)%4) == 0)
-			{
-			/* data is properly aligned so that we can cast it: */
-			HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,sw);
-			sw*=HASH_CBLOCK;
-			data+=sw;
-			len-=sw;
-			}
-		else
-#if !defined(HASH_BLOCK_DATA_ORDER)
-			while (sw--)
-				{
-				memcpy (p=c->data,data,HASH_CBLOCK);
-				HASH_BLOCK_DATA_ORDER_ALIGNED(c,p,1);
-				data+=HASH_CBLOCK;
-				len-=HASH_CBLOCK;
-				}
-#endif
-#endif
-#if defined(HASH_BLOCK_DATA_ORDER)
-			{
-			HASH_BLOCK_DATA_ORDER(c,data,sw);
-			sw*=HASH_CBLOCK;
-			data+=sw;
-			len-=sw;
-			}
-#endif
+		HASH_BLOCK_DATA_ORDER (c,data,n);
+		n    *= HASH_CBLOCK;
+		data += n;
+		len  -= n;
 		}

-	if (len!=0)
+	if (len != 0)
 		{
-		p = c->data;
+		p = (unsigned char *)c->data;
 		c->num = len;
-		ew=len>>2;	/* words to copy */
-		ec=len&0x03;
-		for (; ew; ew--,p++)
-			{
-			HOST_c2l(data,l); *p=l;
-			}
-		HOST_c2l_p(data,l,ec);
-		*p=l;
+		memcpy (p,data,len);
 		}
 	return 1;
 	}
@@ -518,73 +317,36 @@ int HASH_UPDATE (HASH_CTX *c, const void *data_, size_t len)

 void HASH_TRANSFORM (HASH_CTX *c, const unsigned char *data)
 	{
-#if defined(HASH_BLOCK_DATA_ORDER_ALIGNED)
-	if ((((size_t)data)%4) == 0)
-		/* data is properly aligned so that we can cast it: */
-		HASH_BLOCK_DATA_ORDER_ALIGNED (c,(const HASH_LONG *)data,1);
-	else
-#if !defined(HASH_BLOCK_DATA_ORDER)
-		{
-		memcpy (c->data,data,HASH_CBLOCK);
-		HASH_BLOCK_DATA_ORDER_ALIGNED (c,c->data,1);
-		}
-#endif
-#endif
-#if defined(HASH_BLOCK_DATA_ORDER)
 	HASH_BLOCK_DATA_ORDER (c,data,1);
-#endif
 	}

 int HASH_FINAL (unsigned char *md, HASH_CTX *c)
 	{
-	register HASH_LONG *p;
-	register unsigned long l;
-	register int i,j;
-	static const unsigned char end[4]={0x80,0x00,0x00,0x00};
-	const unsigned char *cp=end;
-
-	/* c->num should definitly have room for at least one more byte. */
-	p=c->data;
-	i=c->num>>2;
-	j=c->num&0x03;
-
-#if 0
-	/* purify often complains about the following line as an
-	 * Uninitialized Memory Read. While this can be true, the
-	 * following p_c2l macro will reset l when that case is true.
-	 * This is because j&0x03 contains the number of 'valid' bytes
-	 * already in p[i]. If and only if j&0x03 == 0, the UMR will
-	 * occur but this is also the only time p_c2l will do
-	 * l= *(cp++) instead of l|= *(cp++)
-	 * Many thanks to Alex Tang <altitude@cic.net> for pickup this
-	 * 'potential bug' */
-#ifdef PURIFY
-	if (j==0) p[i]=0; /* Yeah, but that's not the way to fix it:-) */
-#endif
-	l=p[i];
-#else
-	l = (j==0) ? 0 : p[i];
-#endif
-	HOST_p_c2l(cp,l,j); p[i++]=l; /* i is the next 'undefined word' */
+	unsigned char *p = (unsigned char *)c->data;
+	size_t n = c->num;

-	if (i>(HASH_LBLOCK-2)) /* save room for Nl and Nh */
+	p[n] = 0x80; /* there is always room for one */
+	n++;
+
+	if (n > (HASH_CBLOCK-8))
 		{
-		if (i<HASH_LBLOCK) p[i]=0;
-		HASH_BLOCK_HOST_ORDER (c,p,1);
-		i=0;
+		HASH_BLOCK_DATA_ORDER (c,p,1);
+		memset (p,0,HASH_CBLOCK);
 		}
-	for (; i<(HASH_LBLOCK-2); i++)
-		p[i]=0;
+	p += HASH_CBLOCK-8;

 #if   defined(DATA_ORDER_IS_BIG_ENDIAN)
-	p[HASH_LBLOCK-2]=c->Nh;
-	p[HASH_LBLOCK-1]=c->Nl;
+	(void)HOST_l2c(c->Nh,p);
+	(void)HOST_l2c(c->Nl,p);
 #elif defined(DATA_ORDER_IS_LITTLE_ENDIAN)
-	p[HASH_LBLOCK-2]=c->Nl;
-	p[HASH_LBLOCK-1]=c->Nh;
+	(void)HOST_l2c(c->Nl,p);
+	(void)HOST_l2c(c->Nh,p);
 #endif
-	HASH_BLOCK_HOST_ORDER (c,p,1);
+	p -= HASH_CBLOCK;
+	HASH_BLOCK_DATA_ORDER (c,p,1);
+	c->num=0;
+	memset (p,0,HASH_CBLOCK);

 #ifndef HASH_MAKE_STRING
 #error "HASH_MAKE_STRING must be defined!"
@@ -592,11 +354,6 @@ int HASH_FINAL (unsigned char *md, HASH_CTX *c)
 	HASH_MAKE_STRING(c,md);
 #endif

-	c->num=0;
-	/* clear stuff, HASH_BLOCK may be leaving some stuff on the stack
-	 * but I'm not worried :-)
-	OPENSSL_cleanse((void *)c,sizeof(HASH_CTX));
-	 */
 	return 1;
 	}
diff --git a/crypto/md4/md4_dgst.c b/crypto/md4/md4_dgst.c
index d4c7057f13..e5b08f3fce 100644
--- a/crypto/md4/md4_dgst.c
+++ b/crypto/md4/md4_dgst.c
@@ -72,89 +72,14 @@ const char *MD4_version="MD4" OPENSSL_VERSION_PTEXT;

 int MD4_Init(MD4_CTX *c)
 	{
+	memset (c,0,sizeof(*c));
 	c->A=INIT_DATA_A;
 	c->B=INIT_DATA_B;
 	c->C=INIT_DATA_C;
 	c->D=INIT_DATA_D;
-	c->Nl=0;
-	c->Nh=0;
-	c->num=0;
 	return 1;
 	}

-#ifndef md4_block_host_order
-void md4_block_host_order (MD4_CTX *c, const void *data, size_t num)
-	{
-	const MD4_LONG *X=data;
-	register unsigned MD32_REG_T A,B,C,D;
-
-	A=c->A;
-	B=c->B;
-	C=c->C;
-	D=c->D;
-
-	for (;num--;X+=HASH_LBLOCK)
-		{
-	/* Round 0 */
-	R0(A,B,C,D,X[ 0], 3,0);
-	R0(D,A,B,C,X[ 1], 7,0);
-	R0(C,D,A,B,X[ 2],11,0);
-	R0(B,C,D,A,X[ 3],19,0);
-	R0(A,B,C,D,X[ 4], 3,0);
-	R0(D,A,B,C,X[ 5], 7,0);
-	R0(C,D,A,B,X[ 6],11,0);
-	R0(B,C,D,A,X[ 7],19,0);
-	R0(A,B,C,D,X[ 8], 3,0);
-	R0(D,A,B,C,X[ 9], 7,0);
-	R0(C,D,A,B,X[10],11,0);
-	R0(B,C,D,A,X[11],19,0);
-	R0(A,B,C,D,X[12], 3,0);
-	R0(D,A,B,C,X[13], 7,0);
-	R0(C,D,A,B,X[14],11,0);
-	R0(B,C,D,A,X[15],19,0);
-	/* Round 1 */
-	R1(A,B,C,D,X[ 0], 3,0x5A827999L);
-	R1(D,A,B,C,X[ 4], 5,0x5A827999L);
-	R1(C,D,A,B,X[ 8], 9,0x5A827999L);
-	R1(B,C,D,A,X[12],13,0x5A827999L);
-	R1(A,B,C,D,X[ 1], 3,0x5A827999L);
-	R1(D,A,B,C,X[ 5], 5,0x5A827999L);
-	R1(C,D,A,B,X[ 9], 9,0x5A827999L);
-	R1(B,C,D,A,X[13],13,0x5A827999L);
-	R1(A,B,C,D,X[ 2], 3,0x5A827999L);
-	R1(D,A,B,C,X[ 6], 5,0x5A827999L);
-	R1(C,D,A,B,X[10], 9,0x5A827999L);
-	R1(B,C,D,A,X[14],13,0x5A827999L);
-	R1(A,B,C,D,X[ 3], 3,0x5A827999L);
-	R1(D,A,B,C,X[ 7], 5,0x5A827999L);
-	R1(C,D,A,B,X[11], 9,0x5A827999L);
-	R1(B,C,D,A,X[15],13,0x5A827999L);
-	/* Round 2 */
-	R2(A,B,C,D,X[ 0], 3,0x6ED9EBA1);
-	R2(D,A,B,C,X[ 8], 9,0x6ED9EBA1);
-	R2(C,D,A,B,X[ 4],11,0x6ED9EBA1);
-	R2(B,C,D,A,X[12],15,0x6ED9EBA1);
-	R2(A,B,C,D,X[ 2], 3,0x6ED9EBA1);
-	R2(D,A,B,C,X[10], 9,0x6ED9EBA1);
-	R2(C,D,A,B,X[ 6],11,0x6ED9EBA1);
-	R2(B,C,D,A,X[14],15,0x6ED9EBA1);
-	R2(A,B,C,D,X[ 1], 3,0x6ED9EBA1);
-	R2(D,A,B,C,X[ 9], 9,0x6ED9EBA1);
-	R2(C,D,A,B,X[ 5],11,0x6ED9EBA1);
-	R2(B,C,D,A,X[13],15,0x6ED9EBA1);
-	R2(A,B,C,D,X[ 3], 3,0x6ED9EBA1);
-	R2(D,A,B,C,X[11], 9,0x6ED9EBA1);
-	R2(C,D,A,B,X[ 7],11,0x6ED9EBA1);
-	R2(B,C,D,A,X[15],15,0x6ED9EBA1);
-
-	A = c->A += A;
-	B = c->B += B;
-	C = c->C += C;
-	D = c->D += D;
-		}
-	}
-#endif
-
 #ifndef md4_block_data_order
 #ifdef X
 #undef X
@@ -240,19 +165,3 @@ void md4_block_data_order (MD4_CTX *c, const void *data_, size_t num)
 		}
 	}
 #endif
-
-#ifdef undef
-int printit(unsigned long *l)
-	{
-	int i,ii;
-
-	for (i=0; i<2; i++)
-		{
-		for (ii=0; ii<8; ii++)
-			{
-			fprintf(stderr,"%08lx ",l[i*8+ii]);
-			}
-		fprintf(stderr,"\n");
-		}
-	}
-#endif
diff --git a/crypto/md4/md4_locl.h b/crypto/md4/md4_locl.h
index 625b587fde..c8085b0ead 100644
--- a/crypto/md4/md4_locl.h
+++ b/crypto/md4/md4_locl.h
@@ -65,44 +65,13 @@
 #define MD4_LONG_LOG2 2 /* default to 32 bits */
 #endif

-void md4_block_host_order (MD4_CTX *c, const void *p,size_t num);
 void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);

-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
-    defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-# if !defined(B_ENDIAN)
-/*
- * *_block_host_order is expected to handle aligned data while
- * *_block_data_order - unaligned. As algorithm and host (x86)
- * are in this case of the same "endianness" these two are
- * otherwise indistinguishable. But normally you don't want to
- * call the same function because unaligned access in places
- * where alignment is expected is usually a "Bad Thing". Indeed,
- * on RISCs you get punished with BUS ERROR signal or *severe*
- * performance degradation. Intel CPUs are in turn perfectly
- * capable of loading unaligned data without such drastic side
- * effect. Yes, they say it's slower than aligned load, but no
- * exception is generated and therefore performance degradation
- * is *incomparable* with RISCs. What we should weight here is
- * costs of unaligned access against costs of aligning data.
- * According to my measurements allowing unaligned access results
- * in ~9% performance improvement on Pentium II operating at
- * 266MHz. I won't be surprised if the difference will be higher
- * on faster systems:-)
- *
- *				<appro@fy.chalmers.se>
- */
-# define md4_block_data_order md4_block_host_order
-# endif
-#endif
-
 #define DATA_ORDER_IS_LITTLE_ENDIAN

 #define HASH_LONG		MD4_LONG
-#define HASH_LONG_LOG2		MD4_LONG_LOG2
 #define HASH_CTX		MD4_CTX
 #define HASH_CBLOCK		MD4_CBLOCK
-#define HASH_LBLOCK		MD4_LBLOCK
 #define HASH_UPDATE		MD4_Update
 #define HASH_TRANSFORM		MD4_Transform
 #define HASH_FINAL		MD4_Final
@@ -113,21 +82,7 @@ void md4_block_data_order (MD4_CTX *c, const void *p,size_t num);
 	ll=(c)->C; HOST_l2c(ll,(s)); \
 	ll=(c)->D; HOST_l2c(ll,(s)); \
 	} while (0)
-#define HASH_BLOCK_HOST_ORDER	md4_block_host_order
-#if !defined(L_ENDIAN) || defined(md4_block_data_order)
 #define HASH_BLOCK_DATA_ORDER	md4_block_data_order
-/*
- * Little-endians (Intel and Alpha) feel better without this.
- * It looks like memcpy does better job than generic
- * md4_block_data_order on copying-n-aligning input data.
- * But frankly speaking I didn't expect such result on Alpha.
- * On the other hand I've got this with egcs-1.0.2 and if
- * program is compiled with another (better?) compiler it
- * might turn out other way around.
- *
- *				<appro@fy.chalmers.se>
- */
-#endif

 #include "md32_common.h"
diff --git a/crypto/md5/asm/md5-586.pl b/crypto/md5/asm/md5-586.pl
index fa3fa3bed5..76ac235f7d 100644
--- a/crypto/md5/asm/md5-586.pl
+++ b/crypto/md5/asm/md5-586.pl
@@ -29,7 +29,7 @@ $X="esi";
	 0, 7,14, 5,12, 3,10, 1, 8,15, 6,13, 4,11, 2, 9,	# R3
	);

-&md5_block("md5_block_asm_host_order");
+&md5_block("md5_block_asm_data_order");
 &asm_finish();

 sub Np
diff --git a/crypto/md5/asm/md5-x86_64.pl b/crypto/md5/asm/md5-x86_64.pl
index 75b4085a7b..4330eddb17 100755
--- a/crypto/md5/asm/md5-x86_64.pl
+++ b/crypto/md5/asm/md5-x86_64.pl
@@ -115,9 +115,9 @@ $code .= <<EOF;
 .text
 .align 16

-.globl md5_block_asm_host_order
-.type md5_block_asm_host_order,\@function,3
-md5_block_asm_host_order:
+.globl md5_block_asm_data_order
+.type md5_block_asm_data_order,\@function,3
+md5_block_asm_data_order:
	push	%rbp
	push	%rbx
	push	%r12
@@ -243,7 +243,7 @@ $code .= <<EOF;
	pop	%rbx
	pop	%rbp
	ret
-.size md5_block_asm_host_order,.-md5_block_asm_host_order
+.size md5_block_asm_data_order,.-md5_block_asm_data_order
 EOF

 print $code;
diff --git a/crypto/md5/md5_dgst.c b/crypto/md5/md5_dgst.c
index f97f48e55b..2708e167f7 100644
--- a/crypto/md5/md5_dgst.c
+++ b/crypto/md5/md5_dgst.c
@@ -72,106 +72,14 @@ const char *MD5_version="MD5" OPENSSL_VERSION_PTEXT;

 int MD5_Init(MD5_CTX *c)
 	{
+	memset (c,0,sizeof(*c));
 	c->A=INIT_DATA_A;
 	c->B=INIT_DATA_B;
 	c->C=INIT_DATA_C;
 	c->D=INIT_DATA_D;
-	c->Nl=0;
-	c->Nh=0;
-	c->num=0;
 	return 1;
 	}

-#ifndef md5_block_host_order
-void md5_block_host_order (MD5_CTX *c, const void *data, size_t num)
-	{
-	const MD5_LONG *X=data;
-	register unsigned MD32_REG_T A,B,C,D;
-
-	A=c->A;
-	B=c->B;
-	C=c->C;
-	D=c->D;
-
-	for (;num--;X+=HASH_LBLOCK)
-		{
-	/* Round 0 */
-	R0(A,B,C,D,X[ 0], 7,0xd76aa478L);
-	R0(D,A,B,C,X[ 1],12,0xe8c7b756L);
-	R0(C,D,A,B,X[ 2],17,0x242070dbL);
-	R0(B,C,D,A,X[ 3],22,0xc1bdceeeL);
-	R0(A,B,C,D,X[ 4], 7,0xf57c0fafL);
-	R0(D,A,B,C,X[ 5],12,0x4787c62aL);
-	R0(C,D,A,B,X[ 6],17,0xa8304613L);
-	R0(B,C,D,A,X[ 7],22,0xfd469501L);
-	R0(A,B,C,D,X[ 8], 7,0x698098d8L);
-	R0(D,A,B,C,X[ 9],12,0x8b44f7afL);
-	R0(C,D,A,B,X[10],17,0xffff5bb1L);
-	R0(B,C,D,A,X[11],22,0x895cd7beL);
-	R0(A,B,C,D,X[12], 7,0x6b901122L);
-	R0(D,A,B,C,X[13],12,0xfd987193L);
-	R0(C,D,A,B,X[14],17,0xa679438eL);
-	R0(B,C,D,A,X[15],22,0x49b40821L);
-	/* Round 1 */
-	R1(A,B,C,D,X[ 1], 5,0xf61e2562L);
-	R1(D,A,B,C,X[ 6], 9,0xc040b340L);
-	R1(C,D,A,B,X[11],14,0x265e5a51L);
-	R1(B,C,D,A,X[ 0],20,0xe9b6c7aaL);
-	R1(A,B,C,D,X[ 5], 5,0xd62f105dL);
-	R1(D,A,B,C,X[10], 9,0x02441453L);
-	R1(C,D,A,B,X[15],14,0xd8a1e681L);
-	R1(B,C,D,A,X[ 4],20,0xe7d3fbc8L);
-	R1(A,B,C,D,X[ 9], 5,0x21e1cde6L);
-	R1(D,A,B,C,X[14], 9,0xc33707d6L);
-	R1(C,D,A,B,X[ 3],14,0xf4d50d87L);
-	R1(B,C,D,A,X[ 8],20,0x455a14edL);
-	R1(A,B,C,D,X[13], 5,0xa9e3e905L);
-	R1(D,A,B,C,X[ 2], 9,0xfcefa3f8L);
-	R1(C,D,A,B,X[ 7],14,0x676f02d9L);
-	R1(B,C,D,A,X[12],20,0x8d2a4c8aL);
-	/* Round 2 */
-	R2(A,B,C,D,X[ 5], 4,0xfffa3942L);
-	R2(D,A,B,C,X[ 8],11,0x8771f681L);
-	R2(C,D,A,B,X[11],16,0x6d9d6122L);
-	R2(B,C,D,A,X[14],23,0xfde5380cL);
-	R2(A,B,C,D,X[ 1], 4,0xa4beea44L);
-	R2(D,A,B,C,X[ 4],11,0x4bdecfa9L);
-	R2(C,D,A,B,X[ 7],16,0xf6bb4b60L);
-	R2(B,C,D,A,X[10],23,0xbebfbc70L);
-	R2(A,B,C,D,X[13], 4,0x289b7ec6L);
-	R2(D,A,B,C,X[ 0],11,0xeaa127faL);
-	R2(C,D,A,B,X[ 3],16,0xd4ef3085L);
-	R2(B,C,D,A,X[ 6],23,0x04881d05L);
-	R2(A,B,C,D,X[ 9], 4,0xd9d4d039L);
-	R2(D,A,B,C,X[12],11,0xe6db99e5L);
-	R2(C,D,A,B,X[15],16,0x1fa27cf8L);
-	R2(B,C,D,A,X[ 2],23,0xc4ac5665L);
-	/* Round 3 */
-	R3(A,B,C,D,X[ 0], 6,0xf4292244L);
-	R3(D,A,B,C,X[ 7],10,0x432aff97L);
-	R3(C,D,A,B,X[14],15,0xab9423a7L);
-	R3(B,C,D,A,X[ 5],21,0xfc93a039L);
-	R3(A,B,C,D,X[12], 6,0x655b59c3L);
-	R3(D,A,B,C,X[ 3],10,0x8f0ccc92L);
-	R3(C,D,A,B,X[10],15,0xffeff47dL);
-	R3(B,C,D,A,X[ 1],21,0x85845dd1L);
-	R3(A,B,C,D,X[ 8], 6,0x6fa87e4fL);
-	R3(D,A,B,C,X[15],10,0xfe2ce6e0L);
-	R3(C,D,A,B,X[ 6],15,0xa3014314L);
-	R3(B,C,D,A,X[13],21,0x4e0811a1L);
-	R3(A,B,C,D,X[ 4], 6,0xf7537e82L);
-	R3(D,A,B,C,X[11],10,0xbd3af235L);
-	R3(C,D,A,B,X[ 2],15,0x2ad7d2bbL);
-	R3(B,C,D,A,X[ 9],21,0xeb86d391L);
-
-	A = c->A += A;
-	B = c->B += B;
-	C = c->C += C;
-	D = c->D += D;
-		}
-	}
-#endif
-
 #ifndef md5_block_data_order
 #ifdef X
 #undef X
@@ -274,19 +182,3 @@ void md5_block_data_order (MD5_CTX *c, const void *data_, size_t num)
 		}
 	}
 #endif
-
-#ifdef undef
-int printit(unsigned long *l)
-	{
-	int i,ii;
-
-	for (i=0; i<2; i++)
-		{
-		for (ii=0; ii<8; ii++)
-			{
-			fprintf(stderr,"%08lx ",l[i*8+ii]);
-			}
-		fprintf(stderr,"\n");
-		}
-	}
-#endif
diff --git a/crypto/md5/md5_locl.h b/crypto/md5/md5_locl.h
index d1375dd9f4..968d577995 100644
--- a/crypto/md5/md5_locl.h
+++ b/crypto/md5/md5_locl.h
@@ -68,54 +68,19 @@
 #ifdef MD5_ASM
 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-#  define md5_block_host_order md5_block_asm_host_order
-# elif defined(__sparc) && defined(OPENSSL_SYS_ULTRASPARC)
-   void md5_block_asm_data_order_aligned (MD5_CTX *c, const MD5_LONG *p,size_t num);
-#  define HASH_BLOCK_DATA_ORDER_ALIGNED md5_block_asm_data_order_aligned
+#  define md5_block_data_order md5_block_asm_data_order
 # elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
-#  define md5_block_host_order md5_block_asm_host_order
 #  define md5_block_data_order md5_block_asm_data_order
 # endif
 #endif

-void md5_block_host_order (MD5_CTX *c, const void *p,size_t num);
 void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);

-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
-    defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-# if !defined(B_ENDIAN)
-/*
- * *_block_host_order is expected to handle aligned data while
- * *_block_data_order - unaligned. As algorithm and host (x86)
- * are in this case of the same "endianness" these two are
- * otherwise indistinguishable. But normally you don't want to
- * call the same function because unaligned access in places
- * where alignment is expected is usually a "Bad Thing". Indeed,
- * on RISCs you get punished with BUS ERROR signal or *severe*
- * performance degradation. Intel CPUs are in turn perfectly
- * capable of loading unaligned data without such drastic side
- * effect. Yes, they say it's slower than aligned load, but no
- * exception is generated and therefore performance degradation
- * is *incomparable* with RISCs. What we should weight here is
- * costs of unaligned access against costs of aligning data.
- * According to my measurements allowing unaligned access results
- * in ~9% performance improvement on Pentium II operating at
- * 266MHz. I won't be surprised if the difference will be higher
- * on faster systems:-)
- *
- *				<appro@fy.chalmers.se>
- */
-# define md5_block_data_order md5_block_host_order
-# endif
-#endif
-
 #define DATA_ORDER_IS_LITTLE_ENDIAN

 #define HASH_LONG		MD5_LONG
-#define HASH_LONG_LOG2		MD5_LONG_LOG2
 #define HASH_CTX		MD5_CTX
 #define HASH_CBLOCK		MD5_CBLOCK
-#define HASH_LBLOCK		MD5_LBLOCK
 #define HASH_UPDATE		MD5_Update
 #define HASH_TRANSFORM		MD5_Transform
 #define HASH_FINAL		MD5_Final
@@ -126,21 +91,7 @@ void md5_block_data_order (MD5_CTX *c, const void *p,size_t num);
 	ll=(c)->C; HOST_l2c(ll,(s)); \
 	ll=(c)->D; HOST_l2c(ll,(s)); \
 	} while (0)
-#define HASH_BLOCK_HOST_ORDER	md5_block_host_order
-#if !defined(L_ENDIAN) || defined(md5_block_data_order)
 #define HASH_BLOCK_DATA_ORDER	md5_block_data_order
-/*
- * Little-endians (Intel and Alpha) feel better without this.
- * It looks like memcpy does better job than generic
- * md5_block_data_order on copying-n-aligning input data.
- * But frankly speaking I didn't expect such result on Alpha.
- * On the other hand I've got this with egcs-1.0.2 and if
- * program is compiled with another (better?) compiler it
- * might turn out other way around.
- *
- *				<appro@fy.chalmers.se>
- */
-#endif

 #include "md32_common.h"
diff --git a/crypto/ripemd/asm/rmd-586.pl b/crypto/ripemd/asm/rmd-586.pl
index 0ab6f76bff..4f3c4c967f 100644
--- a/crypto/ripemd/asm/rmd-586.pl
+++ b/crypto/ripemd/asm/rmd-586.pl
@@ -1,7 +1,7 @@
 #!/usr/local/bin/perl

 # Normal is the
-# ripemd160_block_asm_host_order(RIPEMD160_CTX *c, ULONG *X,int blocks);
+# ripemd160_block_asm_data_order(RIPEMD160_CTX *c, ULONG *X,int blocks);

 $normal=0;

@@ -56,7 +56,7 @@ $KR3=0x7A6D76E9;
	 8, 5,12, 9,12, 5,14, 6, 8,13, 6, 5,15,13,11,11,
	);

-&ripemd160_block("ripemd160_block_asm_host_order");
+&ripemd160_block("ripemd160_block_asm_data_order");
 &asm_finish();

 sub Xv
diff --git a/crypto/ripemd/rmd_dgst.c b/crypto/ripemd/rmd_dgst.c
index 03a286dfcc..b04d7bf548 100644
--- a/crypto/ripemd/rmd_dgst.c
+++ b/crypto/ripemd/rmd_dgst.c
@@ -71,218 +71,15 @@ const char *RMD160_version="RIPE-MD160" OPENSSL_VERSION_PTEXT;

 int RIPEMD160_Init(RIPEMD160_CTX *c)
 	{
+	memset (c,0,sizeof(*c));
 	c->A=RIPEMD160_A;
 	c->B=RIPEMD160_B;
 	c->C=RIPEMD160_C;
 	c->D=RIPEMD160_D;
 	c->E=RIPEMD160_E;
-	c->Nl=0;
-	c->Nh=0;
-	c->num=0;
 	return 1;
 	}

-#ifndef ripemd160_block_host_order
-#ifdef X
-#undef X
-#endif
-#define X(i)	XX[i]
-void ripemd160_block_host_order (RIPEMD160_CTX *ctx, const void *p, size_t num)
-	{
-	const RIPEMD160_LONG *XX=p;
-	register unsigned MD32_REG_T A,B,C,D,E;
-	register unsigned MD32_REG_T a,b,c,d,e;
-
-	for (;num--;XX+=HASH_LBLOCK)
-		{
-
-	A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E;
-
-	RIP1(A,B,C,D,E,WL00,SL00);
-	RIP1(E,A,B,C,D,WL01,SL01);
-	RIP1(D,E,A,B,C,WL02,SL02);
-	RIP1(C,D,E,A,B,WL03,SL03);
-	RIP1(B,C,D,E,A,WL04,SL04);
-	RIP1(A,B,C,D,E,WL05,SL05);
-	RIP1(E,A,B,C,D,WL06,SL06);
-	RIP1(D,E,A,B,C,WL07,SL07);
-	RIP1(C,D,E,A,B,WL08,SL08);
-	RIP1(B,C,D,E,A,WL09,SL09);
-	RIP1(A,B,C,D,E,WL10,SL10);
-	RIP1(E,A,B,C,D,WL11,SL11);
-	RIP1(D,E,A,B,C,WL12,SL12);
-	RIP1(C,D,E,A,B,WL13,SL13);
-	RIP1(B,C,D,E,A,WL14,SL14);
-	RIP1(A,B,C,D,E,WL15,SL15);
-
-	RIP2(E,A,B,C,D,WL16,SL16,KL1);
-	RIP2(D,E,A,B,C,WL17,SL17,KL1);
-	RIP2(C,D,E,A,B,WL18,SL18,KL1);
-	RIP2(B,C,D,E,A,WL19,SL19,KL1);
-	RIP2(A,B,C,D,E,WL20,SL20,KL1);
-	RIP2(E,A,B,C,D,WL21,SL21,KL1);
-	RIP2(D,E,A,B,C,WL22,SL22,KL1);
-	RIP2(C,D,E,A,B,WL23,SL23,KL1);
-	RIP2(B,C,D,E,A,WL24,SL24,KL1);
-	RIP2(A,B,C,D,E,WL25,SL25,KL1);
-	RIP2(E,A,B,C,D,WL26,SL26,KL1);
-	RIP2(D,E,A,B,C,WL27,SL27,KL1);
-	RIP2(C,D,E,A,B,WL28,SL28,KL1);
-	RIP2(B,C,D,E,A,WL29,SL29,KL1);
-	RIP2(A,B,C,D,E,WL30,SL30,KL1);
-	RIP2(E,A,B,C,D,WL31,SL31,KL1);
-
-	RIP3(D,E,A,B,C,WL32,SL32,KL2);
-	RIP3(C,D,E,A,B,WL33,SL33,KL2);
-	RIP3(B,C,D,E,A,WL34,SL34,KL2);
-	RIP3(A,B,C,D,E,WL35,SL35,KL2);
-	RIP3(E,A,B,C,D,WL36,SL36,KL2);
-	RIP3(D,E,A,B,C,WL37,SL37,KL2);
-	RIP3(C,D,E,A,B,WL38,SL38,KL2);
-	RIP3(B,C,D,E,A,WL39,SL39,KL2);
-	RIP3(A,B,C,D,E,WL40,SL40,KL2);
-	RIP3(E,A,B,C,D,WL41,SL41,KL2);
-	RIP3(D,E,A,B,C,WL42,SL42,KL2);
-	RIP3(C,D,E,A,B,WL43,SL43,KL2);
-	RIP3(B,C,D,E,A,WL44,SL44,KL2);
-	RIP3(A,B,C,D,E,WL45,SL45,KL2);
-	RIP3(E,A,B,C,D,WL46,SL46,KL2);
-	RIP3(D,E,A,B,C,WL47,SL47,KL2);
-
-	RIP4(C,D,E,A,B,WL48,SL48,KL3);
-	RIP4(B,C,D,E,A,WL49,SL49,KL3);
-	RIP4(A,B,C,D,E,WL50,SL50,KL3);
-	RIP4(E,A,B,C,D,WL51,SL51,KL3);
-	RIP4(D,E,A,B,C,WL52,SL52,KL3);
-	RIP4(C,D,E,A,B,WL53,SL53,KL3);
-	RIP4(B,C,D,E,A,WL54,SL54,KL3);
-	RIP4(A,B,C,D,E,WL55,SL55,KL3);
-	RIP4(E,A,B,C,D,WL56,SL56,KL3);
-	RIP4(D,E,A,B,C,WL57,SL57,KL3);
-	RIP4(C,D,E,A,B,WL58,SL58,KL3);
-	RIP4(B,C,D,E,A,WL59,SL59,KL3);
-	RIP4(A,B,C,D,E,WL60,SL60,KL3);
-	RIP4(E,A,B,C,D,WL61,SL61,KL3);
-	RIP4(D,E,A,B,C,WL62,SL62,KL3);
-	RIP4(C,D,E,A,B,WL63,SL63,KL3);
-
-	RIP5(B,C,D,E,A,WL64,SL64,KL4);
-	RIP5(A,B,C,D,E,WL65,SL65,KL4);
-	RIP5(E,A,B,C,D,WL66,SL66,KL4);
-	RIP5(D,E,A,B,C,WL67,SL67,KL4);
-	RIP5(C,D,E,A,B,WL68,SL68,KL4);
-	RIP5(B,C,D,E,A,WL69,SL69,KL4);
-	RIP5(A,B,C,D,E,WL70,SL70,KL4);
-	RIP5(E,A,B,C,D,WL71,SL71,KL4);
-	RIP5(D,E,A,B,C,WL72,SL72,KL4);
-	RIP5(C,D,E,A,B,WL73,SL73,KL4);
-	RIP5(B,C,D,E,A,WL74,SL74,KL4);
-	RIP5(A,B,C,D,E,WL75,SL75,KL4);
-	RIP5(E,A,B,C,D,WL76,SL76,KL4);
-	RIP5(D,E,A,B,C,WL77,SL77,KL4);
-	RIP5(C,D,E,A,B,WL78,SL78,KL4);
-	RIP5(B,C,D,E,A,WL79,SL79,KL4);
-
-	a=A; b=B; c=C; d=D; e=E;
-	/* Do other half */
-	A=ctx->A; B=ctx->B; C=ctx->C; D=ctx->D; E=ctx->E;
-
-	RIP5(A,B,C,D,E,WR00,SR00,KR0);
-	RIP5(E,A,B,C,D,WR01,SR01,KR0);
-	RIP5(D,E,A,B,C,WR02,SR02,KR0);
-	RIP5(C,D,E,A,B,WR03,SR03,KR0);
-	RIP5(B,C,D,E,A,WR04,SR04,KR0);
-	RIP5(A,B,C,D,E,WR05,SR05,KR0);
-	RIP5(E,A,B,C,D,WR06,SR06,KR0);
-	RIP5(D,E,A,B,C,WR07,SR07,KR0);
-	RIP5(C,D,E,A,B,WR08,SR08,KR0);
-	RIP5(B,C,D,E,A,WR09,SR09,KR0);
-	RIP5(A,B,C,D,E,WR10,SR10,KR0);
-	RIP5(E,A,B,C,D,WR11,SR11,KR0);
-	RIP5(D,E,A,B,C,WR12,SR12,KR0);
-	RIP5(C,D,E,A,B,WR13,SR13,KR0);
-	RIP5(B,C,D,E,A,WR14,SR14,KR0);
-	RIP5(A,B,C,D,E,WR15,SR15,KR0);
-
-	RIP4(E,A,B,C,D,WR16,SR16,KR1);
-	RIP4(D,E,A,B,C,WR17,SR17,KR1);
-	RIP4(C,D,E,A,B,WR18,SR18,KR1);
-	RIP4(B,C,D,E,A,WR19,SR19,KR1);
-	RIP4(A,B,C,D,E,WR20,SR20,KR1);
-	RIP4(E,A,B,C,D,WR21,SR21,KR1);
-	RIP4(D,E,A,B,C,WR22,SR22,KR1);
-	RIP4(C,D,E,A,B,WR23,SR23,KR1);
-	RIP4(B,C,D,E,A,WR24,SR24,KR1);
-	RIP4(A,B,C,D,E,WR25,SR25,KR1);
-	RIP4(E,A,B,C,D,WR26,SR26,KR1);
-	RIP4(D,E,A,B,C,WR27,SR27,KR1);
-	RIP4(C,D,E,A,B,WR28,SR28,KR1);
-	RIP4(B,C,D,E,A,WR29,SR29,KR1);
-	RIP4(A,B,C,D,E,WR30,SR30,KR1);
-	RIP4(E,A,B,C,D,WR31,SR31,KR1);
-
-	RIP3(D,E,A,B,C,WR32,SR32,KR2);
-	RIP3(C,D,E,A,B,WR33,SR33,KR2);
-	RIP3(B,C,D,E,A,WR34,SR34,KR2);
-	RIP3(A,B,C,D,E,WR35,SR35,KR2);
-	RIP3(E,A,B,C,D,WR36,SR36,KR2);
-	RIP3(D,E,A,B,C,WR37,SR37,KR2);
-	RIP3(C,D,E,A,B,WR38,SR38,KR2);
-	RIP3(B,C,D,E,A,WR39,SR39,KR2);
-	RIP3(A,B,C,D,E,WR40,SR40,KR2);
-	RIP3(E,A,B,C,D,WR41,SR41,KR2);
-	RIP3(D,E,A,B,C,WR42,SR42,KR2);
-	RIP3(C,D,E,A,B,WR43,SR43,KR2);
-	RIP3(B,C,D,E,A,WR44,SR44,KR2);
-	RIP3(A,B,C,D,E,WR45,SR45,KR2);
-	RIP3(E,A,B,C,D,WR46,SR46,KR2);
-	RIP3(D,E,A,B,C,WR47,SR47,KR2);
-
-	RIP2(C,D,E,A,B,WR48,SR48,KR3);
-	RIP2(B,C,D,E,A,WR49,SR49,KR3);
-	RIP2(A,B,C,D,E,WR50,SR50,KR3);
-	RIP2(E,A,B,C,D,WR51,SR51,KR3);
-	RIP2(D,E,A,B,C,WR52,SR52,KR3);
-	RIP2(C,D,E,A,B,WR53,SR53,KR3);
-	RIP2(B,C,D,E,A,WR54,SR54,KR3);
-	RIP2(A,B,C,D,E,WR55,SR55,KR3);
-	RIP2(E,A,B,C,D,WR56,SR56,KR3);
-	RIP2(D,E,A,B,C,WR57,SR57,KR3);
-	RIP2(C,D,E,A,B,WR58,SR58,KR3);
-	RIP2(B,C,D,E,A,WR59,SR59,KR3);
-	RIP2(A,B,C,D,E,WR60,SR60,KR3);
-	RIP2(E,A,B,C,D,WR61,SR61,KR3);
-	RIP2(D,E,A,B,C,WR62,SR62,KR3);
-	RIP2(C,D,E,A,B,WR63,SR63,KR3);
-
-	RIP1(B,C,D,E,A,WR64,SR64);
-	RIP1(A,B,C,D,E,WR65,SR65);
-	RIP1(E,A,B,C,D,WR66,SR66);
-	RIP1(D,E,A,B,C,WR67,SR67);
-	RIP1(C,D,E,A,B,WR68,SR68);
-	RIP1(B,C,D,E,A,WR69,SR69);
-	RIP1(A,B,C,D,E,WR70,SR70);
-	RIP1(E,A,B,C,D,WR71,SR71);
-	RIP1(D,E,A,B,C,WR72,SR72);
-	RIP1(C,D,E,A,B,WR73,SR73);
-	RIP1(B,C,D,E,A,WR74,SR74);
-	RIP1(A,B,C,D,E,WR75,SR75);
-	RIP1(E,A,B,C,D,WR76,SR76);
-	RIP1(D,E,A,B,C,WR77,SR77);
-	RIP1(C,D,E,A,B,WR78,SR78);
-	RIP1(B,C,D,E,A,WR79,SR79);
-
-	D     =ctx->B+c+D;
-	ctx->B=ctx->C+d+E;
-	ctx->C=ctx->D+e+A;
-	ctx->D=ctx->E+a+B;
-	ctx->E=ctx->A+b+C;
-	ctx->A=D;
-
-		}
-	}
-#endif
-
 #ifndef ripemd160_block_data_order
 #ifdef X
 #undef X
diff --git a/crypto/ripemd/rmd_locl.h b/crypto/ripemd/rmd_locl.h
index bb66dfa475..f14b346e66 100644
--- a/crypto/ripemd/rmd_locl.h
+++ b/crypto/ripemd/rmd_locl.h
@@ -72,31 +72,20 @@
 */
 #ifdef RMD160_ASM
 # if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__)
-#  define ripemd160_block_host_order ripemd160_block_asm_host_order
+#  define ripemd160_block_data_order ripemd160_block_asm_data_order
 # endif
 #endif

-void ripemd160_block_host_order (RIPEMD160_CTX *c, const void *p,size_t num);
 void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num);

-#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) || \
-    defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-# if !defined(B_ENDIAN)
-#  define ripemd160_block_data_order ripemd160_block_host_order
-# endif
-#endif
-
 #define DATA_ORDER_IS_LITTLE_ENDIAN

 #define HASH_LONG		RIPEMD160_LONG
-#define HASH_LONG_LOG2		RIPEMD160_LONG_LOG2
 #define HASH_CTX		RIPEMD160_CTX
 #define HASH_CBLOCK		RIPEMD160_CBLOCK
-#define HASH_LBLOCK		RIPEMD160_LBLOCK
 #define HASH_UPDATE		RIPEMD160_Update
 #define HASH_TRANSFORM		RIPEMD160_Transform
 #define HASH_FINAL		RIPEMD160_Final
-#define HASH_BLOCK_HOST_ORDER	ripemd160_block_host_order
 #define HASH_MAKE_STRING(c,s)	do { \
	unsigned long ll; \
	ll=(c)->A; HOST_l2c(ll,(s)); \
@@ -105,9 +94,7 @@ void ripemd160_block_data_order (RIPEMD160_CTX *c, const void *p,size_t num);
	ll=(c)->D; HOST_l2c(ll,(s)); \
	ll=(c)->E; HOST_l2c(ll,(s)); \
	} while (0)
-#if !defined(L_ENDIAN) || defined(ripemd160_block_data_order)
 #define HASH_BLOCK_DATA_ORDER	ripemd160_block_data_order
-#endif

 #include "md32_common.h"
diff --git a/crypto/sha/sha256.c b/crypto/sha/sha256.c
index bbc20da0e9..f83e5afe71 100644
--- a/crypto/sha/sha256.c
+++ b/crypto/sha/sha256.c
@@ -18,23 +18,23 @@ const char *SHA256_version="SHA-256" OPENSSL_VERSION_PTEXT;

 int SHA224_Init (SHA256_CTX *c)
	{
+	memset (c,0,sizeof(*c));
	c->h[0]=0xc1059ed8UL;	c->h[1]=0x367cd507UL;
	c->h[2]=0x3070dd17UL;	c->h[3]=0xf70e5939UL;
	c->h[4]=0xffc00b31UL;	c->h[5]=0x68581511UL;
	c->h[6]=0x64f98fa7UL;	c->h[7]=0xbefa4fa4UL;
-	c->Nl=0;	c->Nh=0;
-	c->num=0;	c->md_len=SHA224_DIGEST_LENGTH;
+	c->md_len=SHA224_DIGEST_LENGTH;
	return 1;
	}

 int SHA256_Init (SHA256_CTX *c)
	{
+	memset (c,0,sizeof(*c));
	c->h[0]=0x6a09e667UL;	c->h[1]=0xbb67ae85UL;
	c->h[2]=0x3c6ef372UL;	c->h[3]=0xa54ff53aUL;
	c->h[4]=0x510e527fUL;	c->h[5]=0x9b05688cUL;
	c->h[6]=0x1f83d9abUL;	c->h[7]=0x5be0cd19UL;
-	c->Nl=0;	c->Nh=0;
-	c->num=0;	c->md_len=SHA256_DIGEST_LENGTH;
+	c->md_len=SHA256_DIGEST_LENGTH;
	return 1;
	}

@@ -69,17 +69,11 @@ int SHA224_Update(SHA256_CTX *c, const void *data, size_t len)
 int SHA224_Final (unsigned char *md, SHA256_CTX *c)
	{   return SHA256_Final (md,c);   }

-#ifndef SHA_LONG_LOG2
-#define SHA_LONG_LOG2	2	/* default to 32 bits */
-#endif
-
 #define DATA_ORDER_IS_BIG_ENDIAN

 #define HASH_LONG		SHA_LONG
-#define HASH_LONG_LOG2		SHA_LONG_LOG2
 #define HASH_CTX		SHA256_CTX
 #define HASH_CBLOCK		SHA_CBLOCK
-#define HASH_LBLOCK		SHA_LBLOCK
 /*
 * Note that FIPS180-2 discusses "Truncation of the Hash Function Output."
 * default: case below covers for it. It's not clear however if it's
@@ -112,9 +106,7 @@ int SHA224_Final (unsigned char *md, SHA256_CTX *c)
 #define HASH_UPDATE		SHA256_Update
 #define HASH_TRANSFORM		SHA256_Transform
 #define HASH_FINAL		SHA256_Final
-#define HASH_BLOCK_HOST_ORDER	sha256_block_host_order
 #define HASH_BLOCK_DATA_ORDER	sha256_block_data_order
-void sha256_block_host_order (SHA256_CTX *ctx, const void *in, size_t num);
 void sha256_block_data_order (SHA256_CTX *ctx, const void *in, size_t num);

 #include "md32_common.h"
@@ -158,7 +150,7 @@ static const SHA_LONG K256[64] = {
 static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
	{
	unsigned MD32_REG_T a,b,c,d,e,f,g,h,s0,s1,T1,T2;
-	SHA_LONG X[16];
+	SHA_LONG X[16],l;
	int i;
	const unsigned char *data=in;

@@ -167,33 +159,13 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];

-	if (host)
-		{
-		const SHA_LONG *W=(const SHA_LONG *)data;
-
-		for (i=0;i<16;i++)
-			{
-			T1 = X[i] = W[i];
-			T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
-			T2 = Sigma0(a) + Maj(a,b,c);
-			h = g;	g = f;	f = e;	e = d + T1;
-			d = c;	c = b;	b = a;	a = T1 + T2;
-			}
-
-		data += SHA256_CBLOCK;
-		}
-	else
+	for (i=0;i<16;i++)
		{
-		SHA_LONG l;
-
-		for (i=0;i<16;i++)
-			{
-			HOST_c2l(data,l); T1 = X[i] = l;
-			T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
-			T2 = Sigma0(a) + Maj(a,b,c);
-			h = g;	g = f;	f = e;	e = d + T1;
-			d = c;	c = b;	b = a;	a = T1 + T2;
-			}
+		HOST_c2l(data,l); T1 = X[i] = l;
+		T1 += h + Sigma1(e) + Ch(e,f,g) + K256[i];
+		T2 = Sigma0(a) + Maj(a,b,c);
+		h = g;	g = f;	f = e;	e = d + T1;
+		d = c;	c = b;	b = a;	a = T1 + T2;
		}

	for (;i<64;i++)
@@ -233,13 +205,14 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
	SHA_LONG X[16];
	int i;
	const unsigned char *data=in;
+	const union { long one; char little; } is_endian = {1};

	while (num--)	{

	a = ctx->h[0];	b = ctx->h[1];	c = ctx->h[2];	d = ctx->h[3];
	e = ctx->h[4];	f = ctx->h[5];	g = ctx->h[6];	h = ctx->h[7];

-	if (host)
+	if (!is_endian.little && sizeof(SHA_LONG)==4 && ((size_t)in%4)==0)
		{
		const SHA_LONG *W=(const SHA_LONG *)data;
@@ -305,14 +278,6 @@ static void sha256_block (SHA256_CTX *ctx, const void *in, size_t num, int host)
 #endif
 #endif /* SHA256_ASM */

-/*
- * Idea is to trade couple of cycles for some space. On IA-32 we save
- * about 4K in "big footprint" case. In "small footprint" case any gain
- * is appreciated:-)
- */
-void HASH_BLOCK_HOST_ORDER (SHA256_CTX *ctx, const void *in, size_t num)
-{   sha256_block (ctx,in,num,1);   }
-
 void HASH_BLOCK_DATA_ORDER (SHA256_CTX *ctx, const void *in, size_t num)
 {   sha256_block (ctx,in,num,0);   }
diff --git a/crypto/sha/sha_locl.h b/crypto/sha/sha_locl.h
index 2f5ab1f58e..d482ba12a0 100644
--- a/crypto/sha/sha_locl.h
+++ b/crypto/sha/sha_locl.h
@@ -62,17 +62,11 @@
 #include <openssl/opensslconf.h>
 #include <openssl/sha.h>

-#ifndef SHA_LONG_LOG2
-#define SHA_LONG_LOG2	2	/* default to 32 bits */
-#endif
-
 #define DATA_ORDER_IS_BIG_ENDIAN

 #define HASH_LONG		SHA_LONG
-#define HASH_LONG_LOG2		SHA_LONG_LOG2
 #define HASH_CTX		SHA_CTX
 #define HASH_CBLOCK		SHA_CBLOCK
-#define HASH_LBLOCK		SHA_LBLOCK
 #define HASH_MAKE_STRING(c,s)	do { \
	unsigned long ll; \
	ll=(c)->h0; HOST_l2c(ll,(s)); \
@@ -88,11 +82,9 @@
 # define HASH_TRANSFORM			SHA_Transform
 # define HASH_FINAL			SHA_Final
 # define HASH_INIT			SHA_Init
-# define HASH_BLOCK_HOST_ORDER		sha_block_host_order
 # define HASH_BLOCK_DATA_ORDER		sha_block_data_order
 # define Xupdate(a,ix,ia,ib,ic,id)	(ix=(a)=(ia^ib^ic^id))

-  void sha_block_host_order (SHA_CTX *c, const void *p,size_t num);
   void sha_block_data_order (SHA_CTX *c, const void *p,size_t num);

 #elif defined(SHA_1)
@@ -101,7 +93,6 @@
 # define HASH_TRANSFORM			SHA1_Transform
 # define HASH_FINAL			SHA1_Final
 # define HASH_INIT			SHA1_Init
-# define HASH_BLOCK_HOST_ORDER		sha1_block_host_order
 # define HASH_BLOCK_DATA_ORDER		sha1_block_data_order
 # if defined(__MWERKS__) && defined(__MC68K__)
   /* Metrowerks for Motorola fails otherwise:-( <appro@fy.chalmers.se> */
@@ -117,26 +108,18 @@
 # ifdef SHA1_ASM
 #  if defined(__i386) || defined(__i386__) || defined(_M_IX86) || defined(__INTEL__) \
	|| defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || defined(_M_X64)
-#   define sha1_block_host_order		sha1_block_asm_host_order
-#   define DONT_IMPLEMENT_BLOCK_HOST_ORDER
 #   define sha1_block_data_order		sha1_block_asm_data_order
 #   define DONT_IMPLEMENT_BLOCK_DATA_ORDER
-#   define HASH_BLOCK_DATA_ORDER_ALIGNED	sha1_block_asm_data_order
 #  elif defined(__ia64) || defined(__ia64__) || defined(_M_IA64)
-#   define sha1_block_host_order		sha1_block_asm_host_order
-#   define DONT_IMPLEMENT_BLOCK_HOST_ORDER
 #   define sha1_block_data_order		sha1_block_asm_data_order
 #   define DONT_IMPLEMENT_BLOCK_DATA_ORDER
 #  elif defined(_ARCH_PPC) || defined(_ARCH_PPC64) || \
	defined(__ppc) || defined(__ppc__) || defined(__powerpc) || \
	defined(__ppc64) || defined(__ppc64__) || defined(__powerpc64)
-#   define sha1_block_host_order		sha1_block_asm_data_order
-#   define DONT_IMPLEMENT_BLOCK_HOST_ORDER
 #   define sha1_block_data_order		sha1_block_asm_data_order
 #   define DONT_IMPLEMENT_BLOCK_DATA_ORDER
 #  endif
 # endif
-  void sha1_block_host_order (SHA_CTX *c, const void *p,size_t num);
   void sha1_block_data_order (SHA_CTX *c, const void *p,size_t num);

 #else
@@ -153,14 +136,12 @@

 int HASH_INIT (SHA_CTX *c)
	{
+	memset (c,0,sizeof(*c));
	c->h0=INIT_DATA_h0;
	c->h1=INIT_DATA_h1;
	c->h2=INIT_DATA_h2;
	c->h3=INIT_DATA_h3;
	c->h4=INIT_DATA_h4;
-	c->Nl=0;
-	c->Nh=0;
-	c->num=0;
	return 1;
	}

@@ -235,131 +216,6 @@ int HASH_INIT (SHA_CTX *c)
 # define X(i)	XX[i]
 #endif

-#ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER
-void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, size_t num)
-	{
-	const SHA_LONG *W=d;
-	register unsigned MD32_REG_T A,B,C,D,E,T;
-#ifndef MD32_XARRAY
-	unsigned MD32_REG_T	XX0, XX1, XX2, XX3, XX4, XX5, XX6, XX7,
-				XX8, XX9,XX10,XX11,XX12,XX13,XX14,XX15;
-#else
-	SHA_LONG	XX[16];
-#endif
-
-	A=c->h0;
-	B=c->h1;
-	C=c->h2;
-	D=c->h3;
-	E=c->h4;
-
-	for (;;)
-		{
-	BODY_00_15( 0,A,B,C,D,E,T,W[ 0]);
-	BODY_00_15( 1,T,A,B,C,D,E,W[ 1]);
-	BODY_00_15( 2,E,T,A,B,C,D,W[ 2]);
-	BODY_00_15( 3,D,E,T,A,B,C,W[ 3]);
-	BODY_00_15( 4,C,D,E,T,A,B,W[ 4]);
-	BODY_00_15( 5,B,C,D,E,T,A,W[ 5]);
-	BODY_00_15( 6,A,B,C,D,E,T,W[ 6]);
-	BODY_00_15( 7,T,A,B,C,D,E,W[ 7]);
-	BODY_00_15( 8,E,T,A,B,C,D,W[ 8]);
-	BODY_00_15( 9,D,E,T,A,B,C,W[ 9]);
-	BODY_00_15(10,C,D,E,T,A,B,W[10]);
-	BODY_00_15(11,B,C,D,E,T,A,W[11]);
-	BODY_00_15(12,A,B,C,D,E,T,W[12]);
-	BODY_00_15(13,T,A,B,C,D,E,W[13]);
-	BODY_00_15(14,E,T,A,B,C,D,W[14]);
-	BODY_00_15(15,D,E,T,A,B,C,W[15]);
-
-	BODY_16_19(16,C,D,E,T,A,B,X( 0),W[ 0],W[ 2],W[ 8],W[13]);
-	BODY_16_19(17,B,C,D,E,T,A,X( 1),W[ 1],W[ 3],W[ 9],W[14]);
-	BODY_16_19(18,A,B,C,D,E,T,X( 2),W[ 2],W[ 4],W[10],W[15]);
-	BODY_16_19(19,T,A,B,C,D,E,X( 3),W[ 3],W[ 5],W[11],X( 0));
-
-	BODY_20_31(20,E,T,A,B,C,D,X( 4),W[ 4],W[ 6],W[12],X( 1));
-	BODY_20_31(21,D,E,T,A,B,C,X( 5),W[ 5],W[ 7],W[13],X( 2));
-	BODY_20_31(22,C,D,E,T,A,B,X( 6),W[ 6],W[ 8],W[14],X( 3));
-	BODY_20_31(23,B,C,D,E,T,A,X( 7),W[ 7],W[ 9],W[15],X( 4));
-	BODY_20_31(24,A,B,C,D,E,T,X( 8),W[ 8],W[10],X( 0),X( 5));
-	BODY_20_31(25,T,A,B,C,D,E,X( 9),W[ 9],W[11],X( 1),X( 6));
-	BODY_20_31(26,E,T,A,B,C,D,X(10),W[10],W[12],X( 2),X( 7));
-	BODY_20_31(27,D,E,T,A,B,C,X(11),W[11],W[13],X( 3),X( 8));
-	BODY_20_31(28,C,D,E,T,A,B,X(12),W[12],W[14],X( 4),X( 9));
-	BODY_20_31(29,B,C,D,E,T,A,X(13),W[13],W[15],X( 5),X(10));
-	BODY_20_31(30,A,B,C,D,E,T,X(14),W[14],X( 0),X( 6),X(11));
-	BODY_20_31(31,T,A,B,C,D,E,X(15),W[15],X( 1),X( 7),X(12));
-
-	BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
-	BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
-	BODY_32_39(34,C,D,E,T,A,B,X( 2),X( 4),X(10),X(15));
-	BODY_32_39(35,B,C,D,E,T,A,X( 3),X( 5),X(11),X( 0));
-	BODY_32_39(36,A,B,C,D,E,T,X( 4),X( 6),X(12),X( 1));
-	BODY_32_39(37,T,A,B,C,D,E,X( 5),X( 7),X(13),X( 2));
-	BODY_32_39(38,E,T,A,B,C,D,X( 6),X( 8),X(14),X( 3));
-	BODY_32_39(39,D,E,T,A,B,C,X( 7),X( 9),X(15),X( 4));
-
-	BODY_40_59(40,C,D,E,T,A,B,X( 8),X(10),X( 0),X( 5));
-	BODY_40_59(41,B,C,D,E,T,A,X( 9),X(11),X( 1),X( 6));
-	BODY_40_59(42,A,B,C,D,E,T,X(10),X(12),X( 2),X( 7));
-	BODY_40_59(43,T,A,B,C,D,E,X(11),X(13),X( 3),X( 8));
-	BODY_40_59(44,E,T,A,B,C,D,X(12),X(14),X( 4),X( 9));
-	BODY_40_59(45,D,E,T,A,B,C,X(13),X(15),X( 5),X(10));
-	BODY_40_59(46,C,D,E,T,A,B,X(14),X( 0),X( 6),X(11));
-	BODY_40_59(47,B,C,D,E,T,A,X(15),X( 1),X( 7),X(12));
-	BODY_40_59(48,A,B,C,D,E,T,X( 0),X( 2),X( 8),X(13));
-	BODY_40_59(49,T,A,B,C,D,E,X( 1),X( 3),X( 9),X(14));
-	BODY_40_59(50,E,T,A,B,C,D,X( 2),X( 4),X(10),X(15));
-	BODY_40_59(51,D,E,T,A,B,C,X( 3),X( 5),X(11),X( 0));
-	BODY_40_59(52,C,D,E,T,A,B,X( 4),X( 6),X(12),X( 1));
-	BODY_40_59(53,B,C,D,E,T,A,X( 5),X( 7),X(13),X( 2));
-	BODY_40_59(54,A,B,C,D,E,T,X( 6),X( 8),X(14),X( 3));
-	BODY_40_59(55,T,A,B,C,D,E,X( 7),X( 9),X(15),X( 4));
-	BODY_40_59(56,E,T,A,B,C,D,X( 8),X(10),X( 0),X( 5));
-	BODY_40_59(57,D,E,T,A,B,C,X( 9),X(11),X( 1),X( 6));
-	BODY_40_59(58,C,D,E,T,A,B,X(10),X(12),X( 2),X( 7));
-	BODY_40_59(59,B,C,D,E,T,A,X(11),X(13),X( 3),X( 8));
-
-	BODY_60_79(60,A,B,C,D,E,T,X(12),X(14),X( 4),X( 9));
-	BODY_60_79(61,T,A,B,C,D,E,X(13),X(15),X( 5),X(10));
-	BODY_60_79(62,E,T,A,B,C,D,X(14),X( 0),X( 6),X(11));
-	BODY_60_79(63,D,E,T,A,B,C,X(15),X( 1),X( 7),X(12));
-	BODY_60_79(64,C,D,E,T,A,B,X( 0),X( 2),X( 8),X(13));
-	BODY_60_79(65,B,C,D,E,T,A,X( 1),X( 3),X( 9),X(14));
-	BODY_60_79(66,A,B,C,D,E,T,X( 2),X( 4),X(10),X(15));
-	BODY_60_79(67,T,A,B,C,D,E,X( 3),X( 5),X(11),X( 0));
-	BODY_60_79(68,E,T,A,B,C,D,X( 4),X( 6),X(12),X( 1));
-	BODY_60_79(69,D,E,T,A,B,C,X( 5),X( 7),X(13),X( 2));
-	BODY_60_79(70,C,D,E,T,A,B,X( 6),X( 8),X(14),X( 3));
-	BODY_60_79(71,B,C,D,E,T,A,X( 7),X( 9),X(15),X( 4));
-	BODY_60_79(72,A,B,C,D,E,T,X( 8),X(10),X( 0),X( 5));
-	BODY_60_79(73,T,A,B,C,D,E,X( 9),X(11),X( 1),X( 6));
-	BODY_60_79(74,E,T,A,B,C,D,X(10),X(12),X( 2),X( 7));
-	BODY_60_79(75,D,E,T,A,B,C,X(11),X(13),X( 3),X( 8));
-	BODY_60_79(76,C,D,E,T,A,B,X(12),X(14),X( 4),X( 9));
-	BODY_60_79(77,B,C,D,E,T,A,X(13),X(15),X( 5),X(10));
-	BODY_60_79(78,A,B,C,D,E,T,X(14),X( 0),X( 6),X(11));
-	BODY_60_79(79,T,A,B,C,D,E,X(15),X( 1),X( 7),X(12));
-
-	c->h0=(c->h0+E)&0xffffffffL;
-	c->h1=(c->h1+T)&0xffffffffL;
-	c->h2=(c->h2+A)&0xffffffffL;
-	c->h3=(c->h3+B)&0xffffffffL;
-	c->h4=(c->h4+C)&0xffffffffL;
-
-	if (--num == 0) break;
-
-	A=c->h0;
-	B=c->h1;
-	C=c->h2;
-	D=c->h3;
-	E=c->h4;
-
-	W+=SHA_LBLOCK;
-		}
-	}
-#endif
-
 #ifndef DONT_IMPLEMENT_BLOCK_DATA_ORDER
 void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
	{
@@ -379,43 +235,86 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
	E=c->h4;

	for (;;)
-		{
+		{
+		const union { long one; char little; } is_endian = {1};

-	HOST_c2l(data,l); X( 0)=l;		HOST_c2l(data,l); X( 1)=l;
-	BODY_00_15( 0,A,B,C,D,E,T,X( 0));	HOST_c2l(data,l); X( 2)=l;
-	BODY_00_15( 1,T,A,B,C,D,E,X( 1));	HOST_c2l(data,l); X( 3)=l;
-	BODY_00_15( 2,E,T,A,B,C,D,X( 2));	HOST_c2l(data,l); X( 4)=l;
-	BODY_00_15( 3,D,E,T,A,B,C,X( 3));	HOST_c2l(data,l); X( 5)=l;
-	BODY_00_15( 4,C,D,E,T,A,B,X( 4));	HOST_c2l(data,l); X( 6)=l;
-	BODY_00_15( 5,B,C,D,E,T,A,X( 5));	HOST_c2l(data,l); X( 7)=l;
-	BODY_00_15( 6,A,B,C,D,E,T,X( 6));	HOST_c2l(data,l); X( 8)=l;
-	BODY_00_15( 7,T,A,B,C,D,E,X( 7));	HOST_c2l(data,l); X( 9)=l;
-	BODY_00_15( 8,E,T,A,B,C,D,X( 8));	HOST_c2l(data,l); X(10)=l;
-	BODY_00_15( 9,D,E,T,A,B,C,X( 9));	HOST_c2l(data,l); X(11)=l;
-	BODY_00_15(10,C,D,E,T,A,B,X(10));	HOST_c2l(data,l); X(12)=l;
-	BODY_00_15(11,B,C,D,E,T,A,X(11));	HOST_c2l(data,l); X(13)=l;
-	BODY_00_15(12,A,B,C,D,E,T,X(12));	HOST_c2l(data,l); X(14)=l;
-	BODY_00_15(13,T,A,B,C,D,E,X(13));	HOST_c2l(data,l); X(15)=l;
-	BODY_00_15(14,E,T,A,B,C,D,X(14));
-	BODY_00_15(15,D,E,T,A,B,C,X(15));
-
-	BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
-	BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
-	BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15));
-	BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0));
-
-	BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1));
-	BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2));
-	BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3));
-	BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4));
-	BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5));
-	BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6));
-	BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7));
-	BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8));
-	BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9));
-	BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10));
-	BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11));
-	BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12));
+	if (!is_endian.little && sizeof(SHA_LONG)==4 && ((size_t)p%4)==0)
+		{
+		const SHA_LONG *W=p;
+
+		BODY_00_15( 0,A,B,C,D,E,T,W[ 0]);
+		BODY_00_15( 1,T,A,B,C,D,E,W[ 1]);
+		BODY_00_15( 2,E,T,A,B,C,D,W[ 2]);
+		BODY_00_15( 3,D,E,T,A,B,C,W[ 3]);
+		BODY_00_15( 4,C,D,E,T,A,B,W[ 4]);
+		BODY_00_15( 5,B,C,D,E,T,A,W[ 5]);
+		BODY_00_15( 6,A,B,C,D,E,T,W[ 6]);
+		BODY_00_15( 7,T,A,B,C,D,E,W[ 7]);
+		BODY_00_15( 8,E,T,A,B,C,D,W[ 8]);
+		BODY_00_15( 9,D,E,T,A,B,C,W[ 9]);
+		BODY_00_15(10,C,D,E,T,A,B,W[10]);
+		BODY_00_15(11,B,C,D,E,T,A,W[11]);
+		BODY_00_15(12,A,B,C,D,E,T,W[12]);
+		BODY_00_15(13,T,A,B,C,D,E,W[13]);
+		BODY_00_15(14,E,T,A,B,C,D,W[14]);
+		BODY_00_15(15,D,E,T,A,B,C,W[15]);
+
+		BODY_16_19(16,C,D,E,T,A,B,X( 0),W[ 0],W[ 2],W[ 8],W[13]);
+		BODY_16_19(17,B,C,D,E,T,A,X( 1),W[ 1],W[ 3],W[ 9],W[14]);
+		BODY_16_19(18,A,B,C,D,E,T,X( 2),W[ 2],W[ 4],W[10],W[15]);
+		BODY_16_19(19,T,A,B,C,D,E,X( 3),W[ 3],W[ 5],W[11],X( 0));
+
+		BODY_20_31(20,E,T,A,B,C,D,X( 4),W[ 4],W[ 6],W[12],X( 1));
+		BODY_20_31(21,D,E,T,A,B,C,X( 5),W[ 5],W[ 7],W[13],X( 2));
+		BODY_20_31(22,C,D,E,T,A,B,X( 6),W[ 6],W[ 8],W[14],X( 3));
+		BODY_20_31(23,B,C,D,E,T,A,X( 7),W[ 7],W[ 9],W[15],X( 4));
+		BODY_20_31(24,A,B,C,D,E,T,X( 8),W[ 8],W[10],X( 0),X( 5));
+		BODY_20_31(25,T,A,B,C,D,E,X( 9),W[ 9],W[11],X( 1),X( 6));
+		BODY_20_31(26,E,T,A,B,C,D,X(10),W[10],W[12],X( 2),X( 7));
+		BODY_20_31(27,D,E,T,A,B,C,X(11),W[11],W[13],X( 3),X( 8));
+		BODY_20_31(28,C,D,E,T,A,B,X(12),W[12],W[14],X( 4),X( 9));
+		BODY_20_31(29,B,C,D,E,T,A,X(13),W[13],W[15],X( 5),X(10));
+		BODY_20_31(30,A,B,C,D,E,T,X(14),W[14],X( 0),X( 6),X(11));
+		BODY_20_31(31,T,A,B,C,D,E,X(15),W[15],X( 1),X( 7),X(12));
+		}
+	else
+		{
+		HOST_c2l(data,l); X( 0)=l;		HOST_c2l(data,l); X( 1)=l;
+		BODY_00_15( 0,A,B,C,D,E,T,X( 0));	HOST_c2l(data,l); X( 2)=l;
+		BODY_00_15( 1,T,A,B,C,D,E,X( 1));	HOST_c2l(data,l); X( 3)=l;
+		BODY_00_15( 2,E,T,A,B,C,D,X( 2));	HOST_c2l(data,l); X( 4)=l;
+		BODY_00_15( 3,D,E,T,A,B,C,X( 3));	HOST_c2l(data,l); X( 5)=l;
+		BODY_00_15( 4,C,D,E,T,A,B,X( 4));	HOST_c2l(data,l); X( 6)=l;
+		BODY_00_15( 5,B,C,D,E,T,A,X( 5));	HOST_c2l(data,l); X( 7)=l;
+		BODY_00_15( 6,A,B,C,D,E,T,X( 6));	HOST_c2l(data,l); X( 8)=l;
+		BODY_00_15( 7,T,A,B,C,D,E,X( 7));	HOST_c2l(data,l); X( 9)=l;
+		BODY_00_15( 8,E,T,A,B,C,D,X( 8));	HOST_c2l(data,l); X(10)=l;
+		BODY_00_15( 9,D,E,T,A,B,C,X( 9));	HOST_c2l(data,l); X(11)=l;
+		BODY_00_15(10,C,D,E,T,A,B,X(10));	HOST_c2l(data,l); X(12)=l;
+		BODY_00_15(11,B,C,D,E,T,A,X(11));	HOST_c2l(data,l); X(13)=l;
+		BODY_00_15(12,A,B,C,D,E,T,X(12));	HOST_c2l(data,l); X(14)=l;
+		BODY_00_15(13,T,A,B,C,D,E,X(13));	HOST_c2l(data,l); X(15)=l;
+		BODY_00_15(14,E,T,A,B,C,D,X(14));
+		BODY_00_15(15,D,E,T,A,B,C,X(15));
+
+		BODY_16_19(16,C,D,E,T,A,B,X( 0),X( 0),X( 2),X( 8),X(13));
+		BODY_16_19(17,B,C,D,E,T,A,X( 1),X( 1),X( 3),X( 9),X(14));
+		BODY_16_19(18,A,B,C,D,E,T,X( 2),X( 2),X( 4),X(10),X(15));
+		BODY_16_19(19,T,A,B,C,D,E,X( 3),X( 3),X( 5),X(11),X( 0));
+
+		BODY_20_31(20,E,T,A,B,C,D,X( 4),X( 4),X( 6),X(12),X( 1));
+		BODY_20_31(21,D,E,T,A,B,C,X( 5),X( 5),X( 7),X(13),X( 2));
+		BODY_20_31(22,C,D,E,T,A,B,X( 6),X( 6),X( 8),X(14),X( 3));
+		BODY_20_31(23,B,C,D,E,T,A,X( 7),X( 7),X( 9),X(15),X( 4));
+		BODY_20_31(24,A,B,C,D,E,T,X( 8),X( 8),X(10),X( 0),X( 5));
+		BODY_20_31(25,T,A,B,C,D,E,X( 9),X( 9),X(11),X( 1),X( 6));
+		BODY_20_31(26,E,T,A,B,C,D,X(10),X(10),X(12),X( 2),X( 7));
+		BODY_20_31(27,D,E,T,A,B,C,X(11),X(11),X(13),X( 3),X( 8));
+		BODY_20_31(28,C,D,E,T,A,B,X(12),X(12),X(14),X( 4),X( 9));
+		BODY_20_31(29,B,C,D,E,T,A,X(13),X(13),X(15),X( 5),X(10));
+		BODY_20_31(30,A,B,C,D,E,T,X(14),X(14),X( 0),X( 6),X(11));
+		BODY_20_31(31,T,A,B,C,D,E,X(15),X(15),X( 1),X( 7),X(12));
+		}

	BODY_32_39(32,E,T,A,B,C,D,X( 0),X( 2),X( 8),X(13));
	BODY_32_39(33,D,E,T,A,B,C,X( 1),X( 3),X( 9),X(14));
@@ -482,7 +381,7 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
	D=c->h3;
	E=c->h4;

-	}
+		}
	}
 #endif

@@ -517,52 +416,6 @@ void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
			E=D, D=C, C=ROTATE(B,30), B=A; \
			A=ROTATE(A,5)+T+xa; } while(0)

-#ifndef DONT_IMPLEMENT_BLOCK_HOST_ORDER
-void HASH_BLOCK_HOST_ORDER (SHA_CTX *c, const void *d, size_t num)
-	{
-	const SHA_LONG *W=d;
-	register unsigned MD32_REG_T A,B,C,D,E,T;
-	int i;
-	SHA_LONG X[16];
-
-	A=c->h0;
-	B=c->h1;
-	C=c->h2;
-	D=c->h3;
-	E=c->h4;
-
-	for (;;)
-		{
-	for (i=0;i<16;i++)
-	{ X[i]=W[i]; BODY_00_15(X[i]); }
-	for (i=0;i<4;i++)
-	{ BODY_16_19(X[i], X[i+2], X[i+8], X[(i+13)&15]); }
-	for (;i<24;i++)
-	{ BODY_20_39(X[i&15], X[(i+2)&15], X[(i+8)&15],X[(i+13)&15]); }
-	for (i=0;i<20;i++)
-	{ BODY_40_59(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
-	for (i=4;i<24;i++)
-	{ BODY_60_79(X[(i+8)&15],X[(i+10)&15],X[i&15], X[(i+5)&15]); }
-
-	c->h0=(c->h0+A)&0xffffffffL;
-	c->h1=(c->h1+B)&0xffffffffL;
-	c->h2=(c->h2+C)&0xffffffffL;
-	c->h3=(c->h3+D)&0xffffffffL;
-	c->h4=(c->h4+E)&0xffffffffL;
-
-	if (--num == 0) break;
-
-	A=c->h0;
-	B=c->h1;
-	C=c->h2;
-	D=c->h3;
-	E=c->h4;
-
-	W+=SHA_LBLOCK;
-		}
-	}
-#endif
-
 #ifndef DONT_IMPLEMENT_BLOCK_DATA_ORDER
 void HASH_BLOCK_DATA_ORDER (SHA_CTX *c, const void *p, size_t num)
	{
```
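One detail worth isolating from the SHA hunks above: instead of compile-time B_ENDIAN/L_ENDIAN selection of a host-order fast path, the rewritten code probes endianness at run time with a one-element union and only then takes the cast-and-read-words shortcut (the full condition also checks `sizeof(SHA_LONG)==4` and 4-byte alignment of the input). A standalone demonstration of that idiom follows; it is an illustration, not OpenSSL code.

```c
/* Standalone illustration of the run-time endianness probe used in
 * sha_locl.h / sha256.c: a one-element union whose first byte reveals
 * the host byte order.  Compilers constant-fold the test, so the
 * branch is effectively free. */
#include <stdio.h>
#include <stddef.h>

int main(void)
{
    const union { long one; char little; } is_endian = {1};
    unsigned char buf[64] = {0};

    /* same shape as the condition guarding the word-at-a-time path */
    if (!is_endian.little && ((size_t)buf % 4) == 0)
        printf("big-endian and 4-byte aligned: words can be read directly\n");
    else
        printf("little-endian or unaligned: assemble words byte by byte\n");
    return 0;
}
```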