diff options
Diffstat (limited to 'include/crypto/gf128mul.h')
-rw-r--r-- | include/crypto/gf128mul.h | 87 |
1 files changed, 70 insertions, 17 deletions
diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h index 592d47e565a8..0977fb18ff68 100644 --- a/include/crypto/gf128mul.h +++ b/include/crypto/gf128mul.h @@ -43,12 +43,13 @@ --------------------------------------------------------------------------- Issue Date: 31/01/2006 - An implementation of field multiplication in Galois Field GF(128) + An implementation of field multiplication in Galois Field GF(2^128) */ #ifndef _CRYPTO_GF128MUL_H #define _CRYPTO_GF128MUL_H +#include <asm/byteorder.h> #include <crypto/b128ops.h> #include <linux/slab.h> @@ -65,7 +66,7 @@ * are left and the lsb's are right. char b[16] is an array and b[0] is * the first octet. * - * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 + * 10000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 * b[0] b[1] b[2] b[3] b[13] b[14] b[15] * * Every bit is a coefficient of some power of X. We can store the bits @@ -85,15 +86,17 @@ * Both of the above formats are easy to implement on big-endian * machines. * - * EME (which is patent encumbered) uses the ble format (bits are stored - * in big endian order and the bytes in little endian). The above buffer - * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. + * XTS and EME (the latter of which is patent encumbered) use the ble + * format (bits are stored in big endian order and the bytes in little + * endian). The above buffer represents X^7 in this case and the + * primitive polynomial is b[0] = 0x87. * * The common machine word-size is smaller than 128 bits, so to make * an efficient implementation we must split into machine word sizes. - * This file uses one 32bit for the moment. Machine endianness comes into - * play. The lle format in relation to machine endianness is discussed - * below by the original author of gf128mul Dr Brian Gladman. + * This implementation uses 64-bit words for the moment. Machine + * endianness comes into play. The lle format in relation to machine + * endianness is discussed below by the original author of gf128mul Dr + * Brian Gladman. * * Let's look at the bbe and ble format on a little endian machine. * @@ -127,10 +130,10 @@ * machines this will automatically aligned to wordsize and on a 64-bit * machine also. */ -/* Multiply a GF128 field element by x. Field elements are held in arrays - of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower - indexed bits placed in the more numerically significant bit positions - within bytes. +/* Multiply a GF(2^128) field element by x. Field elements are + held in arrays of bytes in which field bits 8n..8n + 7 are held in + byte[n], with lower indexed bits placed in the more numerically + significant bit positions within bytes. On little endian machines the bit indexes translate into the bit positions within four 32-bit words in the following way @@ -161,8 +164,58 @@ void gf128mul_lle(be128 *a, const be128 *b); void gf128mul_bbe(be128 *a, const be128 *b); -/* multiply by x in ble format, needed by XTS */ -void gf128mul_x_ble(be128 *a, const be128 *b); +/* + * The following functions multiply a field element by x in + * the polynomial field representation. They use 64-bit word operations + * to gain speed but compensate for machine endianness and hence work + * correctly on both styles of machine. + * + * They are defined here for performance. + */ + +static inline u64 gf128mul_mask_from_bit(u64 x, int which) +{ + /* a constant-time version of 'x & ((u64)1 << which) ? (u64)-1 : 0' */ + return ((s64)(x << (63 - which)) >> 63); +} + +static inline void gf128mul_x_lle(be128 *r, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + + /* equivalent to gf128mul_table_le[(b << 7) & 0xff] << 48 + * (see crypto/gf128mul.c): */ + u64 _tt = gf128mul_mask_from_bit(b, 0) & ((u64)0xe1 << 56); + + r->b = cpu_to_be64((b >> 1) | (a << 63)); + r->a = cpu_to_be64((a >> 1) ^ _tt); +} + +static inline void gf128mul_x_bbe(be128 *r, const be128 *x) +{ + u64 a = be64_to_cpu(x->a); + u64 b = be64_to_cpu(x->b); + + /* equivalent to gf128mul_table_be[a >> 63] (see crypto/gf128mul.c): */ + u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; + + r->a = cpu_to_be64((a << 1) | (b >> 63)); + r->b = cpu_to_be64((b << 1) ^ _tt); +} + +/* needed by XTS */ +static inline void gf128mul_x_ble(le128 *r, const le128 *x) +{ + u64 a = le64_to_cpu(x->a); + u64 b = le64_to_cpu(x->b); + + /* equivalent to gf128mul_table_be[b >> 63] (see crypto/gf128mul.c): */ + u64 _tt = gf128mul_mask_from_bit(a, 63) & 0x87; + + r->a = cpu_to_le64((a << 1) | (b >> 63)); + r->b = cpu_to_le64((b << 1) ^ _tt); +} /* 4k table optimization */ @@ -172,8 +225,8 @@ struct gf128mul_4k { struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); -void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); -void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); +void gf128mul_4k_lle(be128 *a, const struct gf128mul_4k *t); +void gf128mul_4k_bbe(be128 *a, const struct gf128mul_4k *t); static inline void gf128mul_free_4k(struct gf128mul_4k *t) { @@ -194,6 +247,6 @@ struct gf128mul_64k { */ struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); void gf128mul_free_64k(struct gf128mul_64k *t); -void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); +void gf128mul_64k_bbe(be128 *a, const struct gf128mul_64k *t); #endif /* _CRYPTO_GF128MUL_H */ |