Camellia update. Quoting camellia.c:

/*
 * This release balances code size and performance. In particular key
 * schedule setup is fully unrolled, because doing so *significantly*
 * reduces amount of instructions per setup round and code increase is
 * justifiable. In block functions on the other hand only inner loops
 * are unrolled, as full unroll gives only nominal performance boost,
 * while code size grows 4 or 7 times. Also, unlike previous versions
 * this one "encourages" compiler to keep intermediate variables in
 * registers, which should give better "all round" results, in other
 * words reasonable performance even with not so modern compilers.
 */
This commit is contained in:
Andy Polyakov 2008-10-28 08:47:24 +00:00
parent 80aa9cc985
commit 27f864e8ac
6 changed files with 547 additions and 1853 deletions

File diff suppressed because it is too large Load Diff

View File

@ -74,17 +74,16 @@ extern "C" {
#define CAMELLIA_TABLE_BYTE_LEN 272
#define CAMELLIA_TABLE_WORD_LEN (CAMELLIA_TABLE_BYTE_LEN / 4)
/* to match with WORD */
typedef unsigned int KEY_TABLE_TYPE[CAMELLIA_TABLE_WORD_LEN];
typedef unsigned int KEY_TABLE_TYPE[CAMELLIA_TABLE_WORD_LEN]; /* to match with WORD */
struct camellia_key_st
{
KEY_TABLE_TYPE rd_key;
int bitLength;
void (*enc)(const unsigned int *subkey, unsigned int *io);
void (*dec)(const unsigned int *subkey, unsigned int *io);
union {
double d; /* ensures 64-bit align */
KEY_TABLE_TYPE rd_key;
} u;
int grand_rounds;
};
typedef struct camellia_key_st CAMELLIA_KEY;
int Camellia_set_key(const unsigned char *userKey, const int bits,
@ -126,4 +125,3 @@ void Camellia_ctr128_encrypt(const unsigned char *in, unsigned char *out,
#endif
#endif /* !HEADER_Camellia_H */

View File

@ -55,219 +55,89 @@
# endif
#endif
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <openssl/camellia.h>
#include "cmll_locl.h"
void Camellia_cbc_encrypt(const unsigned char *in, unsigned char *out,
const unsigned long length, const CAMELLIA_KEY *key,
unsigned char *ivec, const int enc) {
const unsigned long length, const CAMELLIA_KEY *key,
unsigned char *ivec, const int enc)
{
unsigned long n;
unsigned long len = length;
unsigned char tmp[CAMELLIA_BLOCK_SIZE];
const unsigned char *iv = ivec;
union { u32 t32[CAMELLIA_BLOCK_SIZE/sizeof(u32)];
u8 t8 [CAMELLIA_BLOCK_SIZE]; } tmp;
const union { long one; char little; } camellia_endian = {1};
assert(in && out && key && ivec);
assert((CAMELLIA_ENCRYPT == enc)||(CAMELLIA_DECRYPT == enc));
if(((size_t)in|(size_t)out|(size_t)ivec) % sizeof(u32) == 0)
if (CAMELLIA_ENCRYPT == enc)
{
if (CAMELLIA_ENCRYPT == enc)
while (len >= CAMELLIA_BLOCK_SIZE)
{
while (len >= CAMELLIA_BLOCK_SIZE)
{
XOR4WORD2((u32 *)out,
(u32 *)in, (u32 *)iv);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
key->enc(key->rd_key, (u32 *)out);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
iv = out;
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
if (len)
{
for(n=0; n < len; ++n)
out[n] = in[n] ^ iv[n];
for(n=len; n < CAMELLIA_BLOCK_SIZE; ++n)
out[n] = iv[n];
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
key->enc(key->rd_key, (u32 *)out);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
iv = out;
}
memcpy(ivec,iv,CAMELLIA_BLOCK_SIZE);
for(n=0; n < CAMELLIA_BLOCK_SIZE; ++n)
out[n] = in[n] ^ iv[n];
Camellia_encrypt(out, out, key);
iv = out;
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
else if (in != out)
if (len)
{
while (len >= CAMELLIA_BLOCK_SIZE)
{
memcpy(out,in,CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
key->dec(key->rd_key,(u32 *)out);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
XOR4WORD((u32 *)out, (u32 *)iv);
iv = in;
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
if (len)
{
memcpy(tmp.t8, in, CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
key->dec(key->rd_key, tmp.t32);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
for(n=0; n < len; ++n)
out[n] = tmp.t8[n] ^ iv[n];
iv = in;
}
memcpy(ivec,iv,CAMELLIA_BLOCK_SIZE);
for(n=0; n < len; ++n)
out[n] = in[n] ^ iv[n];
for(n=len; n < CAMELLIA_BLOCK_SIZE; ++n)
out[n] = iv[n];
Camellia_encrypt(out, out, key);
iv = out;
}
else /* in == out */
memcpy(ivec,iv,CAMELLIA_BLOCK_SIZE);
}
else if (in != out)
{
while (len >= CAMELLIA_BLOCK_SIZE)
{
while (len >= CAMELLIA_BLOCK_SIZE)
{
memcpy(tmp.t8, in, CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
key->dec(key->rd_key, (u32 *)out);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
XOR4WORD((u32 *)out, (u32 *)ivec);
memcpy(ivec, tmp.t8, CAMELLIA_BLOCK_SIZE);
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
if (len)
{
memcpy(tmp.t8, in, CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
key->dec(key->rd_key,(u32 *)out);
if (camellia_endian.little)
SWAP4WORD((u32 *)out);
for(n=0; n < len; ++n)
out[n] ^= ivec[n];
for(n=len; n < CAMELLIA_BLOCK_SIZE; ++n)
out[n] = tmp.t8[n];
memcpy(ivec, tmp.t8, CAMELLIA_BLOCK_SIZE);
}
Camellia_decrypt(in, out, key);
for(n=0; n < CAMELLIA_BLOCK_SIZE; ++n)
out[n] ^= iv[n];
iv = in;
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
if (len)
{
Camellia_decrypt(in,tmp,key);
for(n=0; n < len; ++n)
out[n] = tmp[n] ^ iv[n];
iv = in;
}
memcpy(ivec,iv,CAMELLIA_BLOCK_SIZE);
}
else
{
while (len >= CAMELLIA_BLOCK_SIZE)
{
memcpy(tmp, in, CAMELLIA_BLOCK_SIZE);
Camellia_decrypt(in, out, key);
for(n=0; n < CAMELLIA_BLOCK_SIZE; ++n)
out[n] ^= ivec[n];
memcpy(ivec, tmp, CAMELLIA_BLOCK_SIZE);
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
if (len)
{
memcpy(tmp, in, CAMELLIA_BLOCK_SIZE);
Camellia_decrypt(tmp, out, key);
for(n=0; n < len; ++n)
out[n] ^= ivec[n];
for(n=len; n < CAMELLIA_BLOCK_SIZE; ++n)
out[n] = tmp[n];
memcpy(ivec, tmp, CAMELLIA_BLOCK_SIZE);
}
}
else /* no aligned */
{
if (CAMELLIA_ENCRYPT == enc)
{
while (len >= CAMELLIA_BLOCK_SIZE)
{
for(n=0; n < CAMELLIA_BLOCK_SIZE; ++n)
tmp.t8[n] = in[n] ^ iv[n];
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
key->enc(key->rd_key, tmp.t32);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
memcpy(out, tmp.t8, CAMELLIA_BLOCK_SIZE);
iv = out;
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
if (len)
{
for(n=0; n < len; ++n)
tmp.t8[n] = in[n] ^ iv[n];
for(n=len; n < CAMELLIA_BLOCK_SIZE; ++n)
tmp.t8[n] = iv[n];
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
key->enc(key->rd_key, tmp.t32);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
memcpy(out, tmp.t8, CAMELLIA_BLOCK_SIZE);
iv = out;
}
memcpy(ivec,iv,CAMELLIA_BLOCK_SIZE);
}
else if (in != out)
{
while (len >= CAMELLIA_BLOCK_SIZE)
{
memcpy(tmp.t8,in,CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
key->dec(key->rd_key,tmp.t32);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
for(n=0; n < CAMELLIA_BLOCK_SIZE; ++n)
out[n] = tmp.t8[n] ^ iv[n];
iv = in;
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
if (len)
{
memcpy(tmp.t8, in, CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
key->dec(key->rd_key, tmp.t32);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
for(n=0; n < len; ++n)
out[n] = tmp.t8[n] ^ iv[n];
iv = in;
}
memcpy(ivec,iv,CAMELLIA_BLOCK_SIZE);
}
else
{
while (len >= CAMELLIA_BLOCK_SIZE)
{
memcpy(tmp.t8, in, CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
key->dec(key->rd_key, tmp.t32);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
for(n=0; n < CAMELLIA_BLOCK_SIZE; ++n)
tmp.t8[n] ^= ivec[n];
memcpy(ivec, in, CAMELLIA_BLOCK_SIZE);
memcpy(out, tmp.t8, CAMELLIA_BLOCK_SIZE);
len -= CAMELLIA_BLOCK_SIZE;
in += CAMELLIA_BLOCK_SIZE;
out += CAMELLIA_BLOCK_SIZE;
}
if (len)
{
memcpy(tmp.t8, in, CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
key->dec(key->rd_key,tmp.t32);
if (camellia_endian.little)
SWAP4WORD(tmp.t32);
for(n=0; n < len; ++n)
tmp.t8[n] ^= ivec[n];
memcpy(ivec, in, CAMELLIA_BLOCK_SIZE);
memcpy(out,tmp.t8,len);
}
}
}
}
}

View File

@ -173,7 +173,7 @@ void Camellia_cfbr_encrypt_block(const unsigned char *in,unsigned char *out,
unsigned char *ivec,const int enc)
{
int n,rem,num;
unsigned char ovec[CAMELLIA_BLOCK_SIZE*2 + 1]; /* +1 because we dereference (but don't use) one byte beyond the end */
unsigned char ovec[CAMELLIA_BLOCK_SIZE*2];
if (nbits<=0 || nbits>128) return;

View File

@ -68,98 +68,36 @@
#ifndef HEADER_CAMELLIA_LOCL_H
#define HEADER_CAMELLIA_LOCL_H
#include "openssl/e_os2.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef unsigned int u32;
typedef unsigned char u8;
typedef unsigned int u32;
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_AMD64) || defined(_M_X64))
# define SWAP(x) ( _lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00 )
# define GETU32(p) SWAP(*((u32 *)(p)))
# define PUTU32(ct, st) { *((u32 *)(ct)) = SWAP((st)); }
# define CAMELLIA_SWAP4(x) (x = ( _lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00) )
#else /* not windows */
# define GETU32(pt) (((u32)(pt)[0] << 24) \
^ ((u32)(pt)[1] << 16) \
^ ((u32)(pt)[2] << 8) \
^ ((u32)(pt)[3]))
# define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); \
(ct)[1] = (u8)((st) >> 16); \
(ct)[2] = (u8)((st) >> 8); \
(ct)[3] = (u8)(st); }
#if (defined (__GNUC__) && (defined(__x86_64__) || defined(__x86_64)))
#define CAMELLIA_SWAP4(x) \
do{\
asm("bswap %1" : "+r" (x));\
}while(0)
# if _MSC_VER >= 1400
# define SWAP(x) _byteswap_ulong(x)
# else
# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
# endif
# define GETU32(p) SWAP(*((u32 *)(p)))
# define PUTU32(p,v) (*((u32 *)(p)) = SWAP((v)))
#elif defined(__GNUC__) && __GNUC__>=2 && (defined(__i386) || defined(__x86_64))
# if defined(B_ENDIAN) /* stratus.com does it */
# define GETU32(p) (*(u32 *)(p))
# define PUTU32(p,v) (*(u32 *)(p)=(v))
# else
# define GETU32(p) ({u32 r=*(const u32 *)(p); asm("bswapl %0":"=r"(r):"0"(r)); r; })
# define PUTU32(p,v) ({u32 r=(v); asm("bswapl %0":"=r"(r):"0"(r)); *(u32 *)(p)=r; })
# endif
#elif defined(__s390__) || defined(__s390x__)
# define GETU32(p) (*(u32 *)(p))
# define PUTU32(p,v) (*(u32 *)(p)=(v))
#else
#define CAMELLIA_SWAP4(x) \
do{\
x = ((u32)x << 16) + ((u32)x >> 16);\
x = (((u32)x & 0xff00ff) << 8) + (((u32)x >> 8) & 0xff00ff);\
} while(0)
#endif
#endif
#define COPY4WORD(dst, src) \
do \
{ \
(dst)[0]=(src)[0]; \
(dst)[1]=(src)[1]; \
(dst)[2]=(src)[2]; \
(dst)[3]=(src)[3]; \
}while(0)
#define SWAP4WORD(word) \
do \
{ \
CAMELLIA_SWAP4((word)[0]); \
CAMELLIA_SWAP4((word)[1]); \
CAMELLIA_SWAP4((word)[2]); \
CAMELLIA_SWAP4((word)[3]); \
}while(0)
#define XOR4WORD(a, b)/* a = a ^ b */ \
do \
{ \
(a)[0]^=(b)[0]; \
(a)[1]^=(b)[1]; \
(a)[2]^=(b)[2]; \
(a)[3]^=(b)[3]; \
}while(0)
#define XOR4WORD2(a, b, c)/* a = b ^ c */ \
do \
{ \
(a)[0]=(b)[0]^(c)[0]; \
(a)[1]=(b)[1]^(c)[1]; \
(a)[2]=(b)[2]^(c)[2]; \
(a)[3]=(b)[3]^(c)[3]; \
}while(0)
void camellia_setup128(const u8 *key, u32 *subkey);
void camellia_setup192(const u8 *key, u32 *subkey);
void camellia_setup256(const u8 *key, u32 *subkey);
void camellia_encrypt128(const u32 *subkey, u32 *io);
void camellia_decrypt128(const u32 *subkey, u32 *io);
void camellia_encrypt256(const u32 *subkey, u32 *io);
void camellia_decrypt256(const u32 *subkey, u32 *io);
#ifdef __cplusplus
}
# define GETU32(p) (((u32)(p)[0] << 24) ^ ((u32)(p)[1] << 16) ^ ((u32)(p)[2] << 8) ^ ((u32)(p)[3]))
# define PUTU32(p,v) ((p)[0] = (u8)((v) >> 24), (p)[1] = (u8)((v) >> 16), (p)[2] = (u8)((v) >> 8), (p)[3] = (u8)(v))
#endif
int Camellia_Ekeygen(int keyBitLength, const u8 *rawKey, KEY_TABLE_TYPE keyTable);
void Camellia_EncryptBlock(int grandRounds, const u8 plaintext[],
const KEY_TABLE_TYPE keyTable, u8 ciphertext[]);
void Camellia_DecryptBlock(int grandRounds, const u8 ciphertext[],
const KEY_TABLE_TYPE keyTable, u8 plaintext[]);
#endif /* #ifndef HEADER_CAMELLIA_LOCL_H */

View File

@ -58,59 +58,22 @@ const char CAMELLIA_version[]="CAMELLIA" OPENSSL_VERSION_PTEXT;
int Camellia_set_key(const unsigned char *userKey, const int bits,
CAMELLIA_KEY *key)
{
if (!userKey || !key)
{
if(!userKey || !key)
return -1;
}
switch(bits)
{
case 128:
camellia_setup128(userKey, (unsigned int *)key->rd_key);
key->enc = camellia_encrypt128;
key->dec = camellia_decrypt128;
break;
case 192:
camellia_setup192(userKey, (unsigned int *)key->rd_key);
key->enc = camellia_encrypt256;
key->dec = camellia_decrypt256;
break;
case 256:
camellia_setup256(userKey, (unsigned int *)key->rd_key);
key->enc = camellia_encrypt256;
key->dec = camellia_decrypt256;
break;
default:
if(bits != 128 && bits != 192 && bits != 256)
return -2;
}
key->bitLength = bits;
key->grand_rounds = Camellia_Ekeygen(bits , userKey, key->u.rd_key);
return 0;
}
void Camellia_encrypt(const unsigned char *in, unsigned char *out,
const CAMELLIA_KEY *key)
{
u32 tmp[CAMELLIA_BLOCK_SIZE/sizeof(u32)];
const union { long one; char little; } camellia_endian = {1};
memcpy(tmp, in, CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little) SWAP4WORD(tmp);
key->enc(key->rd_key, tmp);
if (camellia_endian.little) SWAP4WORD(tmp);
memcpy(out, tmp, CAMELLIA_BLOCK_SIZE);
Camellia_EncryptBlock(key->grand_rounds, in , key->u.rd_key , out);
}
void Camellia_decrypt(const unsigned char *in, unsigned char *out,
const CAMELLIA_KEY *key)
{
u32 tmp[CAMELLIA_BLOCK_SIZE/sizeof(u32)];
const union { long one; char little; } camellia_endian = {1};
memcpy(tmp, in, CAMELLIA_BLOCK_SIZE);
if (camellia_endian.little) SWAP4WORD(tmp);
key->dec(key->rd_key, tmp);
if (camellia_endian.little) SWAP4WORD(tmp);
memcpy(out, tmp, CAMELLIA_BLOCK_SIZE);
Camellia_DecryptBlock(key->grand_rounds, in , key->u.rd_key , out);
}