openssl/providers/implementations/ciphers/ciphercommon_hw.c
XiaokangQian 2ff16afc17 Optimize AES-ECB mode in OpenSSL for both aarch64 and aarch32
AES-ECB mode can be optimized by interleaving cipher operations on
several blocks and by loop unrolling. Interleaving needs an ideal
unrolling factor; here we adopt the same factor as aes-cbc,
which is described below:
    If the number of blocks > 5, select 5 blocks as one iteration; every
    loop, decrease the number of blocks by 5.
    If 3 < remaining blocks < 5, select 3 blocks as one iteration; every
    loop, decrease the number of blocks by 3.
    If remaining blocks < 3, treat them as tail blocks.
The detailed implementation is adjusted slightly to reduce code
size.
In this way, for small sizes such as 16 bytes, the performance is
similar to before, but for big sizes such as 16k bytes, the performance
improves a lot, even reaching 100%; for some arches such as A57,
the improvement even exceeds 100%. The following tables list the
encryption performance data on aarch64, taking A72 and A57 as examples.
Performance values are in cycles per byte and are shown as a
comparison of the values before and after optimization:

A72:
                            Before optimization     After optimization  Improve
evp-aes-128-ecb@16          17.26538237             16.82663866         2.61%
evp-aes-128-ecb@64          5.50528499              5.222637557         5.41%
evp-aes-128-ecb@256         2.632700213             1.908442892         37.95%
evp-aes-128-ecb@1024        1.876102047             1.078018868         74.03%
evp-aes-128-ecb@8192        1.6550392               0.853982929         93.80%
evp-aes-128-ecb@16384       1.636871283             0.847623957         93.11%
evp-aes-192-ecb@16          17.73104961             17.09692468         3.71%
evp-aes-192-ecb@64          5.78984398              5.418545192         6.85%
evp-aes-192-ecb@256         2.872005308             2.081815274         37.96%
evp-aes-192-ecb@1024        2.083226672             1.25095642          66.53%
evp-aes-192-ecb@8192        1.831992057             0.995916251         83.95%
evp-aes-192-ecb@16384       1.821590009             0.993820525         83.29%
evp-aes-256-ecb@16          18.0606306              17.96963317         0.51%
evp-aes-256-ecb@64          6.19651997              5.762465812         7.53%
evp-aes-256-ecb@256         3.176991394             2.24642538          41.42%
evp-aes-256-ecb@1024        2.385991919             1.396018192         70.91%
evp-aes-256-ecb@8192        2.147862636             1.142222597         88.04%
evp-aes-256-ecb@16384       2.131361787             1.135944617         87.63%

A57:
                            Before optimization     After optimization  Improve
evp-aes-128-ecb@16          18.61045121             18.36456218         1.34%
evp-aes-128-ecb@64          6.438628994             5.467959461         17.75%
evp-aes-128-ecb@256         2.957452881             1.97238604          49.94%
evp-aes-128-ecb@1024        2.117096219             1.099665054         92.52%
evp-aes-128-ecb@8192        1.868385973             0.837440804         123.11%
evp-aes-128-ecb@16384       1.853078526             0.822420027         125.32%
evp-aes-192-ecb@16          19.07021756             18.50018552         3.08%
evp-aes-192-ecb@64          6.672351486             5.696088921         17.14%
evp-aes-192-ecb@256         3.260427769             2.131449916         52.97%
evp-aes-192-ecb@1024        2.410522832             1.250529718         92.76%
evp-aes-192-ecb@8192        2.17921605              0.973225504         123.92%
evp-aes-192-ecb@16384       2.162250997             0.95919871          125.42%
evp-aes-256-ecb@16          19.3008384              19.12743654         0.91%
evp-aes-256-ecb@64          6.992950658             5.92149541          18.09%
evp-aes-256-ecb@256         3.576361743             2.287619504         56.34%
evp-aes-256-ecb@1024        2.726671027             1.381267599         97.40%
evp-aes-256-ecb@8192        2.493583657             1.110959913         124.45%
evp-aes-256-ecb@16384       2.473916816             1.099967073         124.91%

Change-Id: Iccd23d972e0d52d22dc093f4c208f69c9d5a0ca7

Reviewed-by: Shane Lontis <shane.lontis@oracle.com>
Reviewed-by: Richard Levitte <levitte@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/10518)
2019-12-11 18:56:11 +01:00

195 lines
5.3 KiB
C

/*
* Copyright 2019 The OpenSSL Project Authors. All Rights Reserved.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#include "prov/ciphercommon.h"
/*-
* The generic cipher functions for cipher modes cbc, ecb, ofb, cfb and ctr.
* Used if there are no special hardware implementations.
*/
/*
 * CBC-process |len| bytes from |in| into |out|.
 *
 * If the context carries a stream CBC routine it is called directly with the
 * context's key schedule, IV and direction flag.  Otherwise the generic
 * CRYPTO_cbc128_encrypt/decrypt helper is selected by dat->enc and driven
 * with the context's block function.  Always returns 1.
 */
int cipher_hw_generic_cbc(PROV_CIPHER_CTX *dat, unsigned char *out,
                          const unsigned char *in, size_t len)
{
    /* Preferred path: a dedicated stream implementation. */
    if (dat->stream.cbc) {
        (*dat->stream.cbc)(in, out, len, dat->ks, dat->iv, dat->enc);
        return 1;
    }

    /* Fallback: generic CBC built on the single-block function. */
    if (dat->enc) {
        CRYPTO_cbc128_encrypt(in, out, len, dat->ks, dat->iv, dat->block);
    } else {
        CRYPTO_cbc128_decrypt(in, out, len, dat->ks, dat->iv, dat->block);
    }

    return 1;
}
/*
 * ECB-process |len| bytes from |in| into |out|.
 *
 * Input shorter than one block (dat->blocksize) is left untouched.  If the
 * context has a stream ECB routine, it is handed the whole length.
 * Otherwise the block function is applied to each complete block in turn;
 * trailing bytes that do not fill a block are not processed by this loop.
 * Always returns 1.
 */
int cipher_hw_generic_ecb(PROV_CIPHER_CTX *dat, unsigned char *out,
                          const unsigned char *in, size_t len)
{
    size_t blk = dat->blocksize;
    size_t off, last;

    /* Nothing to do without at least one full block. */
    if (len < blk)
        return 1;

    if (dat->stream.ecb) {
        (*dat->stream.ecb)(in, out, len, dat->ks, dat->enc);
        return 1;
    }

    /* |last| is the highest offset at which a full block still fits. */
    last = len - blk;
    off = 0;
    while (off <= last) {
        (*dat->block)(in + off, out + off, dat->ks);
        off += blk;
    }

    return 1;
}
/*
 * OFB128-process |len| bytes from |in| into |out| via CRYPTO_ofb128_encrypt.
 *
 * dat->num is copied into a local int so its address can be passed to the
 * helper, and the value the helper leaves there is stored back afterwards.
 * Always returns 1.
 */
int cipher_hw_generic_ofb128(PROV_CIPHER_CTX *dat, unsigned char *out,
                             const unsigned char *in, size_t len)
{
    int pos;

    pos = dat->num;
    CRYPTO_ofb128_encrypt(in, out, len, dat->ks, dat->iv, &pos, dat->block);
    dat->num = pos;

    return 1;
}
/*
 * CFB128-process |len| bytes from |in| into |out| via CRYPTO_cfb128_encrypt,
 * in the direction given by dat->enc.
 *
 * dat->num is copied into a local int so its address can be passed to the
 * helper, and the value the helper leaves there is stored back afterwards.
 * Always returns 1.
 */
int cipher_hw_generic_cfb128(PROV_CIPHER_CTX *dat, unsigned char *out,
                             const unsigned char *in, size_t len)
{
    int pos;

    pos = dat->num;
    CRYPTO_cfb128_encrypt(in, out, len, dat->ks, dat->iv, &pos, dat->enc,
                          dat->block);
    dat->num = pos;

    return 1;
}
/*
 * CFB8-process |len| bytes from |in| into |out| via CRYPTO_cfb128_8_encrypt,
 * in the direction given by dat->enc.
 *
 * dat->num is copied into a local int so its address can be passed to the
 * helper, and the value the helper leaves there is stored back afterwards.
 * Always returns 1.
 */
int cipher_hw_generic_cfb8(PROV_CIPHER_CTX *dat, unsigned char *out,
                           const unsigned char *in, size_t len)
{
    int pos;

    pos = dat->num;
    CRYPTO_cfb128_8_encrypt(in, out, len, dat->ks, dat->iv, &pos, dat->enc,
                            dat->block);
    dat->num = pos;

    return 1;
}
/*
 * CFB1-process input from |in| into |out| via CRYPTO_cfb128_1_encrypt.
 *
 * When EVP_CIPH_FLAG_LENGTH_BITS is set in dat->flags, |len| is passed to
 * the helper unchanged in a single call.  Otherwise the input is walked in
 * pieces of MAXBITCHUNK, each passed with its length multiplied by 8,
 * followed by one call for any remainder (also multiplied by 8).
 *
 * dat->num is carried through a local int across all helper calls and
 * written back before returning.  Always returns 1.
 */
int cipher_hw_generic_cfb1(PROV_CIPHER_CTX *dat, unsigned char *out,
                           const unsigned char *in, size_t len)
{
    int pos = dat->num;

    if ((dat->flags & EVP_CIPH_FLAG_LENGTH_BITS) == 0) {
        /* Full-sized pieces first. */
        for (; len >= MAXBITCHUNK; len -= MAXBITCHUNK) {
            CRYPTO_cfb128_1_encrypt(in, out, MAXBITCHUNK * 8, dat->ks,
                                    dat->iv, &pos, dat->enc, dat->block);
            out += MAXBITCHUNK;
            in += MAXBITCHUNK;
        }
        /* Then whatever is left over. */
        if (len != 0)
            CRYPTO_cfb128_1_encrypt(in, out, len * 8, dat->ks, dat->iv, &pos,
                                    dat->enc, dat->block);
    } else {
        /* Length is handed through as-is. */
        CRYPTO_cfb128_1_encrypt(in, out, len, dat->ks, dat->iv, &pos,
                                dat->enc, dat->block);
    }

    dat->num = pos;
    return 1;
}
/*
 * CTR-process |len| bytes from |in| into |out|.
 *
 * With a stream CTR routine in the context, CRYPTO_ctr128_encrypt_ctr32 is
 * used and given that routine; without one, CRYPTO_ctr128_encrypt is used
 * with the context's block function.  Both receive dat->iv and dat->buf.
 *
 * dat->num is carried through a local unsigned int whose address is passed
 * to the helper, and is written back afterwards.  Always returns 1.
 */
int cipher_hw_generic_ctr(PROV_CIPHER_CTX *dat, unsigned char *out,
                          const unsigned char *in, size_t len)
{
    unsigned int pos;

    pos = dat->num;

    if (!dat->stream.ctr) {
        CRYPTO_ctr128_encrypt(in, out, len, dat->ks, dat->iv, dat->buf,
                              &pos, dat->block);
    } else {
        CRYPTO_ctr128_encrypt_ctr32(in, out, len, dat->ks, dat->iv, dat->buf,
                                    &pos, dat->stream.ctr);
    }

    dat->num = pos;
    return 1;
}
/*-
* The chunked cipher functions for cipher modes cbc, ecb, ofb, cfb and ctr.
* Used if there are no special hardware implementations.
*/
/*
 * CBC-process |inl| bytes from |in| into |out| in pieces of at most
 * MAXCHUNK bytes, delegating each piece to cipher_hw_generic_cbc().
 * Always returns 1.
 */
int cipher_hw_chunked_cbc(PROV_CIPHER_CTX *ctx, unsigned char *out,
                          const unsigned char *in, size_t inl)
{
    /* Full MAXCHUNK-sized pieces. */
    for (; inl >= MAXCHUNK; inl -= MAXCHUNK) {
        cipher_hw_generic_cbc(ctx, out, in, MAXCHUNK);
        out += MAXCHUNK;
        in += MAXCHUNK;
    }

    /* Remainder shorter than MAXCHUNK, if any. */
    if (inl != 0)
        cipher_hw_generic_cbc(ctx, out, in, inl);

    return 1;
}
/*
 * CFB8-process |inl| bytes from |in| into |out| in pieces of at most
 * MAXCHUNK bytes, delegating each piece to cipher_hw_generic_cfb8().
 *
 * |chunk| starts as min(inl, MAXCHUNK) and is shrunk to the remaining length
 * once less than a full chunk is left, so the final iteration handles the
 * tail.  Always returns 1.
 */
int cipher_hw_chunked_cfb8(PROV_CIPHER_CTX *ctx, unsigned char *out,
                           const unsigned char *in, size_t inl)
{
    size_t chunk = MAXCHUNK;

    if (inl < chunk)
        chunk = inl;

    while (inl > 0 && inl >= chunk) {
        /*
         * Process exactly one chunk.  Passing the full remaining length
         * here would make every iteration run over all remaining bytes
         * while the pointers advance by only |chunk|, re-processing data
         * and running past the end of the buffers when inl > MAXCHUNK.
         */
        cipher_hw_generic_cfb8(ctx, out, in, chunk);
        inl -= chunk;
        in += chunk;
        out += chunk;
        if (inl < chunk)
            chunk = inl;
    }

    return 1;
}
/*
 * CFB128-process |inl| bytes from |in| into |out| in pieces of at most
 * MAXCHUNK bytes, delegating each piece to cipher_hw_generic_cfb128().
 *
 * |chunk| starts as min(inl, MAXCHUNK) and is shrunk to the remaining length
 * once less than a full chunk is left, so the final iteration handles the
 * tail.  Always returns 1.
 */
int cipher_hw_chunked_cfb128(PROV_CIPHER_CTX *ctx, unsigned char *out,
                             const unsigned char *in, size_t inl)
{
    size_t chunk = MAXCHUNK;

    if (inl < chunk)
        chunk = inl;

    while (inl > 0 && inl >= chunk) {
        /*
         * Process exactly one chunk.  Passing the full remaining length
         * here would make every iteration run over all remaining bytes
         * while the pointers advance by only |chunk|, re-processing data
         * and running past the end of the buffers when inl > MAXCHUNK.
         */
        cipher_hw_generic_cfb128(ctx, out, in, chunk);
        inl -= chunk;
        in += chunk;
        out += chunk;
        if (inl < chunk)
            chunk = inl;
    }

    return 1;
}
/*
 * OFB128-process |inl| bytes from |in| into |out| in pieces of at most
 * MAXCHUNK bytes, delegating each piece to cipher_hw_generic_ofb128().
 * Always returns 1.
 */
int cipher_hw_chunked_ofb128(PROV_CIPHER_CTX *ctx, unsigned char *out,
                             const unsigned char *in, size_t inl)
{
    /* Full MAXCHUNK-sized pieces. */
    for (; inl >= MAXCHUNK; inl -= MAXCHUNK) {
        cipher_hw_generic_ofb128(ctx, out, in, MAXCHUNK);
        out += MAXCHUNK;
        in += MAXCHUNK;
    }

    /* Remainder shorter than MAXCHUNK, if any. */
    if (inl != 0)
        cipher_hw_generic_ofb128(ctx, out, in, inl);

    return 1;
}