openssl/crypto/sm3/sm3_local.h
fangming.fang 71396cd048 SM3 acceleration with SM3 hardware instruction on aarch64
SM3 hardware instruction is optional feature of crypto extension for
aarch64. This implementation accelerates SM3 via SM3 instructions. For
the platform not supporting SM3 instruction, the original C
implementation still works. Thanks to AliBaba for testing and reporting
the following perf numbers for Yitian710:

Benchmark on T-Head Yitian-710 2.75GHz:

Before:
type  16 bytes     64 bytes    256 bytes    1024 bytes   8192 bytes   16384 bytes
sm3   49297.82k   121062.63k   223106.05k   283371.52k   307574.10k   309400.92k

After (33% - 74% faster):
type  16 bytes     64 bytes    256 bytes    1024 bytes   8192 bytes   16384 bytes
sm3   65640.01k   179121.79k   359854.59k   481448.96k   534055.59k   538274.47k

Reviewed-by: Paul Dale <pauli@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/17454)
2022-01-14 11:40:05 +01:00

95 lines
3.4 KiB
C

/*
* Copyright 2017-2021 The OpenSSL Project Authors. All Rights Reserved.
* Copyright 2017 Ribose Inc. All Rights Reserved.
* Ported from Ribose contributions from Botan.
*
* Licensed under the Apache License 2.0 (the "License"). You may not use
* this file except in compliance with the License. You can obtain a copy
* in the file LICENSE in the source distribution or at
* https://www.openssl.org/source/license.html
*/
#include <string.h>
#include "internal/sm3.h"
#define DATA_ORDER_IS_BIG_ENDIAN
#define HASH_LONG SM3_WORD
#define HASH_CTX SM3_CTX
#define HASH_CBLOCK SM3_CBLOCK
#define HASH_UPDATE ossl_sm3_update
#define HASH_TRANSFORM ossl_sm3_transform
#define HASH_FINAL ossl_sm3_final
#define HASH_MAKE_STRING(c, s) \
do { \
unsigned long ll; \
ll=(c)->A; (void)HOST_l2c(ll, (s)); \
ll=(c)->B; (void)HOST_l2c(ll, (s)); \
ll=(c)->C; (void)HOST_l2c(ll, (s)); \
ll=(c)->D; (void)HOST_l2c(ll, (s)); \
ll=(c)->E; (void)HOST_l2c(ll, (s)); \
ll=(c)->F; (void)HOST_l2c(ll, (s)); \
ll=(c)->G; (void)HOST_l2c(ll, (s)); \
ll=(c)->H; (void)HOST_l2c(ll, (s)); \
} while (0)
#if defined(OPENSSL_SM3_ASM)
# if defined(__aarch64__)
# include "crypto/arm_arch.h"
# define HWSM3_CAPABLE (OPENSSL_armcap_P & ARMV8_SM3)
void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num);
# endif
#endif
#if defined(HWSM3_CAPABLE)
# define HASH_BLOCK_DATA_ORDER (HWSM3_CAPABLE ? ossl_hwsm3_block_data_order \
: ossl_sm3_block_data_order)
#else
# define HASH_BLOCK_DATA_ORDER ossl_sm3_block_data_order
#endif
void ossl_sm3_block_data_order(SM3_CTX *c, const void *p, size_t num);
void ossl_sm3_transform(SM3_CTX *c, const unsigned char *data);
#include "crypto/md32_common.h"
#define P0(X) (X ^ ROTATE(X, 9) ^ ROTATE(X, 17))
#define P1(X) (X ^ ROTATE(X, 15) ^ ROTATE(X, 23))
#define FF0(X,Y,Z) (X ^ Y ^ Z)
#define GG0(X,Y,Z) (X ^ Y ^ Z)
#define FF1(X,Y,Z) ((X & Y) | ((X | Y) & Z))
#define GG1(X,Y,Z) ((Z ^ (X & (Y ^ Z))))
#define EXPAND(W0,W7,W13,W3,W10) \
(P1(W0 ^ W7 ^ ROTATE(W13, 15)) ^ ROTATE(W3, 7) ^ W10)
#define RND(A, B, C, D, E, F, G, H, TJ, Wi, Wj, FF, GG) \
do { \
const SM3_WORD A12 = ROTATE(A, 12); \
const SM3_WORD A12_SM = A12 + E + TJ; \
const SM3_WORD SS1 = ROTATE(A12_SM, 7); \
const SM3_WORD TT1 = FF(A, B, C) + D + (SS1 ^ A12) + (Wj); \
const SM3_WORD TT2 = GG(E, F, G) + H + SS1 + Wi; \
B = ROTATE(B, 9); \
D = TT1; \
F = ROTATE(F, 19); \
H = P0(TT2); \
} while(0)
#define R1(A,B,C,D,E,F,G,H,TJ,Wi,Wj) \
RND(A,B,C,D,E,F,G,H,TJ,Wi,Wj,FF0,GG0)
#define R2(A,B,C,D,E,F,G,H,TJ,Wi,Wj) \
RND(A,B,C,D,E,F,G,H,TJ,Wi,Wj,FF1,GG1)
#define SM3_A 0x7380166fUL
#define SM3_B 0x4914b2b9UL
#define SM3_C 0x172442d7UL
#define SM3_D 0xda8a0600UL
#define SM3_E 0xa96f30bcUL
#define SM3_F 0x163138aaUL
#define SM3_G 0xe38dee4dUL
#define SM3_H 0xb0fb0e4eUL