mirror of
https://github.com/openssl/openssl.git
synced 2025-01-12 13:36:28 +08:00
3d68e2937e
This change can improve md5 performance by using a hand-optimized assembly implementation of the inner loop of the md5 calculation. This implementation is based on md5-x86_64.pl, with additional effort put into reordering instructions to separate data dependencies as much as possible.

Test with:
$ openssl speed md5

3A5000
type            16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes  16384 bytes
md5             45061.04k   130440.75k   291105.28k   421101.23k   484639.27k   488320.43k
md5-modified    47179.95k   139015.57k   308836.69k   445963.26k   512540.67k   518215.00k
                +5%         +7%          +6%          +6%          +6%          +6%

3A6000
type            16 bytes     64 bytes    256 bytes   1024 bytes   8192 bytes  16384 bytes
md5             60070.06k   161822.76k   325817.60k   438017.02k   486864.21k   492243.31k
md5-modified    62827.74k   170294.04k   343795.03k   463324.50k   515831.13k   520060.93k
                +5%         +5%          +6%          +6%          +6%          +6%

Signed-off-by: Min Zhou <zhoumin@loongson.cn>
Co-authored-by: Xi Ruoyao <xry111@xry111.site>
Reviewed-by: Shane Lontis <shane.lontis@oracle.com>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/21704)
83 lines
2.6 KiB
C
83 lines
2.6 KiB
C
/*
|
|
* Copyright 1995-2022 The OpenSSL Project Authors. All Rights Reserved.
|
|
*
|
|
* Licensed under the Apache License 2.0 (the "License"). You may not use
|
|
* this file except in compliance with the License. You can obtain a copy
|
|
* in the file LICENSE in the source distribution or at
|
|
* https://www.openssl.org/source/license.html
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <openssl/e_os2.h>
|
|
#include <openssl/md5.h>
|
|
|
|
/*
 * When MD5_ASM is defined (presumably by the build when an assembly
 * implementation is linked in -- confirm in the build files), rename
 * md5_block_data_order to ossl_md5_block_asm_data_order on the
 * architectures listed here: x86, x86-64, AArch64, 64-bit LoongArch,
 * IA-64 and SPARC.  On any other target, or without MD5_ASM, the name is
 * left untouched.
 *
 * Every listed architecture maps to the same symbol, so a single
 * condition is used instead of repeating the identical #define in
 * several #elif branches.
 */
#ifdef MD5_ASM
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
     defined(_M_X64) || defined(__aarch64__) || \
     (defined(__loongarch__) && __loongarch_grlen == 64) || \
     defined(__ia64) || defined(__ia64__) || defined(_M_IA64) || \
     defined(__sparc) || defined(__sparc__)
#  define md5_block_data_order ossl_md5_block_asm_data_order
# endif
#endif
|
|
|
|
/*
 * Block-processing routine for an MD5 context.  It is exposed to
 * crypto/md32_common.h through the HASH_BLOCK_DATA_ORDER macro defined
 * further down.  When MD5_ASM renames md5_block_data_order (see the
 * conditional above), this line declares ossl_md5_block_asm_data_order
 * instead.
 *
 * c:   context to operate on.
 * p:   input data (read-only).
 * num: NOTE(review): presumably a count of MD5_CBLOCK-sized blocks at p
 *      rather than a byte count -- confirm against the definition.
 */
void md5_block_data_order(MD5_CTX *c, const void *p, size_t num);
|
|
|
|
/*
 * Configuration macros set up before crypto/md32_common.h is included
 * further down.  NOTE(review): that header is presumably a shared template
 * parameterized by these names; its contents are not visible here, so
 * confirm the exact contract there.
 */

/* Request the little-endian variant of the byte-order helpers. */
#define DATA_ORDER_IS_LITTLE_ENDIAN

/* Map the generic HASH_* names onto their MD5 counterparts. */
#define HASH_LONG               MD5_LONG
#define HASH_CTX                MD5_CTX
#define HASH_CBLOCK             MD5_CBLOCK
#define HASH_UPDATE             MD5_Update
#define HASH_TRANSFORM          MD5_Transform
#define HASH_FINAL              MD5_Final
|
|
/*
 * Emit the four state words of context c -- A, B, C, D, in that order --
 * into the output cursor s, one HOST_l2c call per word.
 *
 * c: pointer to a context with members A, B, C and D.
 * s: output cursor handed to HOST_l2c.
 *
 * Both arguments are expanded four times, so they must be free of side
 * effects.  The temporary is named ll; arguments must not refer to a
 * variable of that name.
 *
 * NOTE(review): HOST_l2c is not defined in this file (presumably it comes
 * from crypto/md32_common.h, stores the word in little-endian order and
 * advances s by four bytes) -- confirm there.
 */
#define HASH_MAKE_STRING(c,s)   do {    \
        unsigned long ll;               \
        ll=(c)->A; (void)HOST_l2c(ll,(s));      \
        ll=(c)->B; (void)HOST_l2c(ll,(s));      \
        ll=(c)->C; (void)HOST_l2c(ll,(s));      \
        ll=(c)->D; (void)HOST_l2c(ll,(s));      \
        } while (0)
|
|
/*
 * Generic name for the block-processing routine declared above.
 * NOTE(review): presumably consumed by crypto/md32_common.h, which is
 * included right after this -- confirm there.
 */
#define HASH_BLOCK_DATA_ORDER md5_block_data_order
|
|
|
|
#include "crypto/md32_common.h"
|
|
|
|
/*-
 * Straightforward definitions of F and G, kept for reference only:
 *
#define F(x,y,z)        (((x) & (y))  |  ((~(x)) & (z)))
#define G(x,y,z)        (((x) & (z))  |  ((y) & (~(z))))
 */
|
|
|
|
/*
 * As pointed out by Wei Dai, the above can be simplified to the code
 * below. Wei attributes these optimizations to Peter Gutmann's
 * SHS code, and he attributes it to Rich Schroeppel.
 */
|
|
/*
 * Bitwise select: each result bit is taken from c where the matching bit
 * of b is 1 and from d where it is 0.  d is expanded twice.
 */
#define F(b,c,d)        ((d) ^ ((b) & ((c) ^ (d))))
|
|
/*
 * Bitwise select keyed on d: each result bit is taken from b where the
 * matching bit of d is 1 and from c where it is 0.  c is expanded twice.
 */
#define G(b,c,d)        ((c) ^ ((d) & ((b) ^ (c))))
|
|
/* Bitwise parity: exclusive-or of all three inputs. */
#define H(b,c,d)        ((b) ^ ((c) ^ (d)))
|
|
/* c exclusive-or'ed with (b OR the bitwise complement of d). */
#define I(b,c,d)        ((c) ^ ((b) | (~(d))))
|
|
|
|
/*
 * MD5 step built on F: add k, t and F(b, c, d) to a, rotate a by s bits
 * with ROTATE, then add b.
 *
 * a:       modifiable lvalue, updated in place.
 * b, c, d: inputs to F; b is also added at the end.
 * k, t:    addends (callers are not visible here; presumably a message
 *          word and a step constant -- confirm at the call sites).
 * s:       rotation amount passed to ROTATE.
 *
 * Arguments may be expanded more than once, so they must be free of side
 * effects.  ROTATE is not defined in this file; it is presumably supplied
 * by crypto/md32_common.h.
 *
 * The body is wrapped in do { } while (0) so that "R0(...);" is exactly
 * one statement and is safe as the body of an unbraced if/else.  Every
 * invocation must therefore be terminated with a semicolon.
 */
#define R0(a,b,c,d,k,s,t) do { \
        (a) += ((k) + (t) + F((b), (c), (d))); \
        (a) = ROTATE((a), (s)); \
        (a) += (b); \
    } while (0)
|
|
|
|
/*
 * MD5 step built on G: add k, t and G(b, c, d) to a, rotate a by s bits
 * with ROTATE, then add b.
 *
 * a:       modifiable lvalue, updated in place.
 * b, c, d: inputs to G; b is also added at the end.
 * k, t:    addends (callers are not visible here; presumably a message
 *          word and a step constant -- confirm at the call sites).
 * s:       rotation amount passed to ROTATE.
 *
 * Arguments may be expanded more than once, so they must be free of side
 * effects.  ROTATE is not defined in this file; it is presumably supplied
 * by crypto/md32_common.h.
 *
 * The body is wrapped in do { } while (0) so that "R1(...);" is exactly
 * one statement and is safe as the body of an unbraced if/else.  Every
 * invocation must therefore be terminated with a semicolon.
 */
#define R1(a,b,c,d,k,s,t) do { \
        (a) += ((k) + (t) + G((b), (c), (d))); \
        (a) = ROTATE((a), (s)); \
        (a) += (b); \
    } while (0)
|
|
|
|
/*
 * MD5 step built on H: add k, t and H(b, c, d) to a, rotate a by s bits
 * with ROTATE, then add b.
 *
 * a:       modifiable lvalue, updated in place.
 * b, c, d: inputs to H; b is also added at the end.
 * k, t:    addends (callers are not visible here; presumably a message
 *          word and a step constant -- confirm at the call sites).
 * s:       rotation amount passed to ROTATE.
 *
 * Arguments may be expanded more than once, so they must be free of side
 * effects.  ROTATE is not defined in this file; it is presumably supplied
 * by crypto/md32_common.h.
 *
 * The body is wrapped in do { } while (0) so that "R2(...);" is exactly
 * one statement and is safe as the body of an unbraced if/else.  Every
 * invocation must therefore be terminated with a semicolon.
 */
#define R2(a,b,c,d,k,s,t) do { \
        (a) += ((k) + (t) + H((b), (c), (d))); \
        (a) = ROTATE((a), (s)); \
        (a) += (b); \
    } while (0)
|
|
|
|
/*
 * MD5 step built on I: add k, t and I(b, c, d) to a, rotate a by s bits
 * with ROTATE, then add b.
 *
 * a:       modifiable lvalue, updated in place.
 * b, c, d: inputs to I; b is also added at the end.
 * k, t:    addends (callers are not visible here; presumably a message
 *          word and a step constant -- confirm at the call sites).
 * s:       rotation amount passed to ROTATE.
 *
 * Arguments may be expanded more than once, so they must be free of side
 * effects.  ROTATE is not defined in this file; it is presumably supplied
 * by crypto/md32_common.h.
 *
 * The body is wrapped in do { } while (0) so that "R3(...);" is exactly
 * one statement and is safe as the body of an unbraced if/else.  Every
 * invocation must therefore be terminated with a semicolon.
 */
#define R3(a,b,c,d,k,s,t) do { \
        (a) += ((k) + (t) + I((b), (c), (d))); \
        (a) = ROTATE((a), (s)); \
        (a) += (b); \
    } while (0)
|