mirror of
https://github.com/openssl/openssl.git
synced 2025-01-30 14:01:55 +08:00
Extension of OPENSSL_ia32cap to accommodate additional CPUID bits
bits 128 - 191 CPUID.(EAX=07H,ECX=0H).EDX and CPUID.(EAX=07H,ECX=1H).EAX bits 192 - 255 CPUID.(EAX=07H,ECX=1H).EDX and CPUID.(EAX=07H,ECX=1H).EBX bits 256 - 319 CPUID.(EAX=07H,ECX=1H).ECX and CPUID.(EAX=24H,ECX=0H).EBX Reviewed-by: Matt Caswell <matt@openssl.org> Reviewed-by: Tomas Mraz <tomas@openssl.org> (Merged from https://github.com/openssl/openssl/pull/25709)
This commit is contained in:
parent
1b3b5a019a
commit
acc2655236
@ -78,6 +78,12 @@ OpenSSL 3.5
|
||||
|
||||
*Paul Dale*
|
||||
|
||||
* Extended `OPENSSL_ia32cap` support to accommodate additional `CPUID`
|
||||
feature/capability bits in leaf `0x7` (Extended Feature Flags) as well
|
||||
as leaf `0x24` (Converged Vector ISA).
|
||||
|
||||
*Dan Zimmerman, Alina Elizarova*
|
||||
|
||||
OpenSSL 3.4
|
||||
-----------
|
||||
|
||||
|
@ -14,7 +14,7 @@
|
||||
defined(__x86_64) || defined(__x86_64__) || \
|
||||
defined(_M_AMD64) || defined(_M_X64)
|
||||
|
||||
extern unsigned int OPENSSL_ia32cap_P[4];
|
||||
extern unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
|
||||
|
||||
# if defined(OPENSSL_CPUID_OBJ)
|
||||
|
||||
@ -29,7 +29,7 @@ extern unsigned int OPENSSL_ia32cap_P[4];
|
||||
*/
|
||||
# ifdef _WIN32
|
||||
typedef WCHAR variant_char;
|
||||
|
||||
# define OPENSSL_IA32CAP_P_MAX_CHAR_SIZE 256
|
||||
static variant_char *ossl_getenv(const char *name)
|
||||
{
|
||||
/*
|
||||
@ -37,10 +37,10 @@ static variant_char *ossl_getenv(const char *name)
|
||||
* just ignore |name| and use equivalent wide-char L-literal.
|
||||
* As well as to ignore excessively long values...
|
||||
*/
|
||||
static WCHAR value[48];
|
||||
DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, 48);
|
||||
static WCHAR value[OPENSSL_IA32CAP_P_MAX_CHAR_SIZE];
|
||||
DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, OPENSSL_IA32CAP_P_MAX_CHAR_SIZE);
|
||||
|
||||
return (len > 0 && len < 48) ? value : NULL;
|
||||
return (len > 0 && len < OPENSSL_IA32CAP_P_MAX_CHAR_SIZE) ? value : NULL;
|
||||
}
|
||||
# else
|
||||
typedef char variant_char;
|
||||
@ -98,6 +98,7 @@ void OPENSSL_cpuid_setup(void)
|
||||
IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
|
||||
IA32CAP vec;
|
||||
const variant_char *env;
|
||||
int index = 2;
|
||||
|
||||
if (trigger)
|
||||
return;
|
||||
@ -126,23 +127,37 @@ void OPENSSL_cpuid_setup(void)
|
||||
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
|
||||
}
|
||||
|
||||
if ((env = ossl_strchr(env, ':')) != NULL) {
|
||||
IA32CAP vecx;
|
||||
|
||||
/* Processed indexes 0, 1 */
|
||||
if ((env = ossl_strchr(env, ':')) != NULL)
|
||||
env++;
|
||||
off = (env[0] == '~') ? 1 : 0;
|
||||
vecx = ossl_strtouint64(env + off);
|
||||
if (off) {
|
||||
OPENSSL_ia32cap_P[2] &= ~(unsigned int)vecx;
|
||||
OPENSSL_ia32cap_P[3] &= ~(unsigned int)(vecx >> 32);
|
||||
} else {
|
||||
OPENSSL_ia32cap_P[2] = (unsigned int)vecx;
|
||||
OPENSSL_ia32cap_P[3] = (unsigned int)(vecx >> 32);
|
||||
for (; index < OPENSSL_IA32CAP_P_MAX_INDEXES; index += 2) {
|
||||
if ((env != NULL) && (env[0] != '\0')) {
|
||||
/* if env[0] == ':' current index is skipped */
|
||||
if (env[0] != ':') {
|
||||
IA32CAP vecx;
|
||||
|
||||
off = (env[0] == '~') ? 1 : 0;
|
||||
vecx = ossl_strtouint64(env + off);
|
||||
if (off) {
|
||||
OPENSSL_ia32cap_P[index] &= ~(unsigned int)vecx;
|
||||
OPENSSL_ia32cap_P[index + 1] &= ~(unsigned int)(vecx >> 32);
|
||||
} else {
|
||||
OPENSSL_ia32cap_P[index] = (unsigned int)vecx;
|
||||
OPENSSL_ia32cap_P[index + 1] = (unsigned int)(vecx >> 32);
|
||||
}
|
||||
}
|
||||
/* skip delimiter */
|
||||
if ((env = ossl_strchr(env, ':')) != NULL)
|
||||
env++;
|
||||
} else { /* zeroize the next two indexes */
|
||||
OPENSSL_ia32cap_P[index] = 0;
|
||||
OPENSSL_ia32cap_P[index + 1] = 0;
|
||||
}
|
||||
} else {
|
||||
OPENSSL_ia32cap_P[2] = 0;
|
||||
OPENSSL_ia32cap_P[3] = 0;
|
||||
}
|
||||
|
||||
/* If AVX10 is disabled, zero out its detailed cap bits */
|
||||
if (!(OPENSSL_ia32cap_P[6] & (1 << 19)))
|
||||
OPENSSL_ia32cap_P[9] = 0;
|
||||
} else {
|
||||
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
|
||||
}
|
||||
@ -156,7 +171,7 @@ void OPENSSL_cpuid_setup(void)
|
||||
OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
|
||||
}
|
||||
# else
|
||||
unsigned int OPENSSL_ia32cap_P[4];
|
||||
unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
@ -30,7 +30,7 @@
|
||||
# include "crypto/riscv_arch.h"
|
||||
# define CPU_INFO_STR_LEN 2048
|
||||
#else
|
||||
# define CPU_INFO_STR_LEN 128
|
||||
# define CPU_INFO_STR_LEN 256
|
||||
#endif
|
||||
|
||||
/* extern declaration to avoid warning */
|
||||
@ -52,11 +52,18 @@ DEFINE_RUN_ONCE_STATIC(init_info_strings)
|
||||
const char *env;
|
||||
|
||||
BIO_snprintf(ossl_cpu_info_str, sizeof(ossl_cpu_info_str),
|
||||
CPUINFO_PREFIX "OPENSSL_ia32cap=0x%llx:0x%llx",
|
||||
CPUINFO_PREFIX "OPENSSL_ia32cap=0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx",
|
||||
(unsigned long long)OPENSSL_ia32cap_P[0] |
|
||||
(unsigned long long)OPENSSL_ia32cap_P[1] << 32,
|
||||
(unsigned long long)OPENSSL_ia32cap_P[2] |
|
||||
(unsigned long long)OPENSSL_ia32cap_P[3] << 32);
|
||||
(unsigned long long)OPENSSL_ia32cap_P[3] << 32,
|
||||
(unsigned long long)OPENSSL_ia32cap_P[4] |
|
||||
(unsigned long long)OPENSSL_ia32cap_P[5] << 32,
|
||||
(unsigned long long)OPENSSL_ia32cap_P[6] |
|
||||
(unsigned long long)OPENSSL_ia32cap_P[7] << 32,
|
||||
(unsigned long long)OPENSSL_ia32cap_P[8] |
|
||||
(unsigned long long)OPENSSL_ia32cap_P[9] << 32);
|
||||
|
||||
if ((env = getenv("OPENSSL_ia32cap")) != NULL)
|
||||
BIO_snprintf(ossl_cpu_info_str + strlen(ossl_cpu_info_str),
|
||||
sizeof(ossl_cpu_info_str) - strlen(ossl_cpu_info_str),
|
||||
|
@ -167,7 +167,8 @@ sub ::file_end
|
||||
}
|
||||
}
|
||||
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
|
||||
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
|
||||
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
|
||||
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,40";
|
||||
if ($::macosx) { push (@out,"$tmp,2\n"); }
|
||||
elsif ($::elf) { push (@out,"$tmp,4\n"); }
|
||||
else { push (@out,"$tmp\n"); }
|
||||
|
@ -139,9 +139,10 @@ ___
|
||||
push(@out,"$segment ENDS\n");
|
||||
|
||||
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
|
||||
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
|
||||
{ my $comm=<<___;
|
||||
.bss SEGMENT 'BSS'
|
||||
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
|
||||
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:10
|
||||
.bss ENDS
|
||||
___
|
||||
# comment out OPENSSL_ia32cap_P declarations
|
||||
|
@ -124,9 +124,10 @@ sub ::function_end_B
|
||||
|
||||
sub ::file_end
|
||||
{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
|
||||
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
|
||||
{ my $comm=<<___;
|
||||
${drdecor}segment .bss
|
||||
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16
|
||||
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 40
|
||||
___
|
||||
# comment out OPENSSL_ia32cap_P declarations
|
||||
grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;
|
||||
|
@ -27,14 +27,14 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
|
||||
("%rdi","%rsi","%rdx","%rcx"); # Unix order
|
||||
|
||||
print<<___;
|
||||
#include crypto/cryptlib.h
|
||||
.extern OPENSSL_cpuid_setup
|
||||
.hidden OPENSSL_cpuid_setup
|
||||
.section .init
|
||||
call OPENSSL_cpuid_setup
|
||||
|
||||
.hidden OPENSSL_ia32cap_P
|
||||
.comm OPENSSL_ia32cap_P,16,4
|
||||
|
||||
.comm OPENSSL_ia32cap_P,40,4 # <--Should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
|
||||
.text
|
||||
|
||||
.globl OPENSSL_atomic_add
|
||||
@ -192,6 +192,7 @@ OPENSSL_ia32_cpuid:
|
||||
mov \$7,%eax
|
||||
xor %ecx,%ecx
|
||||
cpuid
|
||||
movd %eax,%xmm1 # put aside leaf 07H Max Sub-leaves
|
||||
bt \$26,%r9d # check XSAVE bit, cleared on Knights
|
||||
jc .Lnotknights
|
||||
and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag
|
||||
@ -202,9 +203,31 @@ OPENSSL_ia32_cpuid:
|
||||
jne .Lnotskylakex
|
||||
and \$0xfffeffff,%ebx # ~(1<<16)
|
||||
# suppress AVX512F flag on Skylake-X
|
||||
.Lnotskylakex:
|
||||
mov %ebx,8(%rdi) # save extended feature flags
|
||||
mov %ecx,12(%rdi)
|
||||
|
||||
.Lnotskylakex: # save extended feature flags
|
||||
mov %ebx,8(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
|
||||
mov %ecx,12(%rdi) # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
|
||||
mov %edx,16(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
|
||||
|
||||
movd %xmm1,%eax # Restore leaf 07H Max Sub-leaves
|
||||
cmp \$0x1,%eax # Do we have cpuid(EAX=0x7, ECX=0x1)?
|
||||
jb .Lno_extended_info
|
||||
mov \$0x7,%eax
|
||||
mov \$0x1,%ecx
|
||||
cpuid # cpuid(EAX=0x7, ECX=0x1)
|
||||
mov %eax,20(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
|
||||
mov %edx,24(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
|
||||
mov %ebx,28(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
|
||||
mov %ecx,32(%rdi) # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
|
||||
|
||||
and \$0x80000,%edx # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
|
||||
cmp \$0x0,%edx
|
||||
je .Lno_extended_info
|
||||
mov \$0x24,%eax # Have AVX10 Support, query for details
|
||||
mov \$0x0,%ecx
|
||||
cpuid # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
|
||||
mov %ebx,36(%rdi) # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
|
||||
|
||||
.Lno_extended_info:
|
||||
|
||||
bt \$27,%r9d # check OSXSAVE bit
|
||||
@ -223,6 +246,9 @@ OPENSSL_ia32_cpuid:
|
||||
cmp \$6,%eax
|
||||
je .Ldone
|
||||
.Lclear_avx:
|
||||
andl \$0xff7fffff,20(%rdi) # ~(1<<23)
|
||||
# clear AVXIFMA, which is VEX-encoded
|
||||
# and requires YMM state support
|
||||
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
|
||||
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
|
||||
mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5)
|
||||
|
@ -137,7 +137,28 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
&mov ("eax",7);
|
||||
&xor ("ecx","ecx");
|
||||
&cpuid ();
|
||||
&mov (&DWP(8,"edi"),"ebx"); # save extended feature flag
|
||||
&mov (&DWP(8,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
|
||||
&mov (&DWP(12,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
|
||||
&mov (&DWP(16,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
|
||||
&cmp ("eax",1); # Do we have cpuid(EAX=0x7, ECX=0x1)?
|
||||
&jb (&label("no_extended_info"));
|
||||
&mov ("eax",7);
|
||||
&mov ("ecx",1);
|
||||
&cpuid (); # cpuid(EAX=0x7, ECX=0x1)
|
||||
&mov (&DWP(20,"edi"),"eax"); # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
|
||||
&mov (&DWP(24,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
|
||||
&mov (&DWP(28,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
|
||||
&mov (&DWP(32,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
|
||||
|
||||
&and ("edx",0x80000); # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
|
||||
&cmp ("edx",0x0);
|
||||
&je (&label("no_extended_info"));
|
||||
|
||||
&mov ("eax",0x24); # Have AVX10 Support, query for details
|
||||
&mov ("ecx",0x0);
|
||||
&cpuid (); # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
|
||||
&mov (&DWP(36,"edi"),"ebx"); # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
|
||||
|
||||
&set_label("no_extended_info");
|
||||
|
||||
&bt ("ebp",27); # check OSXSAVE bit
|
||||
@ -154,6 +175,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
&and ("esi",0xfeffffff); # clear FXSR
|
||||
&set_label("clear_avx");
|
||||
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
|
||||
&and (&DWP(20,"edi"),0xff7fffff); # ~(1<<23) clear AVXIFMA,
|
||||
# which is VEX-encoded
|
||||
# and requires YMM state support
|
||||
&and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
|
||||
&set_label("done");
|
||||
&mov ("eax","esi");
|
||||
|
@ -10,81 +10,77 @@ OPENSSL_ia32cap - the x86[_64] processor capabilities vector
|
||||
|
||||
=head1 DESCRIPTION
|
||||
|
||||
OpenSSL supports a range of x86[_64] instruction set extensions. These
|
||||
extensions are denoted by individual bits in capability vector returned
|
||||
by processor in EDX:ECX register pair after executing CPUID instruction
|
||||
with EAX=1 input value (see Intel Application Note #241618). This vector
|
||||
is copied to memory upon toolkit initialization and used to choose
|
||||
between different code paths to provide optimal performance across wide
|
||||
range of processors. For the moment of this writing following bits are
|
||||
significant:
|
||||
OpenSSL supports a range of x86[_64] instruction set extensions and
|
||||
features. These extensions are denoted by individual bits or groups of bits
|
||||
stored internally as ten 32-bit capability vectors and for simplicity
|
||||
represented logically below as five 64-bit vectors. This logical
|
||||
vector (LV) representation is used to streamline the definition of the
|
||||
OPENSSL_ia32cap environment variable.
|
||||
|
||||
Upon toolkit initialization, the capability vectors are populated through
|
||||
successive executions of the CPUID instruction, after which any OPENSSL_ia32cap
|
||||
environment variable capability bit modifications are applied. After toolkit
|
||||
initialization is complete, populated vectors are then used to choose
|
||||
between different code paths to provide optimal performance across a wide
|
||||
range of x86[_64] based processors.
|
||||
|
||||
Further CPUID information can be found in the Intel(R) Architecture
|
||||
Instruction Set Extensions Programming Reference, and the AMD64 Architecture
|
||||
Programmer's Manual (Volume 3).
|
||||
|
||||
=head2 Notable Capability Bits for LV0
|
||||
|
||||
The following are notable capability bits from logical vector 0 (LV0)
|
||||
resulting from the following execution of CPUID.(EAX=01H).EDX and
|
||||
CPUID.(EAX=01H).ECX:
|
||||
|
||||
=over 4
|
||||
|
||||
=item bit #4 denoting presence of Time-Stamp Counter.
|
||||
=item bit #0+4 denoting presence of Time-Stamp Counter;
|
||||
|
||||
=item bit #19 denoting availability of CLFLUSH instruction;
|
||||
=item bit #0+19 denoting availability of CLFLUSH instruction;
|
||||
|
||||
=item bit #20, reserved by Intel, is used to choose among RC4 code paths;
|
||||
=item bit #0+20, reserved by Intel, is used to choose among RC4 code paths;
|
||||
|
||||
=item bit #23 denoting MMX support;
|
||||
=item bit #0+23 denoting MMX support;
|
||||
|
||||
=item bit #24, FXSR bit, denoting availability of XMM registers;
|
||||
=item bit #0+24, FXSR bit, denoting availability of XMM registers;
|
||||
|
||||
=item bit #25 denoting SSE support;
|
||||
=item bit #0+25 denoting SSE support;
|
||||
|
||||
=item bit #26 denoting SSE2 support;
|
||||
=item bit #0+26 denoting SSE2 support;
|
||||
|
||||
=item bit #28 denoting Hyperthreading, which is used to distinguish
|
||||
=item bit #0+28 denoting Hyperthreading, which is used to distinguish
|
||||
cores with shared cache;
|
||||
|
||||
=item bit #30, reserved by Intel, denotes specifically Intel CPUs;
|
||||
=item bit #0+30, reserved by Intel, denotes specifically Intel CPUs;
|
||||
|
||||
=item bit #33 denoting availability of PCLMULQDQ instruction;
|
||||
=item bit #0+33 denoting availability of PCLMULQDQ instruction;
|
||||
|
||||
=item bit #41 denoting SSSE3, Supplemental SSE3, support;
|
||||
=item bit #0+41 denoting SSSE3, Supplemental SSE3, support;
|
||||
|
||||
=item bit #43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
|
||||
=item bit #0+43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
|
||||
|
||||
=item bit #54 denoting availability of MOVBE instruction;
|
||||
=item bit #0+54 denoting availability of MOVBE instruction;
|
||||
|
||||
=item bit #57 denoting AES-NI instruction set extension;
|
||||
=item bit #0+57 denoting AES-NI instruction set extension;
|
||||
|
||||
=item bit #58, XSAVE bit, lack of which in combination with MOVBE is used
|
||||
=item bit #0+58, XSAVE bit, lack of which in combination with MOVBE is used
|
||||
to identify Atom Silvermont core;
|
||||
|
||||
=item bit #59, OSXSAVE bit, denoting availability of YMM registers;
|
||||
=item bit #0+59, OSXSAVE bit, denoting availability of YMM registers;
|
||||
|
||||
=item bit #60 denoting AVX extension;
|
||||
=item bit #0+60 denoting AVX extension;
|
||||
|
||||
=item bit #62 denoting availability of RDRAND instruction;
|
||||
=item bit #0+62 denoting availability of RDRAND instruction;
|
||||
|
||||
=back
|
||||
|
||||
For example, in 32-bit application context clearing bit #26 at run-time
|
||||
disables high-performance SSE2 code present in the crypto library, while
|
||||
clearing bit #24 disables SSE2 code operating on 128-bit XMM register
|
||||
bank. You might have to do the latter if target OpenSSL application is
|
||||
executed on SSE2 capable CPU, but under control of OS that does not
|
||||
enable XMM registers. Historically address of the capability vector copy
|
||||
was exposed to application through OPENSSL_ia32cap_loc(), but not
|
||||
anymore. Now the only way to affect the capability detection is to set
|
||||
B<OPENSSL_ia32cap> environment variable prior target application start. To
|
||||
give a specific example, on Intel P4 processor
|
||||
C<env OPENSSL_ia32cap=0x16980010 apps/openssl>, or better yet
|
||||
C<env OPENSSL_ia32cap=~0x1000000 apps/openssl> would achieve the desired
|
||||
effect. Alternatively you can reconfigure the toolkit with no-sse2
|
||||
option and recompile.
|
||||
=head2 Notable Capability Bits for LV1
|
||||
|
||||
Less intuitive is clearing bit #28, or ~0x10000000 in the "environment
|
||||
variable" terms. The truth is that it's not copied from CPUID output
|
||||
verbatim, but is adjusted to reflect whether or not the data cache is
|
||||
actually shared between logical cores. This in turn affects the decision
|
||||
on whether or not expensive countermeasures against cache-timing attacks
|
||||
are applied, most notably in AES assembler module.
|
||||
|
||||
The capability vector is further extended with EBX value returned by
|
||||
CPUID with EAX=7 and ECX=0 as input. Following bits are significant:
|
||||
The following are notable capability bits from logical vector 1 (LV1)
|
||||
resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EBX and
|
||||
CPUID.(EAX=07H,ECX=0H).ECX:
|
||||
|
||||
=over 4
|
||||
|
||||
@ -103,8 +99,7 @@ and RORX;
|
||||
|
||||
=item bit #64+19 denoting availability of ADCX and ADOX instructions;
|
||||
|
||||
=item bit #64+21 denoting availability of VPMADD52[LH]UQ instructions,
|
||||
aka AVX512IFMA extension;
|
||||
=item bit #64+21 denoting availability of AVX512IFMA extension;
|
||||
|
||||
=item bit #64+29 denoting availability of SHA extension;
|
||||
|
||||
@ -118,10 +113,109 @@ aka AVX512IFMA extension;
|
||||
|
||||
=back
|
||||
|
||||
To control this extended capability word use C<:> as delimiter when
|
||||
setting up B<OPENSSL_ia32cap> environment variable. For example assigning
|
||||
C<:~0x20> would disable AVX2 code paths, and C<:0> - all post-AVX
|
||||
extensions.
|
||||
=head2 Notable Capability Bits for LV2
|
||||
|
||||
The following are notable capability bits from logical vector 2 (LV2)
|
||||
resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EDX and
|
||||
CPUID.(EAX=07H,ECX=1H).EAX:
|
||||
|
||||
=over 4
|
||||
|
||||
=item bit #128+15 denoting availability of Hybrid CPU;
|
||||
|
||||
=item bit #128+29 denoting support for IA32_ARCH_CAPABILITIES MSR;
|
||||
|
||||
=item bit #128+32 denoting availability of SHA512 extension;
|
||||
|
||||
=item bit #128+33 denoting availability of SM3 extension;
|
||||
|
||||
=item bit #128+34 denoting availability of SM4 extension;
|
||||
|
||||
=item bit #128+55 denoting availability of AVX-IFMA extension;
|
||||
|
||||
=back
|
||||
|
||||
=head2 Notable Capability Bits for LV3
|
||||
|
||||
The following are notable capability bits from logical vector 3 (LV3)
|
||||
resulting from the following execution of CPUID.(EAX=07H,ECX=1H).EDX and
|
||||
CPUID.(EAX=07H,ECX=1H).EBX:
|
||||
|
||||
=over 4
|
||||
|
||||
=item bit #192+19 denoting availability of AVX10 Converged Vector ISA extension;
|
||||
|
||||
=item bit #192+21 denoting availability of APX_F extension;
|
||||
|
||||
=back
|
||||
|
||||
=head2 Notable Capability Bits for LV4
|
||||
|
||||
The following are notable capability bits from logical vector 4 (LV4)
|
||||
resulting from the following execution of CPUID.(EAX=07H,ECX=1H).ECX and
|
||||
CPUID.(EAX=24H,ECX=0H).EBX:
|
||||
|
||||
=over 4
|
||||
|
||||
=item bits #256+32+[0:7] denoting AVX10 Converged Vector ISA Version (8 bits);
|
||||
|
||||
=item bit #256+48 denoting AVX10 XMM support;
|
||||
|
||||
=item bit #256+49 denoting AVX10 YMM support;
|
||||
|
||||
=item bit #256+50 denoting AVX10 ZMM support;
|
||||
|
||||
=back
|
||||
|
||||
=head2 OPENSSL_ia32cap environment variable
|
||||
|
||||
The B<OPENSSL_ia32cap> environment variable provides a mechanism to override
|
||||
the default capability vector values at library initialization time.
|
||||
The variable consists of a series of 64-bit numbers representing each
|
||||
of the logical vectors (LV) described above. Each value is delimited by a 'B<:>'.
|
||||
Decimal, octal, and hexadecimal value representations are supported.
|
||||
|
||||
C<env OPENSSL_ia32cap=LV0:LV1:LV2:LV3:LV4>
|
||||
|
||||
Used in this form, each non-null logical vector will I<overwrite> the entire corresponding
|
||||
capability vector pair with the provided value. To keep compatibility with the
|
||||
behaviour of the original OPENSSL_ia32cap environment variable
|
||||
C<env OPENSSL_ia32cap=LV0:LV1>, any trailing capability vector pairs omitted from the value will be set to zero.
|
||||
|
||||
To illustrate, the following will zero all capability bits in logical vectors 1 and further
|
||||
(disable all post-AVX extensions):
|
||||
|
||||
C<env OPENSSL_ia32cap=:0>
|
||||
|
||||
The following will zero all capability bits in logical vectors 2 and further:
|
||||
|
||||
C<env OPENSSL_ia32cap=::0>
|
||||
|
||||
The following will zero all capability bits only in logical vector 1:
|
||||
C<env OPENSSL_ia32cap=:0::::>
|
||||
|
||||
A more likely usage scenario would be to disable specific instruction set extensions.
|
||||
The 'B<~>' character is used to specify a bit mask of the extensions to be disabled for
|
||||
a particular logical vector.
|
||||
|
||||
To illustrate, the following will disable AVX2 code paths and further extensions:
|
||||
|
||||
C<env OPENSSL_ia32cap=:~0x20>
|
||||
|
||||
The following will disable AESNI (LV0 bit 57) and VAES (LV1 bit 41)
|
||||
extensions and therefore any code paths using those extensions but leave
|
||||
the rest of the logical vectors unchanged:
|
||||
|
||||
C<env OPENSSL_ia32cap=~0x200000000000000:~0x20000000000:~0x0:~0x0:~0x0>
|
||||
|
||||
=head1 NOTES
|
||||
|
||||
Not all capability bits are copied from CPUID output verbatim. An example
|
||||
of this is the somewhat less intuitive clearing of LV0 bit #28, or ~0x10000000
|
||||
in the "environment variable" terms. It has been adjusted to reflect whether or
|
||||
not the data cache is actually shared between logical cores. This in turn affects
|
||||
the decision on whether or not expensive countermeasures against cache-timing attacks
|
||||
are applied, most notably in AES assembler module.
|
||||
|
||||
=head1 RETURN VALUES
|
||||
|
||||
|
@ -36,8 +36,10 @@ void OPENSSL_cpuid_setup(void);
|
||||
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(__x86_64) || defined(__x86_64__) || \
|
||||
defined(_M_AMD64) || defined(_M_X64)
|
||||
# define OPENSSL_IA32CAP_P_MAX_INDEXES 10
|
||||
extern unsigned int OPENSSL_ia32cap_P[];
|
||||
#endif
|
||||
|
||||
void OPENSSL_showfatal(const char *fmta, ...);
|
||||
int ossl_do_ex_data_init(OSSL_LIB_CTX *ctx);
|
||||
void ossl_crypto_cleanup_all_ex_data_int(OSSL_LIB_CTX *ctx);
|
||||
|
Loading…
Reference in New Issue
Block a user