Extension of OPENSSL_ia32cap to accommodate additional CPUID bits

bits 128 - 191 CPUID.(EAX=07H,ECX=0H).EDX and CPUID.(EAX=07H,ECX=1H).EAX
bits 192 - 255 CPUID.(EAX=07H,ECX=1H).EDX and CPUID.(EAX=07H,ECX=1H).EBX
bits 256 - 319 CPUID.(EAX=07H,ECX=1H).ECX and CPUID.(EAX=24H,ECX=0H).EBX

Reviewed-by: Matt Caswell <matt@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/25709)
This commit is contained in:
Elizarova, Alina 2024-12-04 10:29:23 -08:00 committed by Tomas Mraz
parent 1b3b5a019a
commit acc2655236
10 changed files with 264 additions and 87 deletions

View File

@ -78,6 +78,12 @@ OpenSSL 3.5
*Paul Dale*
* Extended `OPENSSL_ia32cap` support to accommodate additional `CPUID`
feature/capability bits in leaf `0x7` (Extended Feature Flags) as well
as leaf `0x24` (Converged Vector ISA).
*Dan Zimmerman, Alina Elizarova*
OpenSSL 3.4
-----------

View File

@ -14,7 +14,7 @@
defined(__x86_64) || defined(__x86_64__) || \
defined(_M_AMD64) || defined(_M_X64)
extern unsigned int OPENSSL_ia32cap_P[4];
extern unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
# if defined(OPENSSL_CPUID_OBJ)
@ -29,7 +29,7 @@ extern unsigned int OPENSSL_ia32cap_P[4];
*/
# ifdef _WIN32
typedef WCHAR variant_char;
# define OPENSSL_IA32CAP_P_MAX_CHAR_SIZE 256
static variant_char *ossl_getenv(const char *name)
{
/*
@ -37,10 +37,10 @@ static variant_char *ossl_getenv(const char *name)
* just ignore |name| and use equivalent wide-char L-literal.
* As well as to ignore excessively long values...
*/
static WCHAR value[48];
DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, 48);
static WCHAR value[OPENSSL_IA32CAP_P_MAX_CHAR_SIZE];
DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, OPENSSL_IA32CAP_P_MAX_CHAR_SIZE);
return (len > 0 && len < 48) ? value : NULL;
return (len > 0 && len < OPENSSL_IA32CAP_P_MAX_CHAR_SIZE) ? value : NULL;
}
# else
typedef char variant_char;
@ -98,6 +98,7 @@ void OPENSSL_cpuid_setup(void)
IA32CAP OPENSSL_ia32_cpuid(unsigned int *);
IA32CAP vec;
const variant_char *env;
int index = 2;
if (trigger)
return;
@ -126,23 +127,37 @@ void OPENSSL_cpuid_setup(void)
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
}
if ((env = ossl_strchr(env, ':')) != NULL) {
IA32CAP vecx;
/* Processed indexes 0, 1 */
if ((env = ossl_strchr(env, ':')) != NULL)
env++;
off = (env[0] == '~') ? 1 : 0;
vecx = ossl_strtouint64(env + off);
if (off) {
OPENSSL_ia32cap_P[2] &= ~(unsigned int)vecx;
OPENSSL_ia32cap_P[3] &= ~(unsigned int)(vecx >> 32);
} else {
OPENSSL_ia32cap_P[2] = (unsigned int)vecx;
OPENSSL_ia32cap_P[3] = (unsigned int)(vecx >> 32);
for (; index < OPENSSL_IA32CAP_P_MAX_INDEXES; index += 2) {
if ((env != NULL) && (env[0] != '\0')) {
/* if env[0] == ':' current index is skipped */
if (env[0] != ':') {
IA32CAP vecx;
off = (env[0] == '~') ? 1 : 0;
vecx = ossl_strtouint64(env + off);
if (off) {
OPENSSL_ia32cap_P[index] &= ~(unsigned int)vecx;
OPENSSL_ia32cap_P[index + 1] &= ~(unsigned int)(vecx >> 32);
} else {
OPENSSL_ia32cap_P[index] = (unsigned int)vecx;
OPENSSL_ia32cap_P[index + 1] = (unsigned int)(vecx >> 32);
}
}
/* skip delimiter */
if ((env = ossl_strchr(env, ':')) != NULL)
env++;
} else { /* zeroize the next two indexes */
OPENSSL_ia32cap_P[index] = 0;
OPENSSL_ia32cap_P[index + 1] = 0;
}
} else {
OPENSSL_ia32cap_P[2] = 0;
OPENSSL_ia32cap_P[3] = 0;
}
/* If AVX10 is disabled, zero out its detailed cap bits */
if (!(OPENSSL_ia32cap_P[6] & (1 << 19)))
OPENSSL_ia32cap_P[9] = 0;
} else {
vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P);
}
@ -156,7 +171,7 @@ void OPENSSL_cpuid_setup(void)
OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32);
}
# else
unsigned int OPENSSL_ia32cap_P[4];
unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES];
# endif
#endif

View File

@ -30,7 +30,7 @@
# include "crypto/riscv_arch.h"
# define CPU_INFO_STR_LEN 2048
#else
# define CPU_INFO_STR_LEN 128
# define CPU_INFO_STR_LEN 256
#endif
/* extern declaration to avoid warning */
@ -52,11 +52,18 @@ DEFINE_RUN_ONCE_STATIC(init_info_strings)
const char *env;
BIO_snprintf(ossl_cpu_info_str, sizeof(ossl_cpu_info_str),
CPUINFO_PREFIX "OPENSSL_ia32cap=0x%llx:0x%llx",
CPUINFO_PREFIX "OPENSSL_ia32cap=0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx",
(unsigned long long)OPENSSL_ia32cap_P[0] |
(unsigned long long)OPENSSL_ia32cap_P[1] << 32,
(unsigned long long)OPENSSL_ia32cap_P[2] |
(unsigned long long)OPENSSL_ia32cap_P[3] << 32);
(unsigned long long)OPENSSL_ia32cap_P[3] << 32,
(unsigned long long)OPENSSL_ia32cap_P[4] |
(unsigned long long)OPENSSL_ia32cap_P[5] << 32,
(unsigned long long)OPENSSL_ia32cap_P[6] |
(unsigned long long)OPENSSL_ia32cap_P[7] << 32,
(unsigned long long)OPENSSL_ia32cap_P[8] |
(unsigned long long)OPENSSL_ia32cap_P[9] << 32);
if ((env = getenv("OPENSSL_ia32cap")) != NULL)
BIO_snprintf(ossl_cpu_info_str + strlen(ossl_cpu_info_str),
sizeof(ossl_cpu_info_str) - strlen(ossl_cpu_info_str),

View File

@ -167,7 +167,8 @@ sub ::file_end
}
}
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) {
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16";
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,40";
if ($::macosx) { push (@out,"$tmp,2\n"); }
elsif ($::elf) { push (@out,"$tmp,4\n"); }
else { push (@out,"$tmp\n"); }

View File

@ -139,9 +139,10 @@ ___
push(@out,"$segment ENDS\n");
if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
{ my $comm=<<___;
.bss SEGMENT 'BSS'
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4
COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:10
.bss ENDS
___
# comment out OPENSSL_ia32cap_P declarations

View File

@ -124,9 +124,10 @@ sub ::function_end_B
sub ::file_end
{ if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out)
# OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
{ my $comm=<<___;
${drdecor}segment .bss
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16
${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 40
___
# comment out OPENSSL_ia32cap_P declarations
grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out;

View File

@ -27,14 +27,14 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\""
("%rdi","%rsi","%rdx","%rcx"); # Unix order
print<<___;
#include crypto/cryptlib.h
.extern OPENSSL_cpuid_setup
.hidden OPENSSL_cpuid_setup
.section .init
call OPENSSL_cpuid_setup
.hidden OPENSSL_ia32cap_P
.comm OPENSSL_ia32cap_P,16,4
.comm OPENSSL_ia32cap_P,40,4 # <--Should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES
.text
.globl OPENSSL_atomic_add
@ -192,6 +192,7 @@ OPENSSL_ia32_cpuid:
mov \$7,%eax
xor %ecx,%ecx
cpuid
movd %eax,%xmm1 # put aside leaf 07H Max Sub-leaves
bt \$26,%r9d # check XSAVE bit, cleared on Knights
jc .Lnotknights
and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag
@ -202,9 +203,31 @@ OPENSSL_ia32_cpuid:
jne .Lnotskylakex
and \$0xfffeffff,%ebx # ~(1<<16)
# suppress AVX512F flag on Skylake-X
.Lnotskylakex:
mov %ebx,8(%rdi) # save extended feature flags
mov %ecx,12(%rdi)
.Lnotskylakex: # save extended feature flags
mov %ebx,8(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
mov %ecx,12(%rdi) # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
mov %edx,16(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
movd %xmm1,%eax # Restore leaf 07H Max Sub-leaves
cmp \$0x1,%eax # Do we have cpuid(EAX=0x7, ECX=0x1)?
jb .Lno_extended_info
mov \$0x7,%eax
mov \$0x1,%ecx
cpuid # cpuid(EAX=0x7, ECX=0x1)
mov %eax,20(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
mov %edx,24(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
mov %ebx,28(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
mov %ecx,32(%rdi) # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
and \$0x80000,%edx # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
cmp \$0x0,%edx
je .Lno_extended_info
mov \$0x24,%eax # Have AVX10 Support, query for details
mov \$0x0,%ecx
cpuid # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
mov %ebx,36(%rdi) # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
.Lno_extended_info:
bt \$27,%r9d # check OSXSAVE bit
@ -223,6 +246,9 @@ OPENSSL_ia32_cpuid:
cmp \$6,%eax
je .Ldone
.Lclear_avx:
andl \$0xff7fffff,20(%rdi) # ~(1<<23)
# clear AVXIFMA, which is VEX-encoded
# and requires YMM state support
mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11)
and %eax,%r9d # clear AVX, FMA and AMD XOP bits
mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5)

View File

@ -137,7 +137,28 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&mov ("eax",7);
&xor ("ecx","ecx");
&cpuid ();
&mov (&DWP(8,"edi"),"ebx"); # save extended feature flag
&mov (&DWP(8,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
&mov (&DWP(12,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
&mov (&DWP(16,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
&cmp ("eax",1); # Do we have cpuid(EAX=0x7, ECX=0x1)?
&jb (&label("no_extended_info"));
&mov ("eax",7);
&mov ("ecx",1);
&cpuid (); # cpuid(EAX=0x7, ECX=0x1)
&mov (&DWP(20,"edi"),"eax"); # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
&mov (&DWP(24,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
&mov (&DWP(28,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
&mov (&DWP(32,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]
&and ("edx",0x80000); # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
&cmp ("edx",0x0);
&je (&label("no_extended_info"));
&mov ("eax",0x24); # Have AVX10 Support, query for details
&mov ("ecx",0x0);
&cpuid (); # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
&mov (&DWP(36,"edi"),"ebx"); # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]
&set_label("no_extended_info");
&bt ("ebp",27); # check OSXSAVE bit
@ -154,6 +175,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&and ("esi",0xfeffffff); # clear FXSR
&set_label("clear_avx");
&and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits
&and (&DWP(20,"edi"),0xff7fffff); # ~(1<<23) clear AVXIFMA,
# which is VEX-encoded
# and requires YMM state support
&and (&DWP(8,"edi"),0xffffffdf); # clear AVX2
&set_label("done");
&mov ("eax","esi");

View File

@ -10,81 +10,77 @@ OPENSSL_ia32cap - the x86[_64] processor capabilities vector
=head1 DESCRIPTION
OpenSSL supports a range of x86[_64] instruction set extensions. These
extensions are denoted by individual bits in capability vector returned
by processor in EDX:ECX register pair after executing CPUID instruction
with EAX=1 input value (see Intel Application Note #241618). This vector
is copied to memory upon toolkit initialization and used to choose
between different code paths to provide optimal performance across wide
range of processors. For the moment of this writing following bits are
significant:
OpenSSL supports a range of x86[_64] instruction set extensions and
features. These extensions are denoted by individual bits or groups of bits
stored internally as ten 32-bit capability vectors and for simplicity
represented logically below as five 64-bit vectors. This logical
vector (LV) representation is used to streamline the definition of the
OPENSSL_ia32cap environment variable.
Upon toolkit initialization, the capability vectors are populated through
successive executions of the CPUID instruction, after which any OPENSSL_ia32cap
environment variable capability bit modifications are applied. After toolkit
initialization is complete, populated vectors are then used to choose
between different code paths to provide optimal performance across a wide
range of x86[_64] based processors.
Further CPUID information can be found in the Intel(R) Architecture
Instruction Set Extensions Programming Reference, and the AMD64 Architecture
Programmer's Manual (Volume 3).
=head2 Notable Capability Bits for LV0
The following are notable capability bits from logical vector 0 (LV0)
resulting from the following execution of CPUID.(EAX=01H).EDX and
CPUID.(EAX=01H).ECX:
=over 4
=item bit #4 denoting presence of Time-Stamp Counter.
=item bit #0+4 denoting presence of Time-Stamp Counter;
=item bit #19 denoting availability of CLFLUSH instruction;
=item bit #0+19 denoting availability of CLFLUSH instruction;
=item bit #20, reserved by Intel, is used to choose among RC4 code paths;
=item bit #0+20, reserved by Intel, is used to choose among RC4 code paths;
=item bit #23 denoting MMX support;
=item bit #0+23 denoting MMX support;
=item bit #24, FXSR bit, denoting availability of XMM registers;
=item bit #0+24, FXSR bit, denoting availability of XMM registers;
=item bit #25 denoting SSE support;
=item bit #0+25 denoting SSE support;
=item bit #26 denoting SSE2 support;
=item bit #0+26 denoting SSE2 support;
=item bit #28 denoting Hyperthreading, which is used to distinguish
=item bit #0+28 denoting Hyperthreading, which is used to distinguish
cores with shared cache;
=item bit #30, reserved by Intel, denotes specifically Intel CPUs;
=item bit #0+30, reserved by Intel, denotes specifically Intel CPUs;
=item bit #33 denoting availability of PCLMULQDQ instruction;
=item bit #0+33 denoting availability of PCLMULQDQ instruction;
=item bit #41 denoting SSSE3, Supplemental SSE3, support;
=item bit #0+41 denoting SSSE3, Supplemental SSE3, support;
=item bit #43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
=item bit #0+43 denoting AMD XOP support (forced to zero on non-AMD CPUs);
=item bit #54 denoting availability of MOVBE instruction;
=item bit #0+54 denoting availability of MOVBE instruction;
=item bit #57 denoting AES-NI instruction set extension;
=item bit #0+57 denoting AES-NI instruction set extension;
=item bit #58, XSAVE bit, lack of which in combination with MOVBE is used
=item bit #0+58, XSAVE bit, lack of which in combination with MOVBE is used
to identify Atom Silvermont core;
=item bit #59, OSXSAVE bit, denoting availability of YMM registers;
=item bit #0+59, OSXSAVE bit, denoting availability of YMM registers;
=item bit #60 denoting AVX extension;
=item bit #0+60 denoting AVX extension;
=item bit #62 denoting availability of RDRAND instruction;
=item bit #0+62 denoting availability of RDRAND instruction;
=back
For example, in 32-bit application context clearing bit #26 at run-time
disables high-performance SSE2 code present in the crypto library, while
clearing bit #24 disables SSE2 code operating on 128-bit XMM register
bank. You might have to do the latter if target OpenSSL application is
executed on SSE2 capable CPU, but under control of OS that does not
enable XMM registers. Historically address of the capability vector copy
was exposed to application through OPENSSL_ia32cap_loc(), but not
anymore. Now the only way to affect the capability detection is to set
B<OPENSSL_ia32cap> environment variable prior target application start. To
give a specific example, on Intel P4 processor
C<env OPENSSL_ia32cap=0x16980010 apps/openssl>, or better yet
C<env OPENSSL_ia32cap=~0x1000000 apps/openssl> would achieve the desired
effect. Alternatively you can reconfigure the toolkit with no-sse2
option and recompile.
=head2 Notable Capability Bits for LV1
Less intuitive is clearing bit #28, or ~0x10000000 in the "environment
variable" terms. The truth is that it's not copied from CPUID output
verbatim, but is adjusted to reflect whether or not the data cache is
actually shared between logical cores. This in turn affects the decision
on whether or not expensive countermeasures against cache-timing attacks
are applied, most notably in AES assembler module.
The capability vector is further extended with EBX value returned by
CPUID with EAX=7 and ECX=0 as input. Following bits are significant:
The following are notable capability bits from logical vector 1 (LV1)
resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EBX and
CPUID.(EAX=07H,ECX=0H).ECX:
=over 4
@ -103,8 +99,7 @@ and RORX;
=item bit #64+19 denoting availability of ADCX and ADOX instructions;
=item bit #64+21 denoting availability of VPMADD52[LH]UQ instructions,
aka AVX512IFMA extension;
=item bit #64+21 denoting availability of AVX512IFMA extension;
=item bit #64+29 denoting availability of SHA extension;
@ -118,10 +113,109 @@ aka AVX512IFMA extension;
=back
To control this extended capability word use C<:> as delimiter when
setting up B<OPENSSL_ia32cap> environment variable. For example assigning
C<:~0x20> would disable AVX2 code paths, and C<:0> - all post-AVX
extensions.
=head2 Notable Capability Bits for LV2
The following are notable capability bits from logical vector 2 (LV2)
resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EDX and
CPUID.(EAX=07H,ECX=1H).EAX:
=over 4
=item bit #128+15 denoting availability of Hybrid CPU;
=item bit #128+29 denoting support for IA32_ARCH_CAPABILITIES MSR;
=item bit #128+32 denoting availability of SHA512 extension;
=item bit #128+33 denoting availability of SM3 extension;
=item bit #128+34 denoting availability of SM4 extension;
=item bit #128+55 denoting availability of AVX-IFMA extension;
=back
=head2 Notable Capability Bits for LV3
The following are notable capability bits from logical vector 3 (LV3)
resulting from the following execution of CPUID.(EAX=07H,ECX=1H).EDX and
CPUID.(EAX=07H,ECX=1H).EBX:
=over 4
=item bit #192+19 denoting availability of AVX10 Converged Vector ISA extension;
=item bit #192+21 denoting availability of APX_F extension;
=back
=head2 Notable Capability Bits for LV4
The following are notable capability bits from logical vector 4 (LV4)
resulting from the following execution of CPUID.(EAX=07H,ECX=1H).ECX and
CPUID.(EAX=24H,ECX=0H).EBX:
=over 4
=item bits #256+32+[0:7] denoting AVX10 Converged Vector ISA Version (8 bits);
=item bit #256+48 denoting AVX10 XMM support;
=item bit #256+49 denoting AVX10 YMM support;
=item bit #256+50 denoting AVX10 ZMM support;
=back
=head2 OPENSSL_ia32cap environment variable
The B<OPENSSL_ia32cap> environment variable provides a mechanism to override
the default capability vector values at library initialization time.
The variable consists of a series of 64-bit numbers representing each
of the logical vectors (LV) described above. Each value is delimited by a 'B<:>'.
Decimal, octal and hexadecimal value representations are supported.
C<env OPENSSL_ia32cap=LV0:LV1:LV2:LV3:LV4>
Used in this form, each non-null logical vector will I<overwrite> the entire corresponding
capability vector pair with the provided value. To keep compatibility with the
behaviour of the original two-vector form of the variable,
C<env OPENSSL_ia32cap=LV0:LV1>, the capability vector pairs of any logical vectors
omitted from the end of the value will be set to zero.
To illustrate, the following will zero all capability bits in logical vectors 1 and further
(disable all post-AVX extensions):
C<env OPENSSL_ia32cap=:0>
The following will zero all capability bits in logical vectors 2 and further:
C<env OPENSSL_ia32cap=::0>
The following will zero all capability bits only in logical vector 1:
C<env OPENSSL_ia32cap=:0::::>
A more likely usage scenario would be to disable specific instruction set extensions.
The 'B<~>' character is used to specify a bit mask of the extensions to be disabled for
a particular logical vector.
To illustrate, the following will disable AVX2 code paths (LV1 bit 5) and further extensions:
C<env OPENSSL_ia32cap=:~0x20>
The following will disable AESNI (LV0 bit 57) and VAES (LV1 bit 41)
extensions and therefore any code paths using those extensions but leave
the rest of the logical vectors unchanged:
C<env OPENSSL_ia32cap=~0x200000000000000:~0x20000000000:~0x0:~0x0:~0x0>
=head1 NOTES
Not all capability bits are copied from CPUID output verbatim. A somewhat less
intuitive example is clearing LV0 bit #28, or ~0x10000000 in "environment variable"
terms. This bit is not taken from CPUID as-is; it is adjusted to reflect whether or
not the data cache is actually shared between logical cores. This in turn affects
the decision on whether or not expensive countermeasures against cache-timing attacks
are applied, most notably in the AES assembler module.
=head1 RETURN VALUES

View File

@ -36,8 +36,10 @@ void OPENSSL_cpuid_setup(void);
#if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
defined(__x86_64) || defined(__x86_64__) || \
defined(_M_AMD64) || defined(_M_X64)
# define OPENSSL_IA32CAP_P_MAX_INDEXES 10
extern unsigned int OPENSSL_ia32cap_P[];
#endif
void OPENSSL_showfatal(const char *fmta, ...);
int ossl_do_ex_data_init(OSSL_LIB_CTX *ctx);
void ossl_crypto_cleanup_all_ex_data_int(OSSL_LIB_CTX *ctx);