From acc26552369bb39de6d30737fc30a6bc4f2ebbae Mon Sep 17 00:00:00 2001 From: "Elizarova, Alina" Date: Wed, 4 Dec 2024 10:29:23 -0800 Subject: [PATCH] Extension of OPENSSL_ia32cap to accommodate additional CPUID bits bits 128 - 191 CPUID.(EAX=07H,ECX=0H).EDX and CPUID.(EAX=07H,ECX=1H).EAX bits 192 - 255 CPUID.(EAX=07H,ECX=1H).EDX and CPUID.(EAX=07H,ECX=1H).EBX bits 256 - 319 CPUID.(EAX=07H,ECX=1H).ECX and CPUID.(EAX=24H,ECX=0H).EBX Reviewed-by: Matt Caswell Reviewed-by: Tomas Mraz (Merged from https://github.com/openssl/openssl/pull/25709) --- CHANGES.md | 6 ++ crypto/cpuid.c | 55 ++++++---- crypto/info.c | 13 ++- crypto/perlasm/x86gas.pl | 3 +- crypto/perlasm/x86masm.pl | 3 +- crypto/perlasm/x86nasm.pl | 3 +- crypto/x86_64cpuid.pl | 36 ++++++- crypto/x86cpuid.pl | 26 ++++- doc/man3/OPENSSL_ia32cap.pod | 204 +++++++++++++++++++++++++---------- include/internal/cryptlib.h | 2 + 10 files changed, 264 insertions(+), 87 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 9a103d9f59..1e3a5b53ad 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -78,6 +78,12 @@ OpenSSL 3.5 *Paul Dale* + * Extended `OPENSSL_ia32cap` support to accommodate additional `CPUID` + feature/capability bits in leaf `0x7` (Extended Feature Flags) as well + as leaf `0x24` (Converged Vector ISA). 
+ + *Dan Zimmerman, Alina Elizarova* + OpenSSL 3.4 ----------- diff --git a/crypto/cpuid.c b/crypto/cpuid.c index 51cbe5ea09..538a5a039f 100644 --- a/crypto/cpuid.c +++ b/crypto/cpuid.c @@ -14,7 +14,7 @@ defined(__x86_64) || defined(__x86_64__) || \ defined(_M_AMD64) || defined(_M_X64) -extern unsigned int OPENSSL_ia32cap_P[4]; +extern unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES]; # if defined(OPENSSL_CPUID_OBJ) @@ -29,7 +29,7 @@ extern unsigned int OPENSSL_ia32cap_P[4]; */ # ifdef _WIN32 typedef WCHAR variant_char; - +# define OPENSSL_IA32CAP_P_MAX_CHAR_SIZE 256 static variant_char *ossl_getenv(const char *name) { /* @@ -37,10 +37,10 @@ static variant_char *ossl_getenv(const char *name) * just ignore |name| and use equivalent wide-char L-literal. * As well as to ignore excessively long values... */ - static WCHAR value[48]; - DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, 48); + static WCHAR value[OPENSSL_IA32CAP_P_MAX_CHAR_SIZE]; + DWORD len = GetEnvironmentVariableW(L"OPENSSL_ia32cap", value, OPENSSL_IA32CAP_P_MAX_CHAR_SIZE); - return (len > 0 && len < 48) ? value : NULL; + return (len > 0 && len < OPENSSL_IA32CAP_P_MAX_CHAR_SIZE) ? value : NULL; } # else typedef char variant_char; @@ -98,6 +98,7 @@ void OPENSSL_cpuid_setup(void) IA32CAP OPENSSL_ia32_cpuid(unsigned int *); IA32CAP vec; const variant_char *env; + int index = 2; if (trigger) return; @@ -126,23 +127,37 @@ void OPENSSL_cpuid_setup(void) vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); } - if ((env = ossl_strchr(env, ':')) != NULL) { - IA32CAP vecx; - + /* Processed indexes 0, 1 */ + if ((env = ossl_strchr(env, ':')) != NULL) env++; - off = (env[0] == '~') ? 
1 : 0; - vecx = ossl_strtouint64(env + off); - if (off) { - OPENSSL_ia32cap_P[2] &= ~(unsigned int)vecx; - OPENSSL_ia32cap_P[3] &= ~(unsigned int)(vecx >> 32); - } else { - OPENSSL_ia32cap_P[2] = (unsigned int)vecx; - OPENSSL_ia32cap_P[3] = (unsigned int)(vecx >> 32); + for (; index < OPENSSL_IA32CAP_P_MAX_INDEXES; index += 2) { + if ((env != NULL) && (env[0] != '\0')) { + /* if env[0] == ':' current index is skipped */ + if (env[0] != ':') { + IA32CAP vecx; + + off = (env[0] == '~') ? 1 : 0; + vecx = ossl_strtouint64(env + off); + if (off) { + OPENSSL_ia32cap_P[index] &= ~(unsigned int)vecx; + OPENSSL_ia32cap_P[index + 1] &= ~(unsigned int)(vecx >> 32); + } else { + OPENSSL_ia32cap_P[index] = (unsigned int)vecx; + OPENSSL_ia32cap_P[index + 1] = (unsigned int)(vecx >> 32); + } + } + /* skip delimeter */ + if ((env = ossl_strchr(env, ':')) != NULL) + env++; + } else { /* zeroize the next two indexes */ + OPENSSL_ia32cap_P[index] = 0; + OPENSSL_ia32cap_P[index + 1] = 0; } - } else { - OPENSSL_ia32cap_P[2] = 0; - OPENSSL_ia32cap_P[3] = 0; } + + /* If AVX10 is disabled, zero out its detailed cap bits */ + if (!(OPENSSL_ia32cap_P[6] & (1 << 19))) + OPENSSL_ia32cap_P[9] = 0; } else { vec = OPENSSL_ia32_cpuid(OPENSSL_ia32cap_P); } @@ -156,7 +171,7 @@ void OPENSSL_cpuid_setup(void) OPENSSL_ia32cap_P[1] = (unsigned int)(vec >> 32); } # else -unsigned int OPENSSL_ia32cap_P[4]; +unsigned int OPENSSL_ia32cap_P[OPENSSL_IA32CAP_P_MAX_INDEXES]; # endif #endif diff --git a/crypto/info.c b/crypto/info.c index ad31c9ec31..4d70471be2 100644 --- a/crypto/info.c +++ b/crypto/info.c @@ -30,7 +30,7 @@ # include "crypto/riscv_arch.h" # define CPU_INFO_STR_LEN 2048 #else -# define CPU_INFO_STR_LEN 128 +# define CPU_INFO_STR_LEN 256 #endif /* extern declaration to avoid warning */ @@ -52,11 +52,18 @@ DEFINE_RUN_ONCE_STATIC(init_info_strings) const char *env; BIO_snprintf(ossl_cpu_info_str, sizeof(ossl_cpu_info_str), - CPUINFO_PREFIX "OPENSSL_ia32cap=0x%llx:0x%llx", + CPUINFO_PREFIX 
"OPENSSL_ia32cap=0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx:0x%.16llx", (unsigned long long)OPENSSL_ia32cap_P[0] | (unsigned long long)OPENSSL_ia32cap_P[1] << 32, (unsigned long long)OPENSSL_ia32cap_P[2] | - (unsigned long long)OPENSSL_ia32cap_P[3] << 32); + (unsigned long long)OPENSSL_ia32cap_P[3] << 32, + (unsigned long long)OPENSSL_ia32cap_P[4] | + (unsigned long long)OPENSSL_ia32cap_P[5] << 32, + (unsigned long long)OPENSSL_ia32cap_P[6] | + (unsigned long long)OPENSSL_ia32cap_P[7] << 32, + (unsigned long long)OPENSSL_ia32cap_P[8] | + (unsigned long long)OPENSSL_ia32cap_P[9] << 32); + if ((env = getenv("OPENSSL_ia32cap")) != NULL) BIO_snprintf(ossl_cpu_info_str + strlen(ossl_cpu_info_str), sizeof(ossl_cpu_info_str) - strlen(ossl_cpu_info_str), diff --git a/crypto/perlasm/x86gas.pl b/crypto/perlasm/x86gas.pl index 1b2b27c022..f3c01ea89b 100644 --- a/crypto/perlasm/x86gas.pl +++ b/crypto/perlasm/x86gas.pl @@ -167,7 +167,8 @@ sub ::file_end } } if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) { - my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,16"; + # OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES + my $tmp=".comm\t${nmdecor}OPENSSL_ia32cap_P,40"; if ($::macosx) { push (@out,"$tmp,2\n"); } elsif ($::elf) { push (@out,"$tmp,4\n"); } else { push (@out,"$tmp\n"); } diff --git a/crypto/perlasm/x86masm.pl b/crypto/perlasm/x86masm.pl index 2dcd3f79f6..ccdba757dc 100644 --- a/crypto/perlasm/x86masm.pl +++ b/crypto/perlasm/x86masm.pl @@ -139,9 +139,10 @@ ___ push(@out,"$segment ENDS\n"); if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) + # OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES { my $comm=<<___; .bss SEGMENT 'BSS' -COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:4 +COMM ${nmdecor}OPENSSL_ia32cap_P:DWORD:10 .bss ENDS ___ # comment out OPENSSL_ia32cap_P declarations diff --git a/crypto/perlasm/x86nasm.pl b/crypto/perlasm/x86nasm.pl index 7017b88e80..a8cdd2d0bb 100644 --- 
a/crypto/perlasm/x86nasm.pl +++ b/crypto/perlasm/x86nasm.pl @@ -124,9 +124,10 @@ sub ::function_end_B sub ::file_end { if (grep {/\b${nmdecor}OPENSSL_ia32cap_P\b/i} @out) + # OPENSSL_ia32cap_P size should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES { my $comm=<<___; ${drdecor}segment .bss -${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 16 +${drdecor}common ${nmdecor}OPENSSL_ia32cap_P 40 ___ # comment out OPENSSL_ia32cap_P declarations grep {s/(^extern\s+${nmdecor}OPENSSL_ia32cap_P)/\;$1/} @out; diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl index 53685ec263..f0eb8510ed 100644 --- a/crypto/x86_64cpuid.pl +++ b/crypto/x86_64cpuid.pl @@ -27,14 +27,14 @@ open OUT,"| \"$^X\" \"$xlate\" $flavour \"$output\"" ("%rdi","%rsi","%rdx","%rcx"); # Unix order print<<___; +#include crypto/cryptlib.h .extern OPENSSL_cpuid_setup .hidden OPENSSL_cpuid_setup .section .init call OPENSSL_cpuid_setup .hidden OPENSSL_ia32cap_P -.comm OPENSSL_ia32cap_P,16,4 - +.comm OPENSSL_ia32cap_P,40,4 # <--Should match with internal/cryptlib.h OPENSSL_IA32CAP_P_MAX_INDEXES .text .globl OPENSSL_atomic_add @@ -192,6 +192,7 @@ OPENSSL_ia32_cpuid: mov \$7,%eax xor %ecx,%ecx cpuid + movd %eax,%xmm1 # put aside leaf 07H Max Sub-leaves bt \$26,%r9d # check XSAVE bit, cleared on Knights jc .Lnotknights and \$0xfff7ffff,%ebx # clear ADCX/ADOX flag @@ -202,9 +203,31 @@ OPENSSL_ia32_cpuid: jne .Lnotskylakex and \$0xfffeffff,%ebx # ~(1<<16) # suppress AVX512F flag on Skylake-X -.Lnotskylakex: - mov %ebx,8(%rdi) # save extended feature flags - mov %ecx,12(%rdi) + +.Lnotskylakex: # save extended feature flags + mov %ebx,8(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2] + mov %ecx,12(%rdi) # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3] + mov %edx,16(%rdi) # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4] + + movd %xmm1,%eax # Restore leaf 07H Max Sub-leaves + cmp \$0x1,%eax # Do we have cpuid(EAX=0x7, ECX=0x1)? 
+ jb .Lno_extended_info + mov \$0x7,%eax + mov \$0x1,%ecx + cpuid # cpuid(EAX=0x7, ECX=0x1) + mov %eax,20(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5] + mov %edx,24(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6] + mov %ebx,28(%rdi) # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7] + mov %ecx,32(%rdi) # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8] + + and \$0x80000,%edx # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support + cmp \$0x0,%edx + je .Lno_extended_info + mov \$0x24,%eax # Have AVX10 Support, query for details + mov \$0x0,%ecx + cpuid # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf + mov %ebx,36(%rdi) # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9] + .Lno_extended_info: bt \$27,%r9d # check OSXSAVE bit @@ -223,6 +246,9 @@ OPENSSL_ia32_cpuid: cmp \$6,%eax je .Ldone .Lclear_avx: + andl \$0xff7fffff,20(%rdi) # ~(1<<23) + # clear AVXIFMA, which is VEX-encoded + # and requires YMM state support mov \$0xefffe7ff,%eax # ~(1<<28|1<<12|1<<11) and %eax,%r9d # clear AVX, FMA and AMD XOP bits mov \$0x3fdeffdf,%eax # ~(1<<31|1<<30|1<<21|1<<16|1<<5) diff --git a/crypto/x86cpuid.pl b/crypto/x86cpuid.pl index a7bcb27e26..35e2c5b0a5 100644 --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -137,7 +137,28 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &mov ("eax",7); &xor ("ecx","ecx"); &cpuid (); - &mov (&DWP(8,"edi"),"ebx"); # save extended feature flag + &mov (&DWP(8,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2] + &mov (&DWP(12,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3] + &mov (&DWP(16,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4] + &cmp ("eax",1); # Do we have cpuid(EAX=0x7, ECX=0x1)? 
+ &jb (&label("no_extended_info")); + &mov ("eax",7); + &mov ("ecx",1); + &cpuid (); # cpuid(EAX=0x7, ECX=0x1) + &mov (&DWP(20,"edi"),"eax"); # save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5] + &mov (&DWP(24,"edi"),"edx"); # save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6] + &mov (&DWP(28,"edi"),"ebx"); # save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7] + &mov (&DWP(32,"edi"),"ecx"); # save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8] + + &and ("edx",0x80000); # Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support + &cmp ("edx",0x0); + &je (&label("no_extended_info")); + + &mov ("eax",0x24); # Have AVX10 Support, query for details + &mov ("ecx",0x0); + &cpuid (); # cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf + &mov (&DWP(36,"edi"),"ebx"); # save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9] + &set_label("no_extended_info"); &bt ("ebp",27); # check OSXSAVE bit @@ -154,6 +175,9 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); } &and ("esi",0xfeffffff); # clear FXSR &set_label("clear_avx"); &and ("ebp",0xefffe7ff); # clear AVX, FMA and AMD XOP bits + &and (&DWP(20,"edi"),0xff7fffff); # ~(1<<23) clear AVXIFMA, + # which is VEX-encoded + # and requires YMM state support &and (&DWP(8,"edi"),0xffffffdf); # clear AVX2 &set_label("done"); &mov ("eax","esi"); diff --git a/doc/man3/OPENSSL_ia32cap.pod b/doc/man3/OPENSSL_ia32cap.pod index c6c1c0185a..2b0dc93d49 100644 --- a/doc/man3/OPENSSL_ia32cap.pod +++ b/doc/man3/OPENSSL_ia32cap.pod @@ -10,81 +10,77 @@ OPENSSL_ia32cap - the x86[_64] processor capabilities vector =head1 DESCRIPTION -OpenSSL supports a range of x86[_64] instruction set extensions. These -extensions are denoted by individual bits in capability vector returned -by processor in EDX:ECX register pair after executing CPUID instruction -with EAX=1 input value (see Intel Application Note #241618). 
This vector -is copied to memory upon toolkit initialization and used to choose -between different code paths to provide optimal performance across wide -range of processors. For the moment of this writing following bits are -significant: +OpenSSL supports a range of x86[_64] instruction set extensions and +features. These extensions are denoted by individual bits or groups of bits +stored internally as ten 32-bit capability vectors and for simplicity +represented logically below as five 64-bit vectors. This logical +vector (LV) representation is used to streamline the definition of the +OPENSSL_ia32cap environment variable. + +Upon toolkit initialization, the capability vectors are populated through +successive executions of the CPUID instruction, after which any OPENSSL_ia32cap +environment variable capability bit modifications are applied. After toolkit +initialization is complete, populated vectors are then used to choose +between different code paths to provide optimal performance across a wide +range of x86[_64] based processors. + +Further CPUID information can be found in the Intel(R) Architecture +Instruction Set Extensions Programming Reference, and the AMD64 Architecture +Programmer's Manual (Volume 3). + +=head2 Notable Capability Bits for LV0 + +The following are notable capability bits from logical vector 0 (LV0) +resulting from the following execution of CPUID.(EAX=01H).EDX and +CPUID.(EAX=01H).ECX: =over 4 -=item bit #4 denoting presence of Time-Stamp Counter. 
+=item bit #0+4 denoting presence of Time-Stamp Counter; -=item bit #19 denoting availability of CLFLUSH instruction; +=item bit #0+19 denoting availability of CLFLUSH instruction; -=item bit #20, reserved by Intel, is used to choose among RC4 code paths; +=item bit #0+20, reserved by Intel, is used to choose among RC4 code paths; -=item bit #23 denoting MMX support; +=item bit #0+23 denoting MMX support; -=item bit #24, FXSR bit, denoting availability of XMM registers; +=item bit #0+24, FXSR bit, denoting availability of XMM registers; -=item bit #25 denoting SSE support; +=item bit #0+25 denoting SSE support; -=item bit #26 denoting SSE2 support; +=item bit #0+26 denoting SSE2 support; -=item bit #28 denoting Hyperthreading, which is used to distinguish +=item bit #0+28 denoting Hyperthreading, which is used to distinguish cores with shared cache; -=item bit #30, reserved by Intel, denotes specifically Intel CPUs; +=item bit #0+30, reserved by Intel, denotes specifically Intel CPUs; -=item bit #33 denoting availability of PCLMULQDQ instruction; +=item bit #0+33 denoting availability of PCLMULQDQ instruction; -=item bit #41 denoting SSSE3, Supplemental SSE3, support; +=item bit #0+41 denoting SSSE3, Supplemental SSE3, support; -=item bit #43 denoting AMD XOP support (forced to zero on non-AMD CPUs); +=item bit #0+43 denoting AMD XOP support (forced to zero on non-AMD CPUs); -=item bit #54 denoting availability of MOVBE instruction; +=item bit #0+54 denoting availability of MOVBE instruction; -=item bit #57 denoting AES-NI instruction set extension; +=item bit #0+57 denoting AES-NI instruction set extension; -=item bit #58, XSAVE bit, lack of which in combination with MOVBE is used +=item bit #0+58, XSAVE bit, lack of which in combination with MOVBE is used to identify Atom Silvermont core; -=item bit #59, OSXSAVE bit, denoting availability of YMM registers; +=item bit #0+59, OSXSAVE bit, denoting availability of YMM registers; -=item bit #60 denoting AVX 
extension; +=item bit #0+60 denoting AVX extension; -=item bit #62 denoting availability of RDRAND instruction; +=item bit #0+62 denoting availability of RDRAND instruction; =back -For example, in 32-bit application context clearing bit #26 at run-time -disables high-performance SSE2 code present in the crypto library, while -clearing bit #24 disables SSE2 code operating on 128-bit XMM register -bank. You might have to do the latter if target OpenSSL application is -executed on SSE2 capable CPU, but under control of OS that does not -enable XMM registers. Historically address of the capability vector copy -was exposed to application through OPENSSL_ia32cap_loc(), but not -anymore. Now the only way to affect the capability detection is to set -B<OPENSSL_ia32cap> environment variable prior target application start. To -give a specific example, on Intel P4 processor -C<env OPENSSL_ia32cap=0x16980010 apps/openssl>, or better yet -C<env OPENSSL_ia32cap=~0x1000000 apps/openssl> would achieve the desired -effect. Alternatively you can reconfigure the toolkit with no-sse2 -option and recompile. +=head2 Notable Capability Bits for LV1 -Less intuitive is clearing bit #28, or ~0x10000000 in the "environment -variable" terms. The truth is that it's not copied from CPUID output -verbatim, but is adjusted to reflect whether or not the data cache is -actually shared between logical cores. This in turn affects the decision -on whether or not expensive countermeasures against cache-timing attacks -are applied, most notably in AES assembler module. - -The capability vector is further extended with EBX value returned by -CPUID with EAX=7 and ECX=0 as input.
Following bits are significant: +The following are notable capability bits from logical vector 1 (LV1) +resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EBX and +CPUID.(EAX=07H,ECX=0H).ECX: =over 4 @@ -103,8 +99,7 @@ and RORX; =item bit #64+19 denoting availability of ADCX and ADOX instructions; -=item bit #64+21 denoting availability of VPMADD52[LH]UQ instructions, -aka AVX512IFMA extension; +=item bit #64+21 denoting availability of AVX512IFMA extension; =item bit #64+29 denoting availability of SHA extension; @@ -118,10 +113,109 @@ aka AVX512IFMA extension; =back -To control this extended capability word use C<:> as delimiter when -setting up B<OPENSSL_ia32cap> environment variable. For example assigning -C<:~0x20> would disable AVX2 code paths, and C<:0> - all post-AVX -extensions. +=head2 Notable Capability Bits for LV2 + +The following are notable capability bits from logical vector 2 (LV2) +resulting from the following execution of CPUID.(EAX=07H,ECX=0H).EDX and +CPUID.(EAX=07H,ECX=1H).EAX: + +=over 4 + +=item bit #128+15 denoting availability of Hybrid CPU; + +=item bit #128+29 denoting support for IA32_ARCH_CAPABILITIES MSR; + +=item bit #128+32 denoting availability of SHA512 extension; + +=item bit #128+33 denoting availability of SM3 extension; + +=item bit #128+34 denoting availability of SM4 extension; + +=item bit #128+55 denoting availability of AVX-IFMA extension; + +=back + +=head2 Notable Capability Bits for LV3 + +The following are notable capability bits from logical vector 3 (LV3) +resulting from the following execution of CPUID.(EAX=07H,ECX=1H).EDX and +CPUID.(EAX=07H,ECX=1H).EBX: + +=over 4 + +=item bit #192+19 denoting availability of AVX10 Converged Vector ISA extension; + +=item bit #192+21 denoting availability of APX_F extension; + +=back + +=head2 Notable Capability Bits for LV4 + +The following are notable capability bits from logical vector 4 (LV4) +resulting from the following execution of CPUID.(EAX=07H,ECX=1H).ECX and
+CPUID.(EAX=24H,ECX=0H).EBX: + +=over 4 + +=item bits #256+32+[0:7] denoting AVX10 Converged Vector ISA Version (8 bits); + +=item bit #256+48 denoting AVX10 XMM support; + +=item bit #256+49 denoting AVX10 YMM support; + +=item bit #256+50 denoting AVX10 ZMM support; + +=back + +=head2 OPENSSL_ia32cap environment variable + +The B<OPENSSL_ia32cap> environment variable provides a mechanism to override +the default capability vector values at library initialization time. +The variable consists of a series of 64-bit numbers representing each +of the logical vectors (LV) described above. Each value is delimited by a 'B<:>'. +Decimal/Octal/Hexadecimal values representations are supported. + +C<export OPENSSL_ia32cap="LV0:LV1:LV2:LV3:LV4"> + +Used in this form, each non-null logical vector will *overwrite* the entire corresponding +capability vector pair with the provided value. To keep compatibility with the +behaviour of the original OPENSSL_ia32cap environment variable +, the next capability vector pairs will be set to zero. + +To illustrate, the following will zero all capability bits in logical vectors 1 and further +(disable all post-AVX extensions): + +C<export OPENSSL_ia32cap=":0"> + +The following will zero all capability bits in logical vectors 2 and further: + +C<export OPENSSL_ia32cap="::0"> + +The following will zero all capability bits only in logical vector 1: +C<export OPENSSL_ia32cap=":0::::"> + +A more likely usage scenario would be to disable specific instruction set extensions. +The 'B<~>' character is used to specify a bit mask of the extensions to be disabled for +a particular logical vector. + +To illustrate, the following will disable AVX2 code paths and further extensions: + +C<export OPENSSL_ia32cap=":~0x20"> + +The following will disable AESNI (LV0 bit 57) and VAES (LV1 bit 41) +extensions and therefore any code paths using those extensions but leave +the rest of the logical vectors unchanged: + +C<export OPENSSL_ia32cap="~0x200000000000000:~0x20000000000:~0x0:~0x0:~0x0"> + +=head1 NOTES + +Not all capability bits are copied from CPUID output verbatim. An example +of this is the somewhat less intuitive clearing of LV0 bit #28, or ~0x10000000 +in the "environment variable" terms.
It has been adjusted to reflect whether or +not the data cache is actually shared between logical cores. This in turn affects +the decision on whether or not expensive countermeasures against cache-timing attacks +are applied, most notably in AES assembler module. =head1 RETURN VALUES diff --git a/include/internal/cryptlib.h b/include/internal/cryptlib.h index 3227f9fcf9..da442f8a86 100644 --- a/include/internal/cryptlib.h +++ b/include/internal/cryptlib.h @@ -36,8 +36,10 @@ void OPENSSL_cpuid_setup(void); #if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ defined(__x86_64) || defined(__x86_64__) || \ defined(_M_AMD64) || defined(_M_X64) +# define OPENSSL_IA32CAP_P_MAX_INDEXES 10 extern unsigned int OPENSSL_ia32cap_P[]; #endif + void OPENSSL_showfatal(const char *fmta, ...); int ossl_do_ex_data_init(OSSL_LIB_CTX *ctx); void ossl_crypto_cleanup_all_ex_data_int(OSSL_LIB_CTX *ctx);