Enable GCC to support Intel Key Locker ISA

gcc/ChangeLog

2018-12-15  Xuepeng Guo  <xuepeng.guo@intel.com>
	    Hongyu Wang  <hongyu.wang@intel.com>
	    Hongtao Liu  <hongtao.liu@intel.com>

	* common/config/i386/cpuinfo.h (get_available_features):
	Detect KL, AESKLE and WIDEKL features.
	* common/config/i386/i386-common.c
	(OPTION_MASK_ISA_KL_SET): New.
	(OPTION_MASK_ISA_WIDEKL_SET): Likewise.
	(OPTION_MASK_ISA_KL_UNSET): Likewise.
	(OPTION_MASK_ISA_WIDEKL_UNSET): Likewise.
	(OPTION_MASK_ISA2_AVX2_UNSET): Likewise.
	(OPTION_MASK_ISA2_AVX_UNSET): Likewise.
	(OPTION_MASK_ISA2_SSE4_2_UNSET): Likewise.
	(OPTION_MASK_ISA2_SSE4_1_UNSET): Likewise.
	(OPTION_MASK_ISA2_SSE4_UNSET): Likewise.
	(OPTION_MASK_ISA2_SSSE3_UNSET): Likewise.
	(OPTION_MASK_ISA2_SSE3_UNSET): Likewise.
	(OPTION_MASK_ISA2_SSE2_UNSET): Likewise.
	(OPTION_MASK_ISA2_SSE_UNSET): Likewise.
	(ix86_handle_option): Handle kl and widekl, add dependency chain
	for KL and SSE2.
	* common/config/i386/i386-cpuinfo.h (enum processor_features):
	(FEATURE_KL, FEATURE_AESKLE, FEATURE_WIDEKL): New.
	* common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY
	for KL, AESKLE and WIDEKL.
	* config.gcc: Add keylockerintrin.h.
	* doc/invoke.texi: Document new option -mkl and -mwidekl.
	* doc/extend.texi: Document kl and widekl.
	* config/i386/cpuid.h (bit_KL, bit_AESKLE, bit_WIDEKL): New.
	* config/i386/i386-builtin-types.def ((UINT, UINT, V2DI, V2DI, PVOID),
	(UINT, UINT, V2DI, PVOID), (VOID, V2DI, V2DI, V2DI, UINT),
	(UINT8, PV2DI, V2DI, PCVOID), (UINT8, PV2DI, PCV2DI, PCVOID)): New
	function types.
	* config/i386/i386-builtin.def: Add
	__builtin_ia32_loadiwkey,
	__builtin_ia32_aesdec128kl_u8,
	__builtin_ia32_aesdec256kl_u8,
	__builtin_ia32_aesenc128kl_u8,
	__builtin_ia32_aesenc256kl_u8,
	__builtin_ia32_aesdecwide128kl_u8,
	__builtin_ia32_aesdecwide256kl_u8,
	__builtin_ia32_aesencwide128kl_u8,
	__builtin_ia32_aesencwide256kl_u8,
	__builtin_ia32_encodekey128_u32,
	__builtin_ia32_encodekey256_u32.
	* config/i386/i386-c.c (ix86_target_macros_internal): Handle
	kl and widekl.
	* config/i386/i386-options.c (isa2_opts): Add -mkl and -mwidekl.
	(ix86_option_override_internal): Handle KL and WIDEKL.
	(ix86_valid_target_attribute_inner_p): Add attribute for kl and widekl.
	* config/i386/i386-expand.c
	(ix86_expand_builtin): Expand Keylocker Builtins.
	* config/i386/i386.h (TARGET_KL): New.
	(TARGET_KL_P): Likewise.
	(TARGET_WIDEKL): Likewise.
	(TARGET_WIDEKL_P): Likewise.
	(PTA_KL): Likewise.
	(PTA_WIDEKL): Likewise.
	(PTA_TIGERLAKE): Add PTA_KL, PTA_WIDEKL.
	(PTA_ALDERLAKE): Likewise.
	* config/i386/i386.opt: Add new option mkl and mwidekl.
	* config/i386/keylockerintrin.h: New header file for Keylocker.
	* config/i386/immintrin.h: Include keylockerintrin.h.
	* config/i386/predicates.md (encodekey128_operation): New
	predicate.
	(encodekey256_operation): Likewise.
	(aeswidekl_operation): Likewise.
	* config/i386/sse.md (UNSPECV_LOADIWKEY): New.
	(UNSPECV_AESDEC128KLU8): Likewise.
	(UNSPECV_AESENC128KLU8): Likewise.
	(UNSPECV_AESDEC256KLU8): Likewise.
	(UNSPECV_AESENC256KLU8): Likewise.
	(UNSPECV_AESDECWIDE128KLU8): Likewise.
	(UNSPECV_AESENCWIDE128KLU8): Likewise.
	(UNSPECV_AESDECWIDE256KLU8): Likewise.
	(UNSPECV_AESENCWIDE256KLU8): Likewise.
	(UNSPECV_ENCODEKEY128U32): Likewise.
	(UNSPECV_ENCODEKEY256U32): Likewise.
	(encodekey128u32): New expander.
	(encodekey256u32): Likewise.
	(aes<aeswideklvariant>u8): Likewise.
	(loadiwkey): New insn pattern.
	(*encodekey128u32): Likewise.
	(*encodekey256u32): Likewise.
	(aes<aesklvariant>u8): Likewise.
	(*aes<aeswideklvariant>u8): Likewise.

gcc/testsuite/ChangeLog

	* gcc.target/i386/keylocker-aesdec128kl.c: New test.
	* gcc.target/i386/keylocker-aesdec256kl.c: Likewise.
	* gcc.target/i386/keylocker-aesdecwide128kl.c: Likewise.
	* gcc.target/i386/keylocker-aesdecwide256kl.c: Likewise.
	* gcc.target/i386/keylocker-aesenc128kl.c: Likewise.
	* gcc.target/i386/keylocker-aesencwide128kl.c: Likewise.
	* gcc.target/i386/keylocker-aesencwide256kl.c: Likewise.
	* gcc.target/i386/keylocker-encodekey128.c: Likewise.
	* gcc.target/i386/keylocker-encodekey256.c: Likewise.
	* gcc.target/i386/keylocker-loadiwkey.c: Likewise.
	* g++.dg/other/i386-2.C: Add -mkl and -mwidekl.
	* g++.dg/other/i386-3.C: Likewise.
	* gcc.target/i386/sse-12.c: Likewise.
	* gcc.target/i386/sse-13.c: Likewise.
	* gcc.target/i386/sse-14.c: Likewise.
	* gcc.target/i386/sse-22.c: Add kl and widekl.
	* gcc.target/i386/sse-23.c: Likewise.
	* gcc.target/i386/funcspec-56.inc: Add new target attribute test.
This commit is contained in:
liuhongt 2020-03-05 17:36:02 +08:00
parent 1e1e1edf88
commit 632a2f50b8
37 changed files with 1129 additions and 33 deletions

View File

@ -523,6 +523,8 @@ get_available_features (struct __processor_model *cpu_model,
int avx_usable = 0;
int avx512_usable = 0;
int amx_usable = 0;
/* Check if KL is usable. */
int has_kl = 0;
if ((ecx & bit_OSXSAVE))
{
/* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
@ -667,6 +669,8 @@ get_available_features (struct __processor_model *cpu_model,
if (edx & bit_AMX_BF16)
set_feature (FEATURE_AMX_BF16);
}
if (ecx & bit_KL)
has_kl = 1;
if (avx512_usable)
{
if (ebx & bit_AVX512F)
@ -733,6 +737,21 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_PTWRITE);
}
/* Get Advanced Features at level 0x19 (eax = 0x19). */
if (max_cpuid_level >= 0x19)
{
set_feature (FEATURE_AESKLE);
__cpuid (19, eax, ebx, ecx, edx);
/* Check if OS support keylocker. */
if (ebx & bit_AESKLE)
{
if (ebx & bit_WIDEKL)
set_feature (FEATURE_WIDEKL);
if (has_kl)
set_feature (FEATURE_KL);
}
}
/* Check cpuid level of extended features. */
__cpuid (0x80000000, ext_level, ebx, ecx, edx);

View File

@ -165,6 +165,9 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_TSXLDTRK_SET OPTION_MASK_ISA2_TSXLDTRK
#define OPTION_MASK_ISA2_UINTR_SET OPTION_MASK_ISA2_UINTR
#define OPTION_MASK_ISA2_HRESET_SET OPTION_MASK_ISA2_HRESET
#define OPTION_MASK_ISA2_KL_SET OPTION_MASK_ISA2_KL
#define OPTION_MASK_ISA2_WIDEKL_SET \
(OPTION_MASK_ISA2_WIDEKL | OPTION_MASK_ISA2_KL_SET)
/* Define a set of ISAs which aren't available when a given ISA is
disabled. MMX and SSE ISAs are handled separately. */
@ -258,6 +261,9 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
#define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
#define OPTION_MASK_ISA2_HRESET_UNSET OPTION_MASK_ISA2_HRESET
#define OPTION_MASK_ISA2_KL_UNSET \
(OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
@ -304,6 +310,16 @@ along with GCC; see the file COPYING3. If not see
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
(OPTION_MASK_ISA2_AVX512F_UNSET)
#define OPTION_MASK_ISA2_AVX2_UNSET OPTION_MASK_ISA2_AVX512F_UNSET
#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
#define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
#define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
#define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
#define OPTION_MASK_ISA2_SSSE3_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
#define OPTION_MASK_ISA2_SSE3_UNSET OPTION_MASK_ISA2_SSSE3_UNSET
#define OPTION_MASK_ISA2_SSE2_UNSET \
(OPTION_MASK_ISA2_SSE3_UNSET | OPTION_MASK_ISA2_KL_UNSET)
#define OPTION_MASK_ISA2_SSE_UNSET OPTION_MASK_ISA2_SSE2_UNSET
#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA2_AVX512BF16_UNSET
@ -399,8 +415,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE_UNSET;
}
return true;
@ -414,8 +430,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE2_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE2_UNSET;
}
return true;
@ -429,8 +445,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE3_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE3_UNSET;
}
return true;
@ -444,8 +460,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSSE3_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSSE3_UNSET;
}
return true;
@ -459,8 +475,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_1_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_1_UNSET;
}
return true;
@ -474,8 +490,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_2_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_2_UNSET;
}
return true;
@ -489,8 +505,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX_UNSET;
}
return true;
@ -504,8 +520,8 @@ ix86_handle_option (struct gcc_options *opts,
{
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX2_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX2_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX2_UNSET;
}
return true;
@ -691,6 +707,40 @@ ix86_handle_option (struct gcc_options *opts,
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_ENQCMD_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_ENQCMD_UNSET;
}
return true;
case OPT_mkl:
if (value)
{
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_KL_SET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_KL_SET;
/* The Keylocker instructions need XMM registers from SSE2. */
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
}
else
{
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_KL_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_KL_UNSET;
}
return true;
case OPT_mwidekl:
if (value)
{
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_WIDEKL_SET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_WIDEKL_SET;
/* The Widekl instructions need XMM registers from SSE2. */
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
}
else
{
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_WIDEKL_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_WIDEKL_UNSET;
}
return true;
case OPT_mserialize:
@ -1043,8 +1093,8 @@ ix86_handle_option (struct gcc_options *opts,
case OPT_mno_sse4:
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_UNSET;
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_UNSET;
return true;
case OPT_msse4a:

View File

@ -221,6 +221,9 @@ enum processor_features
FEATURE_AMX_BF16,
FEATURE_UINTR,
FEATURE_HRESET,
FEATURE_KL,
FEATURE_AESKLE,
FEATURE_WIDEKL,
CPU_FEATURE_MAX
};

View File

@ -165,4 +165,7 @@ ISA_NAMES_TABLE_START
ISA_NAMES_TABLE_ENTRY("amx-bf16", FEATURE_AMX_BF16, P_NONE, "-mamx-bf16")
ISA_NAMES_TABLE_ENTRY("uintr", FEATURE_UINTR, P_NONE, "-muintr")
ISA_NAMES_TABLE_ENTRY("hreset", FEATURE_HRESET, P_NONE, "-mhreset")
ISA_NAMES_TABLE_ENTRY("kl", FEATURE_KL, P_NONE, "-mkl")
ISA_NAMES_TABLE_ENTRY("aeskle", FEATURE_AESKLE, P_NONE, NULL)
ISA_NAMES_TABLE_ENTRY("widekl", FEATURE_WIDEKL, P_NONE, "-mwidekl")
ISA_NAMES_TABLE_END

View File

@ -414,7 +414,7 @@ i[34567]86-*-*)
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
amxbf16intrin.h x86gprintrin.h uintrintrin.h
hresetintrin.h"
hresetintrin.h keylockerintrin.h"
;;
x86_64-*-*)
cpu_type=i386
@ -451,7 +451,7 @@ x86_64-*-*)
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
amxbf16intrin.h x86gprintrin.h uintrintrin.h
hresetintrin.h"
hresetintrin.h keylockerintrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h

View File

@ -119,6 +119,7 @@
#define bit_MOVDIR64B (1 << 28)
#define bit_ENQCMD (1 << 29)
#define bit_CLDEMOTE (1 << 25)
#define bit_KL (1 << 23)
/* %edx */
#define bit_AVX5124VNNIW (1 << 2)
@ -146,6 +147,12 @@
/* %ebx */
#define bit_PTWRITE (1 << 4)
/* Keylocker leaf (%eax == 0x19) */
/* %ebx */
#define bit_AESKLE ( 1<<0 )
#define bit_WIDEKL ( 1<<2 )
/* Signatures for different CPU implementations as returned in uses
of cpuid with level 0. */
#define signature_AMD_ebx 0x68747541

View File

@ -1290,3 +1290,10 @@ DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI)
DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI, UQI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI)
DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI, UQI)
# KEYLOCKER builtins
DEF_FUNCTION_TYPE (UINT, UINT, V2DI, V2DI, PVOID)
DEF_FUNCTION_TYPE (UINT, UINT, V2DI, PVOID)
DEF_FUNCTION_TYPE (VOID, V2DI, V2DI, V2DI, UINT)
DEF_FUNCTION_TYPE (UINT8, PV2DI, V2DI, PCVOID)
DEF_FUNCTION_TYPE (UINT8, PV2DI, PCV2DI, PCVOID)

View File

@ -460,6 +460,19 @@ BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, CODE_FOR_senduipi, "__buil
/* HRESET */
BDESC (0, OPTION_MASK_ISA2_HRESET, CODE_FOR_hreset, "__builtin_ia32_hreset", IX86_BUILTIN_HRESET, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
/* KEYLOCKER */
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_loadiwkey", IX86_BUILTIN_LOADIWKEY, UNKNOWN, (int) VOID_FTYPE_V2DI_V2DI_V2DI_UINT)
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_aesdec128kl_u8", IX86_BUILTIN_AESDEC128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_V2DI_PCVOID)
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_aesdec256kl_u8", IX86_BUILTIN_AESDEC256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_V2DI_PCVOID)
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_aesenc128kl_u8", IX86_BUILTIN_AESENC128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_V2DI_PCVOID)
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_aesenc256kl_u8", IX86_BUILTIN_AESENC256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_V2DI_PCVOID)
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_encodekey128_u32", IX86_BUILTIN_ENCODEKEY128U32, UNKNOWN, (int) UINT_FTYPE_UINT_V2DI_PVOID)
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_encodekey256_u32", IX86_BUILTIN_ENCODEKEY256U32, UNKNOWN, (int) UINT_FTYPE_UINT_V2DI_V2DI_PVOID)
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesdecwide128kl_u8", IX86_BUILTIN_AESDECWIDE128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesdecwide256kl_u8", IX86_BUILTIN_AESDECWIDE256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide128kl_u8", IX86_BUILTIN_AESENCWIDE128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide256kl_u8", IX86_BUILTIN_AESENCWIDE256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
BDESC_END (SPECIAL_ARGS, ARGS)
/* Builtins with variable number of arguments. */

View File

@ -602,6 +602,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__UINTR__");
if (isa_flag2 & OPTION_MASK_ISA2_HRESET)
def_or_undef (parse_in, "__HRESET__");
if (isa_flag2 & OPTION_MASK_ISA2_KL)
def_or_undef (parse_in, "__KL__");
if (isa_flag2 & OPTION_MASK_ISA2_WIDEKL)
def_or_undef (parse_in, "__WIDEKL__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");

View File

@ -11326,6 +11326,226 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
emit_insn (gen_cldemote (op0));
return 0;
case IX86_BUILTIN_LOADIWKEY:
{
arg0 = CALL_EXPR_ARG (exp, 0);
arg1 = CALL_EXPR_ARG (exp, 1);
arg2 = CALL_EXPR_ARG (exp, 2);
arg3 = CALL_EXPR_ARG (exp, 3);
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
op3 = expand_normal (arg3);
if (!REG_P (op0))
op0 = copy_to_mode_reg (V2DImode, op0);
if (!REG_P (op1))
op1 = copy_to_mode_reg (V2DImode, op1);
if (!REG_P (op2))
op2 = copy_to_mode_reg (V2DImode, op2);
if (!REG_P (op3))
op3 = copy_to_mode_reg (SImode, op3);
emit_insn (gen_loadiwkey (op0, op1, op2, op3));
return 0;
}
case IX86_BUILTIN_AESDEC128KLU8:
icode = CODE_FOR_aesdec128klu8;
goto aesdecenc_expand;
case IX86_BUILTIN_AESDEC256KLU8:
icode = CODE_FOR_aesdec256klu8;
goto aesdecenc_expand;
case IX86_BUILTIN_AESENC128KLU8:
icode = CODE_FOR_aesenc128klu8;
goto aesdecenc_expand;
case IX86_BUILTIN_AESENC256KLU8:
icode = CODE_FOR_aesenc256klu8;
aesdecenc_expand:
arg0 = CALL_EXPR_ARG (exp, 0); // __m128i *odata
arg1 = CALL_EXPR_ARG (exp, 1); // __m128i idata
arg2 = CALL_EXPR_ARG (exp, 2); // const void *p
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!address_operand (op0, V2DImode))
{
op0 = convert_memory_address (Pmode, op0);
op0 = copy_addr_to_reg (op0);
}
op0 = gen_rtx_MEM (V2DImode, op0);
if (!REG_P (op1))
op1 = copy_to_mode_reg (V2DImode, op1);
if (!address_operand (op2, VOIDmode))
{
op2 = convert_memory_address (Pmode, op2);
op2 = copy_addr_to_reg (op2);
}
op2 = gen_rtx_MEM (BLKmode, op2);
emit_insn (GEN_FCN (icode) (op1, op1, op2));
if (target == 0)
target = gen_reg_rtx (QImode);
pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG),
const0_rtx);
emit_insn (gen_rtx_SET (target, pat));
emit_insn (gen_rtx_SET (op0, op1));
return target;
case IX86_BUILTIN_AESDECWIDE128KLU8:
icode = CODE_FOR_aesdecwide128klu8;
goto wideaesdecenc_expand;
case IX86_BUILTIN_AESDECWIDE256KLU8:
icode = CODE_FOR_aesdecwide256klu8;
goto wideaesdecenc_expand;
case IX86_BUILTIN_AESENCWIDE128KLU8:
icode = CODE_FOR_aesencwide128klu8;
goto wideaesdecenc_expand;
case IX86_BUILTIN_AESENCWIDE256KLU8:
icode = CODE_FOR_aesencwide256klu8;
wideaesdecenc_expand:
rtx xmm_regs[8];
rtx op;
arg0 = CALL_EXPR_ARG (exp, 0); // __m128i * odata
arg1 = CALL_EXPR_ARG (exp, 1); // const __m128i * idata
arg2 = CALL_EXPR_ARG (exp, 2); // const void *p
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!address_operand (op2, VOIDmode))
{
op2 = convert_memory_address (Pmode, op2);
op2 = copy_addr_to_reg (op2);
}
op2 = gen_rtx_MEM (BLKmode, op2);
for (i = 0; i < 8; i++)
{
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
op = gen_rtx_MEM (V2DImode,
plus_constant (Pmode, op1, (i * 16)));
emit_move_insn (xmm_regs[i], op);
}
emit_insn (GEN_FCN (icode) (op2));
if (target == 0)
target = gen_reg_rtx (QImode);
pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG),
const0_rtx);
emit_insn (gen_rtx_SET (target, pat));
for (i = 0; i < 8; i++)
{
op = gen_rtx_MEM (V2DImode,
plus_constant (Pmode, op0, (i * 16)));
emit_move_insn (op, xmm_regs[i]);
}
return target;
case IX86_BUILTIN_ENCODEKEY128U32:
{
rtx op, xmm_regs[7];
arg0 = CALL_EXPR_ARG (exp, 0); // unsigned int htype
arg1 = CALL_EXPR_ARG (exp, 1); // __m128i key
arg2 = CALL_EXPR_ARG (exp, 2); // void *h
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
if (!REG_P (op0))
op0 = copy_to_mode_reg (SImode, op0);
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
for (i = 0; i < 3; i++)
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
if (target == 0)
target = gen_reg_rtx (SImode);
emit_insn (gen_encodekey128u32 (target, op0));
for (i = 0; i < 3; i++)
{
op = gen_rtx_MEM (V2DImode,
plus_constant (Pmode, op2, (i * 16)));
emit_move_insn (op, xmm_regs[i]);
}
return target;
}
case IX86_BUILTIN_ENCODEKEY256U32:
{
rtx op, xmm_regs[7];
arg0 = CALL_EXPR_ARG (exp, 0); // unsigned int htype
arg1 = CALL_EXPR_ARG (exp, 1); // __m128i keylow
arg2 = CALL_EXPR_ARG (exp, 2); // __m128i keyhi
arg3 = CALL_EXPR_ARG (exp, 3); // void *h
op0 = expand_normal (arg0);
op1 = expand_normal (arg1);
op2 = expand_normal (arg2);
op3 = expand_normal (arg3);
if (!REG_P (op0))
op0 = copy_to_mode_reg (SImode, op0);
/* Force to use xmm0, xmm1 for keylow, keyhi*/
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
emit_move_insn (op, op1);
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (1));
emit_move_insn (op, op2);
for (i = 0; i < 4; i++)
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
if (target == 0)
target = gen_reg_rtx (SImode);
emit_insn (gen_encodekey256u32 (target, op0));
for (i = 0; i < 4; i++)
{
op = gen_rtx_MEM (V2DImode,
plus_constant (Pmode, op3, (i * 16)));
emit_move_insn (op, xmm_regs[i]);
}
return target;
}
case IX86_BUILTIN_VEC_INIT_V2SI:
case IX86_BUILTIN_VEC_INIT_V4HI:
case IX86_BUILTIN_VEC_INIT_V8QI:

View File

@ -214,7 +214,9 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mamx-int8", OPTION_MASK_ISA2_AMX_INT8 },
{ "-mamx-bf16", OPTION_MASK_ISA2_AMX_BF16 },
{ "-muintr", OPTION_MASK_ISA2_UINTR },
{ "-mhreset", OPTION_MASK_ISA2_HRESET }
{ "-mhreset", OPTION_MASK_ISA2_HRESET },
{ "-mkl", OPTION_MASK_ISA2_KL },
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL }
};
static struct ix86_target_opts isa_opts[] =
{
@ -1035,6 +1037,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
IX86_ATTR_ISA ("uintr", OPT_muintr),
IX86_ATTR_ISA ("ptwrite", OPT_mptwrite),
IX86_ATTR_ISA ("kl", OPT_mkl),
IX86_ATTR_ISA ("widekl", OPT_mwidekl),
IX86_ATTR_ISA ("avx512bf16", OPT_mavx512bf16),
IX86_ATTR_ISA ("enqcmd", OPT_menqcmd),
IX86_ATTR_ISA ("serialize", OPT_mserialize),
@ -2339,6 +2343,12 @@ ix86_option_override_internal (bool main_args_p,
if (((processor_alias_table[i].flags & PTA_TSXLDTRK) != 0)
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_TSXLDTRK))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_TSXLDTRK;
if (((processor_alias_table[i].flags & PTA_KL) != 0)
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_KL))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_KL;
if (((processor_alias_table[i].flags & PTA_WIDEKL) != 0)
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_WIDEKL))
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_WIDEKL;
if ((processor_alias_table[i].flags
& (PTA_PREFETCH_SSE | PTA_SSE)) != 0)

View File

@ -213,6 +213,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#define TARGET_UINTR_P(x) TARGET_ISA2_UINTR_P(x)
#define TARGET_HRESET TARGET_ISA2_HRESET
#define TARGET_HRESET_P(x) TARGET_ISA2_HRESET_P(x)
#define TARGET_KL TARGET_ISA2_KL
#define TARGET_KL_P(x) TARGET_ISA2_KL_P(x)
#define TARGET_WIDEKL TARGET_ISA2_WIDEKL
#define TARGET_WIDEKL_P(x) TARGET_ISA2_WIDEKL_P(x)
#define TARGET_LP64 TARGET_ABI_64
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
@ -2485,6 +2489,8 @@ const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20);
const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21);
const wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22);
const wide_int_bitmask PTA_HRESET(0, HOST_WIDE_INT_1U << 23);
const wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24);
const wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25);
const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE
| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR;
@ -2525,13 +2531,13 @@ const wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
const wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT | PTA_PCONFIG
| PTA_WBNOINVD | PTA_CLWB;
const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT;
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT | PTA_KL | PTA_WIDEKL;
const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
| PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
| PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
| PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR;
const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE
| PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET;
| PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL;
const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
| PTA_AVX512F | PTA_AVX512CD;
const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;

View File

@ -1135,3 +1135,11 @@ Support AMX-BF16 built-in functions and code generation.
mhreset
Target Report Mask(ISA2_HRESET) Var(ix86_isa_flags2) Save
Support HRESET built-in functions and code generation.
mkl
Target Report Mask(ISA2_KL) Var(ix86_isa_flags2) Save
Support KL built-in functions and code generation.
mwidekl
Target Report Mask(ISA2_WIDEKL) Var(ix86_isa_flags2) Save
Support WIDEKL built-in functions and code generation.

View File

@ -118,4 +118,6 @@
#include <prfchwintrin.h>
#include <keylockerintrin.h>
#endif /* _IMMINTRIN_H_INCLUDED */

View File

@ -0,0 +1,129 @@
/* Copyright (C) 2018 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#if !defined _IMMINTRIN_H_INCLUDED
# error "Never use <keylockerintrin.h> directly; include <x86intrin.h> instead."
#endif
#ifndef _KEYLOCKERINTRIN_H_INCLUDED
#define _KEYLOCKERINTRIN_H_INCLUDED
#ifndef __KL__
#pragma GCC push_options
#pragma GCC target("kl")
#define __DISABLE_KL__
#endif /* __KL__ */
extern __inline
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadiwkey (unsigned int __I, __m128i __A, __m128i __B, __m128i __C)
{
__builtin_ia32_loadiwkey ((__v2di) __B, (__v2di) __C, (__v2di) __A, __I);
}
extern __inline
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_encodekey128_u32 (unsigned int __I, __m128i __A, void * __P)
{
return __builtin_ia32_encodekey128_u32 (__I, (__v2di)__A, __P);
}
extern __inline
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_encodekey256_u32 (unsigned int __I, __m128i __A, __m128i __B, void * __P)
{
return __builtin_ia32_encodekey256_u32 (__I, (__v2di)__A, (__v2di)__B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdec128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{
return __builtin_ia32_aesdec128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdec256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{
return __builtin_ia32_aesdec256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenc128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{
return __builtin_ia32_aesenc128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesenc256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
{
return __builtin_ia32_aesenc256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
}
#ifdef __DISABLE_KL__
#undef __DISABLE_KL__
#pragma GCC pop_options
#endif /* __DISABLE_KL__ */
#ifndef __WIDEKL__
#pragma GCC push_options
#pragma GCC target("widekl")
#define __DISABLE_WIDEKL__
#endif /* __WIDEKL__ */
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdecwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
{
return __builtin_ia32_aesdecwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesdecwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
{
return __builtin_ia32_aesdecwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesencwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
{
return __builtin_ia32_aesencwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
}
extern __inline
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesencwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
{
return __builtin_ia32_aesencwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
}
#ifdef __DISABLE_WIDEKL__
#undef __DISABLE_WIDEKL__
#pragma GCC pop_options
#endif /* __DISABLE_WIDEKL__ */
#endif /* _KEYLOCKERINTRIN_H_INCLUDED */

View File

@ -1726,3 +1726,121 @@
}
return (i >= 12 && i <= 18);
})
;; Keylocker specific predicates
(define_predicate "encodekey128_operation"
(match_code "parallel")
{
unsigned i;
rtx elt;
if (XVECLEN (op, 0) != 8)
return false;
for(i = 0; i < 3; i++)
{
elt = XVECEXP (op, 0, i + 1);
if (GET_CODE (elt) != SET
|| GET_CODE (SET_DEST (elt)) != REG
|| GET_MODE (SET_DEST (elt)) != V2DImode
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|| GET_CODE (SET_SRC (elt)) != UNSPEC_VOLATILE
|| GET_MODE (SET_SRC (elt)) != V2DImode
|| XVECLEN(SET_SRC (elt), 0) != 1
|| XVECEXP(SET_SRC (elt), 0, 0) != const0_rtx)
return false;
}
for(i = 4; i < 7; i++)
{
elt = XVECEXP (op, 0, i);
if (GET_CODE (elt) != SET
|| GET_CODE (SET_DEST (elt)) != REG
|| GET_MODE (SET_DEST (elt)) != V2DImode
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|| SET_SRC (elt) != CONST0_RTX (V2DImode))
return false;
}
elt = XVECEXP (op, 0, 7);
if (GET_CODE (elt) != CLOBBER
|| GET_MODE (elt) != VOIDmode
|| GET_CODE (XEXP (elt, 0)) != REG
|| GET_MODE (XEXP (elt, 0)) != CCmode
|| REGNO (XEXP (elt, 0)) != FLAGS_REG)
return false;
return true;
})
(define_predicate "encodekey256_operation"
(match_code "parallel")
{
unsigned i;
rtx elt;
if (XVECLEN (op, 0) != 9)
return false;
elt = SET_SRC (XVECEXP (op, 0, 0));
elt = XVECEXP (elt, 0, 2);
if (!REG_P (elt)
|| REGNO(elt) != GET_SSE_REGNO (1))
return false;
for(i = 0; i < 4; i++)
{
elt = XVECEXP (op, 0, i + 1);
if (GET_CODE (elt) != SET
|| GET_CODE (SET_DEST (elt)) != REG
|| GET_MODE (SET_DEST (elt)) != V2DImode
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|| GET_CODE (SET_SRC (elt)) != UNSPEC_VOLATILE
|| GET_MODE (SET_SRC (elt)) != V2DImode
|| XVECLEN(SET_SRC (elt), 0) != 1
|| XVECEXP(SET_SRC (elt), 0, 0) != const0_rtx)
return false;
}
for(i = 4; i < 7; i++)
{
elt = XVECEXP (op, 0, i + 1);
if (GET_CODE (elt) != SET
|| GET_CODE (SET_DEST (elt)) != REG
|| GET_MODE (SET_DEST (elt)) != V2DImode
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|| SET_SRC (elt) != CONST0_RTX (V2DImode))
return false;
}
elt = XVECEXP (op, 0, 8);
if (GET_CODE (elt) != CLOBBER
|| GET_MODE (elt) != VOIDmode
|| GET_CODE (XEXP (elt, 0)) != REG
|| GET_MODE (XEXP (elt, 0)) != CCmode
|| REGNO (XEXP (elt, 0)) != FLAGS_REG)
return false;
return true;
})
(define_predicate "aeswidekl_operation"
(match_code "parallel")
{
unsigned i;
rtx elt;
for (i = 0; i < 8; i++)
{
elt = XVECEXP (op, 0, i + 1);
if (GET_CODE (elt) != SET
|| GET_CODE (SET_DEST (elt)) != REG
|| GET_MODE (SET_DEST (elt)) != V2DImode
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|| GET_CODE (SET_SRC (elt)) != UNSPEC_VOLATILE
|| GET_MODE (SET_SRC (elt)) != V2DImode
|| XVECLEN (SET_SRC (elt), 0) != 1
|| REGNO (XVECEXP (SET_SRC (elt), 0, 0)) != GET_SSE_REGNO (i))
return false;
}
return true;
})

View File

@ -205,6 +205,19 @@
UNSPECV_MWAIT
UNSPECV_VZEROALL
UNSPECV_VZEROUPPER
;; For KEYLOCKER
UNSPECV_LOADIWKEY
UNSPECV_AESDEC128KLU8
UNSPECV_AESENC128KLU8
UNSPECV_AESDEC256KLU8
UNSPECV_AESENC256KLU8
UNSPECV_AESDECWIDE128KLU8
UNSPECV_AESENCWIDE128KLU8
UNSPECV_AESDECWIDE256KLU8
UNSPECV_AESENCWIDE256KLU8
UNSPECV_ENCODEKEY128U32
UNSPECV_ENCODEKEY256U32
])
;; All vector modes including V?TImode, used in move patterns.
@ -23316,3 +23329,208 @@
(match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512BF16"
"vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
;; KEYLOCKER
(define_insn "loadiwkey"
[(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "v")
(match_operand:V2DI 1 "register_operand" "v")
(match_operand:V2DI 2 "register_operand" "Yz")
(match_operand:SI 3 "register_operand" "a")]
UNSPECV_LOADIWKEY)
(clobber (reg:CC FLAGS_REG))]
"TARGET_KL"
"loadiwkey\t{%0, %1|%1, %0}"
[(set_attr "type" "other")])
(define_expand "encodekey128u32"
[(match_par_dup 2
[(set (match_operand:SI 0 "register_operand")
(unspec_volatile:SI
[(match_operand:SI 1 "register_operand")
(reg:V2DI XMM0_REG)]
UNSPECV_ENCODEKEY128U32))])]
"TARGET_KL"
{
rtx xmm_regs[7];
rtx tmp_unspec;
unsigned i;
/* parallel rtx for encodekey128 predicate */
operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));
for (i = 0; i < 7; i++)
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
tmp_unspec
= gen_rtx_UNSPEC_VOLATILE (SImode,
gen_rtvec (2, operands[1], xmm_regs[0]),
UNSPECV_ENCODEKEY128U32);
XVECEXP (operands[2], 0, 0)
= gen_rtx_SET (operands[0], tmp_unspec);
tmp_unspec
= gen_rtx_UNSPEC_VOLATILE (V2DImode,
gen_rtvec (1, const0_rtx),
UNSPECV_ENCODEKEY128U32);
for (i = 0; i < 3; i++)
XVECEXP (operands[2], 0, i + 1)
= gen_rtx_SET (xmm_regs[i], tmp_unspec);
for (i = 4; i < 7; i++)
XVECEXP (operands[2], 0, i)
= gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
XVECEXP (operands[2], 0, 7)
= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
})
(define_insn "*encodekey128u32"
[(match_parallel 2 "encodekey128_operation"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec_volatile:SI
[(match_operand:SI 1 "register_operand" "r")
(reg:V2DI XMM0_REG)]
UNSPECV_ENCODEKEY128U32))])]
"TARGET_KL"
"encodekey128\t{%1, %0|%0, %1}"
[(set_attr "type" "other")])
(define_expand "encodekey256u32"
[(match_par_dup 2
[(set (match_operand:SI 0 "register_operand")
(unspec_volatile:SI
[(match_operand:SI 1 "register_operand")
(reg:V2DI XMM0_REG)
(reg:V2DI XMM1_REG)]
UNSPECV_ENCODEKEY256U32))])]
"TARGET_KL"
{
rtx xmm_regs[7];
rtx tmp_unspec;
unsigned i;
/* parallel rtx for encodekey256 predicate */
operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
for (i = 0; i < 7; i++)
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
tmp_unspec
= gen_rtx_UNSPEC_VOLATILE (SImode,
gen_rtvec (3, operands[1],
xmm_regs[0], xmm_regs[1]),
UNSPECV_ENCODEKEY256U32);
XVECEXP (operands[2], 0, 0)
= gen_rtx_SET (operands[0], tmp_unspec);
tmp_unspec
= gen_rtx_UNSPEC_VOLATILE (V2DImode,
gen_rtvec (1, const0_rtx),
UNSPECV_ENCODEKEY256U32);
for (i = 0; i < 4; i++)
XVECEXP (operands[2], 0, i + 1)
= gen_rtx_SET (xmm_regs[i], tmp_unspec);
for (i = 4; i < 7; i++)
XVECEXP (operands[2], 0, i + 1)
= gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
XVECEXP (operands[2], 0, 8)
= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
})
(define_insn "*encodekey256u32"
[(match_parallel 2 "encodekey256_operation"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec_volatile:SI
[(match_operand:SI 1 "register_operand" "r")
(reg:V2DI XMM0_REG)
(reg:V2DI XMM1_REG)]
UNSPECV_ENCODEKEY256U32))])]
"TARGET_KL"
"encodekey256\t{%1, %0|%0, %1}"
[(set_attr "type" "other")])
(define_int_iterator AESDECENCKL [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])
(define_int_attr aesklvariant [(UNSPECV_AESDEC128KLU8 "dec128kl")
(UNSPECV_AESDEC256KLU8 "dec256kl")
(UNSPECV_AESENC128KLU8 "enc128kl")
(UNSPECV_AESENC256KLU8 "enc256kl")])
(define_insn "aes<aesklvariant>u8"
[(set (match_operand:V2DI 0 "register_operand" "=v")
(unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
(match_operand:BLK 2 "memory_operand" "m")]
AESDECENCKL))
(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
"TARGET_KL"
"aes<aesklvariant>\t{%2, %0|%0, %2}"
[(set_attr "type" "other")])
(define_int_iterator AESDECENCWIDEKL [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])
(define_int_attr aeswideklvariant [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
(UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
(UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
(UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])
(define_int_attr AESWIDEKLVARIANT [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
(UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
(UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
(UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])
(define_expand "aes<aeswideklvariant>u8"
[(match_par_dup 1
[(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ
[(match_operand:BLK 0 "memory_operand")]
AESDECENCWIDEKL))])]
"TARGET_WIDEKL"
{
rtx xmm_regs[8];
rtx tmp_unspec;
unsigned i;
/* parallel rtx for widekl predicate */
operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
for (i = 0; i < 8; i++)
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
tmp_unspec
= gen_rtx_UNSPEC_VOLATILE (CCZmode,
gen_rtvec (1, operands[0]),
UNSPECV_<AESWIDEKLVARIANT>);
XVECEXP (operands[1], 0, 0)
= gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
tmp_unspec);
for (i = 0; i < 8; i++)
{
tmp_unspec
= gen_rtx_UNSPEC_VOLATILE (V2DImode,
gen_rtvec (1, xmm_regs[i]),
UNSPECV_<AESWIDEKLVARIANT>);
XVECEXP (operands[1], 0, i + 1)
= gen_rtx_SET (xmm_regs[i], tmp_unspec);
}
})
(define_insn "*aes<aeswideklvariant>u8"
[(match_parallel 1 "aeswidekl_operation"
[(set (reg:CCZ FLAGS_REG)
(unspec_volatile:CCZ
[(match_operand:BLK 0 "memory_operand" "m")]
AESDECENCWIDEKL))])]
"TARGET_WIDEKL"
"aes<aeswideklvariant>\t{%0}"
[(set_attr "type" "other")])

View File

@ -6652,6 +6652,16 @@ Enable/disable the generation of the UINTR instructions.
@cindex @code{target("hreset")} function attribute, x86
Enable/disable the generation of the HRESET instruction.
@item kl
@itemx no-kl
@cindex @code{target("kl")} function attribute, x86
Enable/disable the generation of the KEYLOCKER instructions.
@item widekl
@itemx no-widekl
@cindex @code{target("widekl")} function attribute, x86
Enable/disable the generation of the WIDEKL instructions.
@item cld
@itemx no-cld
@cindex @code{target("cld")} function attribute, x86

View File

@ -1369,6 +1369,7 @@ See RS/6000 and PowerPC Options.
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset@gol
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mkl -mwidekl @gol
-mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
-m96bit-long-double -mlong-double-64 -mlong-double-80 -mlong-double-128 @gol
@ -30365,6 +30366,11 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@itemx -mhreset
@opindex mhreset
@itemx -mkl
@opindex mkl
@need 200
@itemx -mwidekl
@opindex mwidekl
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
@ -30374,8 +30380,9 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
UINTR, HRESET or CLDEMOTE extended instruction sets. Each has a corresponding
@option{-mno-} option to disable use of these instructions.
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL or CLDEMOTE extended
instruction sets. Each has a corresponding @option{-mno-} option to disable
use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and

View File

@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,

View File

@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,

View File

@ -76,6 +76,8 @@ extern void test_amx_int8 (void) __attribute__((__target__("amx-int8")));
extern void test_amx_bf16 (void) __attribute__((__target__("amx-bf16")));
extern void test_uintr (void) __attribute__((__target__("uintr")));
extern void test_hreset (void) __attribute__((__target__("hreset")));
extern void test_keylocker (void) __attribute__((__target__("kl")));
extern void test_widekl (void) __attribute__((__target__("widekl")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
@ -153,6 +155,8 @@ extern void test_no_amx_int8 (void) __attribute__((__target__("no-amx-int8")));
extern void test_no_amx_bf16 (void) __attribute__((__target__("no-amx-bf16")));
extern void test_no_uintr (void) __attribute__((__target__("no-uintr")));
extern void test_no_hreset (void) __attribute__((__target__("no-hreset")));
extern void test_no_keylocker (void) __attribute__((__target__("no-kl")));
extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-mkl -O2" } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "aesdec128kl\[ \\t\]+\[^\n\]*h1\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "sete" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*k1" } } */
#include <immintrin.h>
__m128i k1, k2;
const char h1[48];
unsigned char
test_keylocker_1 (void)
{
return _mm_aesdec128kl_u8 (&k1, k2, h1);
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-mkl -O2" } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "aesdec256kl\[ \\t\]+\[^\n\]*h1\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "sete" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*k1" } } */
#include <immintrin.h>
__m128i k1, k2;
const char h1[48];
unsigned char
test_keylocker_3 (void)
{
return _mm_aesdec256kl_u8 (&k1, k2, h1);
}

View File

@ -0,0 +1,32 @@
/* { dg-do compile } */
/* { dg-options "-mwidekl -O2" } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+16(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+32(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+48(\\(%rip\\))?\[^\n\r]*%xmm3" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+64(\\(%rip\\))?\[^\n\r]*%xmm4" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+80(\\(%rip\\))?\[^\n\r]*%xmm5" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+96(\\(%rip\\))?\[^\n\r]*%xmm6" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+112(\\(%rip\\))?\[^\n\r]*%xmm7" } } */
/* { dg-final { scan-assembler "aesdecwide128kl\[ \\t\]+\[^\n\]*h1" } } */
/* { dg-final { scan-assembler "sete" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*odata(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*odata\\+16(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*odata\\+32(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*odata\\+48(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm4\[^\n\r]*odata\\+64(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm5\[^\n\r]*odata\\+80(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm6\[^\n\r]*odata\\+96(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm7\[^\n\r]*odata\\+112(\\(%rip\\))?" } } */
#include <immintrin.h>
const char h1[48];
const __m128i idata[8];
__m128i odata[8];
unsigned char
test_keylocker_5 (void)
{
return _mm_aesdecwide128kl_u8 (odata, idata, h1);
}

View File

@ -0,0 +1,32 @@
/* { dg-do compile } */
/* { dg-options "-mwidekl -O2" } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+16(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+32(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+48(\\(%rip\\))?\[^\n\r]*%xmm3" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+64(\\(%rip\\))?\[^\n\r]*%xmm4" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+80(\\(%rip\\))?\[^\n\r]*%xmm5" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+96(\\(%rip\\))?\[^\n\r]*%xmm6" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+112(\\(%rip\\))?\[^\n\r]*%xmm7" } } */
/* { dg-final { scan-assembler "aesdecwide256kl\[ \\t\]+\[^\n\]*h1" } } */
/* { dg-final { scan-assembler "sete" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*odata(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*odata\\+16(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*odata\\+32(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*odata\\+48(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm4\[^\n\r]*odata\\+64(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm5\[^\n\r]*odata\\+80(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm6\[^\n\r]*odata\\+96(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm7\[^\n\r]*odata\\+112(\\(%rip\\))?" } } */
#include <immintrin.h>
const char h1[48];
const __m128i idata[8];
__m128i odata[8];
unsigned char
test_keylocker_6 (void)
{
return _mm_aesdecwide256kl_u8 (odata, idata, h1);
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-mkl -O2" } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "aesenc128kl\[ \\t\]+\[^\n\]*h1\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "sete" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*k1" } } */
#include <immintrin.h>
__m128i k1, k2;
const char h1[48];
unsigned char
test_keylocker_2 (void)
{
return _mm_aesenc128kl_u8 (&k1, k2, h1);
}

View File

@ -0,0 +1,32 @@
/* { dg-do compile } */
/* { dg-options "-mwidekl -O2" } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+16(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+32(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+48(\\(%rip\\))?\[^\n\r]*%xmm3" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+64(\\(%rip\\))?\[^\n\r]*%xmm4" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+80(\\(%rip\\))?\[^\n\r]*%xmm5" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+96(\\(%rip\\))?\[^\n\r]*%xmm6" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+112(\\(%rip\\))?\[^\n\r]*%xmm7" } } */
/* { dg-final { scan-assembler "aesencwide128kl\[ \\t\]+\[^\n\]*h1(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "sete" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*odata(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*odata\\+16(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*odata\\+32(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*odata\\+48(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm4\[^\n\r]*odata\\+64(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm5\[^\n\r]*odata\\+80(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm6\[^\n\r]*odata\\+96(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm7\[^\n\r]*odata\\+112(\\(%rip\\))?" } } */
#include <immintrin.h>
const char h1[48];
const __m128i idata[8];
__m128i odata[8];
unsigned char
test_keylocker_7 (void)
{
return _mm_aesencwide128kl_u8 (odata, idata, h1);
}

View File

@ -0,0 +1,33 @@
/* { dg-do compile } */
/* { dg-options "-mwidekl -O2" } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+16(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+32(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+48(\\(%rip\\))?\[^\n\r]*%xmm3" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+64(\\(%rip\\))?\[^\n\r]*%xmm4" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+80(\\(%rip\\))?\[^\n\r]*%xmm5" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+96(\\(%rip\\))?\[^\n\r]*%xmm6" } } */
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+112(\\(%rip\\))?\[^\n\r]*%xmm7" } } */
/* { dg-final { scan-assembler "aesencwide256kl\[ \\t\]+\[^\n\]*h1(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "sete" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*odata(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*odata\\+16(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*odata\\+32(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*odata\\+48(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm4\[^\n\r]*odata\\+64(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm5\[^\n\r]*odata\\+80(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm6\[^\n\r]*odata\\+96(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm7\[^\n\r]*odata\\+112(\\(%rip\\))?" } } */
#include <immintrin.h>
const char h1[48];
const __m128i idata[8];
__m128i odata[8];
unsigned char
test_keylocker_8 (void)
{
return _mm_aesencwide256kl_u8 (odata, idata, h1);
}

View File

@ -0,0 +1,29 @@
/* { dg-do compile } */
/* { dg-options "-mkl -O2" } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k1(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "movl\[ \\t\]+\[^\n\]*ctrl(\\(%rip\\))?\[^\n\r]*%eax" } } */
/* { dg-final { scan-assembler "encodekey128\[ \\t\]+\[^\n\]*%eax\[^\n\r]*%eax" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*h2(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*h2\\+16(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*h2\\+32(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqa|movaps)\[ \\t\]+\[^\n\]*%xmm\[4-6\]\[^\n\r]*k2(\\(%rip\\))?" } } */
#include <immintrin.h>
unsigned int ctrl;
char h2[48];
__m128i k1, k2;
unsigned int
test_keylocker_9 (void)
{
unsigned int ret;
ret = _mm_encodekey128_u32 (ctrl, k1, h2);
if (ret)
k2 = (__m128i){0};
return ret;
}

View File

@ -0,0 +1,30 @@
/* { dg-do compile } */
/* { dg-options "-mkl -O2" } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k1(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
/* { dg-final { scan-assembler "movl\[ \\t\]+\[^\n\]*ctrl(\\(%rip\\))?\[^\n\r]*%eax" } } */
/* { dg-final { scan-assembler "encodekey256\[ \\t\]+\[^\n\]*%eax\[^\n\r]*%eax" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*h2(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*h2\\+16(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*h2\\+32(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*h2\\+48(\\(%rip\\))?" } } */
/* { dg-final { scan-assembler "(?:movdqa|movaps)\[ \\t\]+\[^\n\]*%xmm\[4-6\]\[^\n\r]*k3(\\(%rip\\))?" } } */
#include <immintrin.h>
unsigned int ctrl;
char h2[48];
__m128i k1, k2, k3;
unsigned int
test_keylocker_10 (void)
{
unsigned int ret;
ret = _mm_encodekey256_u32 (ctrl, k1, k2, h2);
if (ret)
k3 = (__m128i){0};
return ret;
}

View File

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-mkl -O2" } */
/* { dg-final { scan-assembler "movl\[ \\t\]+\[^\n\]*ctrl(\\(%rip\\))?\[^\n\r]*%eax" } } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k3(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k1(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
/* { dg-final { scan-assembler "loadiwkey\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*%xmm2" } } */
#include <immintrin.h>
unsigned int ctrl;
__m128i k1, k2, k3;
void
test_keylocker_11 (void)
{
_mm_loadiwkey (ctrl, k1, k2, k3);
}

View File

@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
#include <x86intrin.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>

View File

@ -103,7 +103,7 @@
#ifndef DIFFERENT_PRAGMAS
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
#endif
/* Following intrinsics require immediate arguments. They
@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)

View File

@ -708,6 +708,6 @@
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
#include <x86intrin.h>