mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-06 04:00:25 +08:00
Enable GCC to support Intel Key Locker ISA
gcc/ChangeLog 2018-12-15 Xuepeng Guo <xuepeng.guo@intel.com> Hongyu Wang <hongyu.wang@intel.com> Hongtao Liu <hongtao.liu@intel.com> * common/config/i386/cpuinfo.h (get_available_features): Detect KL, AESKLE and WIDEKL features. * common/config/i386/i386-common.c (OPTION_MASK_ISA_KL_SET): New. (OPTION_MASK_ISA_WIDEKL_SET): Likewise. (OPTION_MASK_ISA_KL_UNSET): Likewise. (OPTION_MASK_ISA_WIDEKL_UNSET): Likewise. (OPTION_MASK_ISA2_AVX2_UNSET): Likewise. (OPTION_MASK_ISA2_AVX_UNSET): Likewise. (OPTION_MASK_ISA2_SSE4_2_UNSET): Likewise. (OPTION_MASK_ISA2_SSE4_1_UNSET): Likewise. (OPTION_MASK_ISA2_SSE4_UNSET): Likewise. (OPTION_MASK_ISA2_SSSE3_UNSET): Likewise. (OPTION_MASK_ISA2_SSE3_UNSET): Likewise. (OPTION_MASK_ISA2_SSE2_UNSET): Likewise. (OPTION_MASK_ISA2_SSE_UNSET): Likewise. (ix86_handle_option): Handle kl and widekl, add dependency chain for KL and SSE2. * common/config/i386/i386-cpuinfo.h (enum processor_features): (FEATURE_KL, FEATURE_AESKLE, FEATURE_WIDEKL): New. * common/config/i386/i386-isas.h: Add ISA_NAMES_TABLE_ENTRY for KL, AESKLE and WIDEKL. * config.gcc: Add keylockerintrin.h. * doc/invoke.texi: Document new option -mkl and -mwidekl. * doc/extend.texi: Document kl and widekl. * config/i386/cpuid.h (bit_KL, bit_AESKLE, bit_WIDEKL): New. * config/i386/i386-builtin-types.def ((UINT, UINT, V2DI, V2DI, PVOID), (UINT, UINT, V2DI, PVOID), (VOID, V2DI, V2DI, V2DI, UINT), (UINT8, PV2DI, V2DI, PCVOID), (UINT8, PV2DI, PCV2DI, PCVOID)): New function types. * config/i386/i386-builtin.def: Add __builtin_ia32_loadiwkey, __builtin_ia32_aesdec128kl_u8, __builtin_ia32_aesdec256kl_u8, __builtin_ia32_aesenc128kl_u8, __builtin_ia32_aesenc256kl_u8, __builtin_ia32_aesdecwide128kl_u8, __builtin_ia32_aesdecwide256kl_u8, __builtin_ia32_aesencwide128kl_u8, __builtin_ia32_aesencwide256kl_u8, __builtin_ia32_encodekey128_u32, __builtin_ia32_encodekey256_u32. * config/i386/i386-c.c (ix86_target_macros_internal): Handle kl and widekl. * config/i386/i386-options.c (isa2_opts): Add -mkl and -mwidekl. (ix86_option_override_internal): Handle KL and WIDEKL. (ix86_valid_target_attribute_inner_p): Add attribute for kl and widekl. * config/i386/i386-expand.c (ix86_expand_builtin): Expand Keylocker Builtins. * config/i386/i386.h (TARGET_KL): New. (TARGET_KL_P): Likewise. (TARGET_WIDEKL): Likewise. (TARGET_WIDEKL_P): Likewise. (PTA_KL): Likewise. (PTA_WIDEKL): Likewise. (PTA_TIGERLAKE): Add PTA_KL, PTA_WIDEKL. (PTA_ALDERLAKE): Likewise. * config/i386/i386.opt: Add new option mkl and mwidekl. * config/i386/keylockerintrin.h: New header file for Keylocker. * config/i386/immintrin.h: Include keylockerintrin.h. * config/i386/predicates.md (encodekey128_operation): New predicate. (encodekey256_operation): Likewise. (aeswidekl_operation): Likewise. * config/i386/sse.md (UNSPECV_LOADIWKEY): New. (UNSPECV_AESDEC128KLU8): Likewise. (UNSPECV_AESENC128KLU8): Likewise. (UNSPECV_AESDEC256KLU8): Likewise. (UNSPECV_AESENC256KLU8): Likewise. (UNSPECV_AESDECWIDE128KLU8): Likewise. (UNSPECV_AESENCWIDE128KLU8): Likewise. (UNSPECV_AESDECWIDE256KLU8): Likewise. (UNSPECV_AESENCWIDE256KLU8): Likewise. (UNSPECV_ENCODEKEY128U32): Likewise. (UNSPECV_ENCODEKEY256U32): Likewise. (encodekey128u32): New expander. (encodekey256u32): Likewise. (aes<aeswideklvariant>u8): Likewise. (loadiwkey): New insn pattern. (*encodekey128u32): Likewise. (*encodekey256u32): Likewise. (aes<aesklvariant>u8): Likewise. (*aes<aeswideklvariant>u8): Likewise. gcc/testsuite/ChangeLog * gcc.target/i386/keylocker-aesdec128kl.c: New test. * gcc.target/i386/keylocker-aesdec256kl.c: Likewise. * gcc.target/i386/keylocker-aesdecwide128kl.c: Likewise. * gcc.target/i386/keylocker-aesdecwide256kl.c: Likewise. * gcc.target/i386/keylocker-aesenc128kl.c: Likewise. * gcc.target/i386/keylocker-aesencwide128kl.c: Likewise. * gcc.target/i386/keylocker-aesencwide256kl.c: Likewise. * gcc.target/i386/keylocker-encodekey128.c: Likewise. * gcc.target/i386/keylocker-encodekey256.c: Likewise. * gcc.target/i386/keylocker-loadiwkey.c: Likewise. * g++.dg/other/i386-2.C: Add -mkl and -mwidekl. * g++.dg/other/i386-3.C: Likewise. * gcc.target/i386/sse-12.c: Likewise. * gcc.target/i386/sse-13.c: Likewise. * gcc.target/i386/sse-14.c: Likewise. * gcc.target/i386/sse-22.c: Add kl and widekl. * gcc.target/i386/sse-23.c: Likewise. * gcc.target/i386/funcspec-56.inc: Add new target attribute test.
This commit is contained in:
parent
1e1e1edf88
commit
632a2f50b8
@ -523,6 +523,8 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
int avx_usable = 0;
|
||||
int avx512_usable = 0;
|
||||
int amx_usable = 0;
|
||||
/* Check if KL is usable. */
|
||||
int has_kl = 0;
|
||||
if ((ecx & bit_OSXSAVE))
|
||||
{
|
||||
/* Check if XMM, YMM, OPMASK, upper 256 bits of ZMM0-ZMM15 and
|
||||
@ -667,6 +669,8 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
if (edx & bit_AMX_BF16)
|
||||
set_feature (FEATURE_AMX_BF16);
|
||||
}
|
||||
if (ecx & bit_KL)
|
||||
has_kl = 1;
|
||||
if (avx512_usable)
|
||||
{
|
||||
if (ebx & bit_AVX512F)
|
||||
@ -733,6 +737,21 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
set_feature (FEATURE_PTWRITE);
|
||||
}
|
||||
|
||||
/* Get Advanced Features at level 0x19 (eax = 0x19). */
|
||||
if (max_cpuid_level >= 0x19)
|
||||
{
|
||||
set_feature (FEATURE_AESKLE);
|
||||
__cpuid (19, eax, ebx, ecx, edx);
|
||||
/* Check if OS support keylocker. */
|
||||
if (ebx & bit_AESKLE)
|
||||
{
|
||||
if (ebx & bit_WIDEKL)
|
||||
set_feature (FEATURE_WIDEKL);
|
||||
if (has_kl)
|
||||
set_feature (FEATURE_KL);
|
||||
}
|
||||
}
|
||||
|
||||
/* Check cpuid level of extended features. */
|
||||
__cpuid (0x80000000, ext_level, ebx, ecx, edx);
|
||||
|
||||
|
@ -165,6 +165,9 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_TSXLDTRK_SET OPTION_MASK_ISA2_TSXLDTRK
|
||||
#define OPTION_MASK_ISA2_UINTR_SET OPTION_MASK_ISA2_UINTR
|
||||
#define OPTION_MASK_ISA2_HRESET_SET OPTION_MASK_ISA2_HRESET
|
||||
#define OPTION_MASK_ISA2_KL_SET OPTION_MASK_ISA2_KL
|
||||
#define OPTION_MASK_ISA2_WIDEKL_SET \
|
||||
(OPTION_MASK_ISA2_WIDEKL | OPTION_MASK_ISA2_KL_SET)
|
||||
|
||||
/* Define a set of ISAs which aren't available when a given ISA is
|
||||
disabled. MMX and SSE ISAs are handled separately. */
|
||||
@ -258,6 +261,9 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_AMX_BF16_UNSET OPTION_MASK_ISA2_AMX_BF16
|
||||
#define OPTION_MASK_ISA2_UINTR_UNSET OPTION_MASK_ISA2_UINTR
|
||||
#define OPTION_MASK_ISA2_HRESET_UNSET OPTION_MASK_ISA2_HRESET
|
||||
#define OPTION_MASK_ISA2_KL_UNSET \
|
||||
(OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
|
||||
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
||||
as -mno-sse4.1. */
|
||||
@ -304,6 +310,16 @@ along with GCC; see the file COPYING3. If not see
|
||||
| OPTION_MASK_ISA2_AVX512VP2INTERSECT_UNSET)
|
||||
#define OPTION_MASK_ISA2_GENERAL_REGS_ONLY_UNSET \
|
||||
(OPTION_MASK_ISA2_AVX512F_UNSET)
|
||||
#define OPTION_MASK_ISA2_AVX2_UNSET OPTION_MASK_ISA2_AVX512F_UNSET
|
||||
#define OPTION_MASK_ISA2_AVX_UNSET OPTION_MASK_ISA2_AVX2_UNSET
|
||||
#define OPTION_MASK_ISA2_SSE4_2_UNSET OPTION_MASK_ISA2_AVX_UNSET
|
||||
#define OPTION_MASK_ISA2_SSE4_1_UNSET OPTION_MASK_ISA2_SSE4_2_UNSET
|
||||
#define OPTION_MASK_ISA2_SSE4_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
|
||||
#define OPTION_MASK_ISA2_SSSE3_UNSET OPTION_MASK_ISA2_SSE4_1_UNSET
|
||||
#define OPTION_MASK_ISA2_SSE3_UNSET OPTION_MASK_ISA2_SSSE3_UNSET
|
||||
#define OPTION_MASK_ISA2_SSE2_UNSET \
|
||||
(OPTION_MASK_ISA2_SSE3_UNSET | OPTION_MASK_ISA2_KL_UNSET)
|
||||
#define OPTION_MASK_ISA2_SSE_UNSET OPTION_MASK_ISA2_SSE2_UNSET
|
||||
|
||||
#define OPTION_MASK_ISA2_AVX512BW_UNSET OPTION_MASK_ISA2_AVX512BF16_UNSET
|
||||
|
||||
@ -399,8 +415,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -414,8 +430,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE2_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE2_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -429,8 +445,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE3_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE3_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -444,8 +460,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSSE3_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSSE3_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -459,8 +475,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_1_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_1_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -474,8 +490,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_2_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_2_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -489,8 +505,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -504,8 +520,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
{
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_AVX2_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX2_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX2_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
@ -691,6 +707,40 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_ENQCMD_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_ENQCMD_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mkl:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_KL_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_KL_SET;
|
||||
|
||||
/* The Keylocker instructions need XMM registers from SSE2. */
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_KL_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_KL_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mwidekl:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_WIDEKL_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_WIDEKL_SET;
|
||||
|
||||
/* The Widekl instructions need XMM registers from SSE2. */
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_WIDEKL_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_WIDEKL_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mserialize:
|
||||
@ -1043,8 +1093,8 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
case OPT_mno_sse4:
|
||||
opts->x_ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_AVX512F_UNSET;
|
||||
opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA2_SSE4_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA2_SSE4_UNSET;
|
||||
return true;
|
||||
|
||||
case OPT_msse4a:
|
||||
|
@ -221,6 +221,9 @@ enum processor_features
|
||||
FEATURE_AMX_BF16,
|
||||
FEATURE_UINTR,
|
||||
FEATURE_HRESET,
|
||||
FEATURE_KL,
|
||||
FEATURE_AESKLE,
|
||||
FEATURE_WIDEKL,
|
||||
CPU_FEATURE_MAX
|
||||
};
|
||||
|
||||
|
@ -165,4 +165,7 @@ ISA_NAMES_TABLE_START
|
||||
ISA_NAMES_TABLE_ENTRY("amx-bf16", FEATURE_AMX_BF16, P_NONE, "-mamx-bf16")
|
||||
ISA_NAMES_TABLE_ENTRY("uintr", FEATURE_UINTR, P_NONE, "-muintr")
|
||||
ISA_NAMES_TABLE_ENTRY("hreset", FEATURE_HRESET, P_NONE, "-mhreset")
|
||||
ISA_NAMES_TABLE_ENTRY("kl", FEATURE_KL, P_NONE, "-mkl")
|
||||
ISA_NAMES_TABLE_ENTRY("aeskle", FEATURE_AESKLE, P_NONE, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("widekl", FEATURE_WIDEKL, P_NONE, "-mwidekl")
|
||||
ISA_NAMES_TABLE_END
|
||||
|
@ -414,7 +414,7 @@ i[34567]86-*-*)
|
||||
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
|
||||
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
|
||||
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
||||
hresetintrin.h"
|
||||
hresetintrin.h keylockerintrin.h"
|
||||
;;
|
||||
x86_64-*-*)
|
||||
cpu_type=i386
|
||||
@ -451,7 +451,7 @@ x86_64-*-*)
|
||||
avx512vp2intersectintrin.h avx512vp2intersectvlintrin.h
|
||||
tsxldtrkintrin.h amxtileintrin.h amxint8intrin.h
|
||||
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
||||
hresetintrin.h"
|
||||
hresetintrin.h keylockerintrin.h"
|
||||
;;
|
||||
ia64-*-*)
|
||||
extra_headers=ia64intrin.h
|
||||
|
@ -119,6 +119,7 @@
|
||||
#define bit_MOVDIR64B (1 << 28)
|
||||
#define bit_ENQCMD (1 << 29)
|
||||
#define bit_CLDEMOTE (1 << 25)
|
||||
#define bit_KL (1 << 23)
|
||||
|
||||
/* %edx */
|
||||
#define bit_AVX5124VNNIW (1 << 2)
|
||||
@ -146,6 +147,12 @@
|
||||
/* %ebx */
|
||||
#define bit_PTWRITE (1 << 4)
|
||||
|
||||
/* Keylocker leaf (%eax == 0x19) */
|
||||
/* %ebx */
|
||||
#define bit_AESKLE ( 1<<0 )
|
||||
#define bit_WIDEKL ( 1<<2 )
|
||||
|
||||
|
||||
/* Signatures for different CPU implementations as returned in uses
|
||||
of cpuid with level 0. */
|
||||
#define signature_AMD_ebx 0x68747541
|
||||
|
@ -1290,3 +1290,10 @@ DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI)
|
||||
DEF_FUNCTION_TYPE (V8SF, V8SF, V16HI, V16HI, UQI)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI)
|
||||
DEF_FUNCTION_TYPE (V4SF, V4SF, V8HI, V8HI, UQI)
|
||||
|
||||
# KEYLOCKER builtins
|
||||
DEF_FUNCTION_TYPE (UINT, UINT, V2DI, V2DI, PVOID)
|
||||
DEF_FUNCTION_TYPE (UINT, UINT, V2DI, PVOID)
|
||||
DEF_FUNCTION_TYPE (VOID, V2DI, V2DI, V2DI, UINT)
|
||||
DEF_FUNCTION_TYPE (UINT8, PV2DI, V2DI, PCVOID)
|
||||
DEF_FUNCTION_TYPE (UINT8, PV2DI, PCV2DI, PCVOID)
|
@ -460,6 +460,19 @@ BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_UINTR, CODE_FOR_senduipi, "__buil
|
||||
/* HRESET */
|
||||
BDESC (0, OPTION_MASK_ISA2_HRESET, CODE_FOR_hreset, "__builtin_ia32_hreset", IX86_BUILTIN_HRESET, UNKNOWN, (int) VOID_FTYPE_UNSIGNED)
|
||||
|
||||
/* KEYLOCKER */
|
||||
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_loadiwkey", IX86_BUILTIN_LOADIWKEY, UNKNOWN, (int) VOID_FTYPE_V2DI_V2DI_V2DI_UINT)
|
||||
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_aesdec128kl_u8", IX86_BUILTIN_AESDEC128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_V2DI_PCVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_aesdec256kl_u8", IX86_BUILTIN_AESDEC256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_V2DI_PCVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_aesenc128kl_u8", IX86_BUILTIN_AESENC128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_V2DI_PCVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_aesenc256kl_u8", IX86_BUILTIN_AESENC256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_V2DI_PCVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_encodekey128_u32", IX86_BUILTIN_ENCODEKEY128U32, UNKNOWN, (int) UINT_FTYPE_UINT_V2DI_PVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_KL, CODE_FOR_nothing, "__builtin_ia32_encodekey256_u32", IX86_BUILTIN_ENCODEKEY256U32, UNKNOWN, (int) UINT_FTYPE_UINT_V2DI_V2DI_PVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesdecwide128kl_u8", IX86_BUILTIN_AESDECWIDE128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesdecwide256kl_u8", IX86_BUILTIN_AESDECWIDE256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide128kl_u8", IX86_BUILTIN_AESENCWIDE128KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
|
||||
BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide256kl_u8", IX86_BUILTIN_AESENCWIDE256KLU8, UNKNOWN, (int) UINT8_FTYPE_PV2DI_PCV2DI_PCVOID)
|
||||
|
||||
BDESC_END (SPECIAL_ARGS, ARGS)
|
||||
|
||||
/* Builtins with variable number of arguments. */
|
||||
|
@ -602,6 +602,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__UINTR__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_HRESET)
|
||||
def_or_undef (parse_in, "__HRESET__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_KL)
|
||||
def_or_undef (parse_in, "__KL__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_WIDEKL)
|
||||
def_or_undef (parse_in, "__WIDEKL__");
|
||||
if (TARGET_IAMCU)
|
||||
{
|
||||
def_or_undef (parse_in, "__iamcu");
|
||||
|
@ -11326,6 +11326,226 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
|
||||
emit_insn (gen_cldemote (op0));
|
||||
return 0;
|
||||
|
||||
case IX86_BUILTIN_LOADIWKEY:
|
||||
{
|
||||
arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
arg2 = CALL_EXPR_ARG (exp, 2);
|
||||
arg3 = CALL_EXPR_ARG (exp, 3);
|
||||
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
op3 = expand_normal (arg3);
|
||||
|
||||
if (!REG_P (op0))
|
||||
op0 = copy_to_mode_reg (V2DImode, op0);
|
||||
if (!REG_P (op1))
|
||||
op1 = copy_to_mode_reg (V2DImode, op1);
|
||||
if (!REG_P (op2))
|
||||
op2 = copy_to_mode_reg (V2DImode, op2);
|
||||
if (!REG_P (op3))
|
||||
op3 = copy_to_mode_reg (SImode, op3);
|
||||
|
||||
emit_insn (gen_loadiwkey (op0, op1, op2, op3));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
case IX86_BUILTIN_AESDEC128KLU8:
|
||||
icode = CODE_FOR_aesdec128klu8;
|
||||
goto aesdecenc_expand;
|
||||
|
||||
case IX86_BUILTIN_AESDEC256KLU8:
|
||||
icode = CODE_FOR_aesdec256klu8;
|
||||
goto aesdecenc_expand;
|
||||
|
||||
case IX86_BUILTIN_AESENC128KLU8:
|
||||
icode = CODE_FOR_aesenc128klu8;
|
||||
goto aesdecenc_expand;
|
||||
|
||||
case IX86_BUILTIN_AESENC256KLU8:
|
||||
icode = CODE_FOR_aesenc256klu8;
|
||||
|
||||
aesdecenc_expand:
|
||||
|
||||
arg0 = CALL_EXPR_ARG (exp, 0); // __m128i *odata
|
||||
arg1 = CALL_EXPR_ARG (exp, 1); // __m128i idata
|
||||
arg2 = CALL_EXPR_ARG (exp, 2); // const void *p
|
||||
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
|
||||
if (!address_operand (op0, V2DImode))
|
||||
{
|
||||
op0 = convert_memory_address (Pmode, op0);
|
||||
op0 = copy_addr_to_reg (op0);
|
||||
}
|
||||
op0 = gen_rtx_MEM (V2DImode, op0);
|
||||
|
||||
if (!REG_P (op1))
|
||||
op1 = copy_to_mode_reg (V2DImode, op1);
|
||||
|
||||
if (!address_operand (op2, VOIDmode))
|
||||
{
|
||||
op2 = convert_memory_address (Pmode, op2);
|
||||
op2 = copy_addr_to_reg (op2);
|
||||
}
|
||||
op2 = gen_rtx_MEM (BLKmode, op2);
|
||||
|
||||
emit_insn (GEN_FCN (icode) (op1, op1, op2));
|
||||
|
||||
if (target == 0)
|
||||
target = gen_reg_rtx (QImode);
|
||||
|
||||
pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG),
|
||||
const0_rtx);
|
||||
emit_insn (gen_rtx_SET (target, pat));
|
||||
|
||||
emit_insn (gen_rtx_SET (op0, op1));
|
||||
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_AESDECWIDE128KLU8:
|
||||
icode = CODE_FOR_aesdecwide128klu8;
|
||||
goto wideaesdecenc_expand;
|
||||
|
||||
case IX86_BUILTIN_AESDECWIDE256KLU8:
|
||||
icode = CODE_FOR_aesdecwide256klu8;
|
||||
goto wideaesdecenc_expand;
|
||||
|
||||
case IX86_BUILTIN_AESENCWIDE128KLU8:
|
||||
icode = CODE_FOR_aesencwide128klu8;
|
||||
goto wideaesdecenc_expand;
|
||||
|
||||
case IX86_BUILTIN_AESENCWIDE256KLU8:
|
||||
icode = CODE_FOR_aesencwide256klu8;
|
||||
|
||||
wideaesdecenc_expand:
|
||||
|
||||
rtx xmm_regs[8];
|
||||
rtx op;
|
||||
|
||||
arg0 = CALL_EXPR_ARG (exp, 0); // __m128i * odata
|
||||
arg1 = CALL_EXPR_ARG (exp, 1); // const __m128i * idata
|
||||
arg2 = CALL_EXPR_ARG (exp, 2); // const void *p
|
||||
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
|
||||
if (!address_operand (op2, VOIDmode))
|
||||
{
|
||||
op2 = convert_memory_address (Pmode, op2);
|
||||
op2 = copy_addr_to_reg (op2);
|
||||
}
|
||||
op2 = gen_rtx_MEM (BLKmode, op2);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
|
||||
|
||||
op = gen_rtx_MEM (V2DImode,
|
||||
plus_constant (Pmode, op1, (i * 16)));
|
||||
|
||||
emit_move_insn (xmm_regs[i], op);
|
||||
}
|
||||
|
||||
emit_insn (GEN_FCN (icode) (op2));
|
||||
|
||||
if (target == 0)
|
||||
target = gen_reg_rtx (QImode);
|
||||
|
||||
pat = gen_rtx_EQ (QImode, gen_rtx_REG (CCZmode, FLAGS_REG),
|
||||
const0_rtx);
|
||||
emit_insn (gen_rtx_SET (target, pat));
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
op = gen_rtx_MEM (V2DImode,
|
||||
plus_constant (Pmode, op0, (i * 16)));
|
||||
emit_move_insn (op, xmm_regs[i]);
|
||||
}
|
||||
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_ENCODEKEY128U32:
|
||||
{
|
||||
rtx op, xmm_regs[7];
|
||||
|
||||
arg0 = CALL_EXPR_ARG (exp, 0); // unsigned int htype
|
||||
arg1 = CALL_EXPR_ARG (exp, 1); // __m128i key
|
||||
arg2 = CALL_EXPR_ARG (exp, 2); // void *h
|
||||
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
|
||||
if (!REG_P (op0))
|
||||
op0 = copy_to_mode_reg (SImode, op0);
|
||||
|
||||
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
|
||||
emit_move_insn (op, op1);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
|
||||
|
||||
if (target == 0)
|
||||
target = gen_reg_rtx (SImode);
|
||||
|
||||
emit_insn (gen_encodekey128u32 (target, op0));
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
{
|
||||
op = gen_rtx_MEM (V2DImode,
|
||||
plus_constant (Pmode, op2, (i * 16)));
|
||||
emit_move_insn (op, xmm_regs[i]);
|
||||
}
|
||||
|
||||
return target;
|
||||
}
|
||||
case IX86_BUILTIN_ENCODEKEY256U32:
|
||||
{
|
||||
rtx op, xmm_regs[7];
|
||||
|
||||
arg0 = CALL_EXPR_ARG (exp, 0); // unsigned int htype
|
||||
arg1 = CALL_EXPR_ARG (exp, 1); // __m128i keylow
|
||||
arg2 = CALL_EXPR_ARG (exp, 2); // __m128i keyhi
|
||||
arg3 = CALL_EXPR_ARG (exp, 3); // void *h
|
||||
|
||||
op0 = expand_normal (arg0);
|
||||
op1 = expand_normal (arg1);
|
||||
op2 = expand_normal (arg2);
|
||||
op3 = expand_normal (arg3);
|
||||
|
||||
if (!REG_P (op0))
|
||||
op0 = copy_to_mode_reg (SImode, op0);
|
||||
|
||||
/* Force to use xmm0, xmm1 for keylow, keyhi*/
|
||||
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (0));
|
||||
emit_move_insn (op, op1);
|
||||
op = gen_rtx_REG (V2DImode, GET_SSE_REGNO (1));
|
||||
emit_move_insn (op, op2);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
|
||||
|
||||
if (target == 0)
|
||||
target = gen_reg_rtx (SImode);
|
||||
|
||||
emit_insn (gen_encodekey256u32 (target, op0));
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
{
|
||||
op = gen_rtx_MEM (V2DImode,
|
||||
plus_constant (Pmode, op3, (i * 16)));
|
||||
emit_move_insn (op, xmm_regs[i]);
|
||||
}
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
case IX86_BUILTIN_VEC_INIT_V2SI:
|
||||
case IX86_BUILTIN_VEC_INIT_V4HI:
|
||||
case IX86_BUILTIN_VEC_INIT_V8QI:
|
||||
|
@ -214,7 +214,9 @@ static struct ix86_target_opts isa2_opts[] =
|
||||
{ "-mamx-int8", OPTION_MASK_ISA2_AMX_INT8 },
|
||||
{ "-mamx-bf16", OPTION_MASK_ISA2_AMX_BF16 },
|
||||
{ "-muintr", OPTION_MASK_ISA2_UINTR },
|
||||
{ "-mhreset", OPTION_MASK_ISA2_HRESET }
|
||||
{ "-mhreset", OPTION_MASK_ISA2_HRESET },
|
||||
{ "-mkl", OPTION_MASK_ISA2_KL },
|
||||
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL }
|
||||
};
|
||||
static struct ix86_target_opts isa_opts[] =
|
||||
{
|
||||
@ -1035,6 +1037,8 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
||||
IX86_ATTR_ISA ("cldemote", OPT_mcldemote),
|
||||
IX86_ATTR_ISA ("uintr", OPT_muintr),
|
||||
IX86_ATTR_ISA ("ptwrite", OPT_mptwrite),
|
||||
IX86_ATTR_ISA ("kl", OPT_mkl),
|
||||
IX86_ATTR_ISA ("widekl", OPT_mwidekl),
|
||||
IX86_ATTR_ISA ("avx512bf16", OPT_mavx512bf16),
|
||||
IX86_ATTR_ISA ("enqcmd", OPT_menqcmd),
|
||||
IX86_ATTR_ISA ("serialize", OPT_mserialize),
|
||||
@ -2339,6 +2343,12 @@ ix86_option_override_internal (bool main_args_p,
|
||||
if (((processor_alias_table[i].flags & PTA_TSXLDTRK) != 0)
|
||||
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_TSXLDTRK))
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_TSXLDTRK;
|
||||
if (((processor_alias_table[i].flags & PTA_KL) != 0)
|
||||
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_KL))
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_KL;
|
||||
if (((processor_alias_table[i].flags & PTA_WIDEKL) != 0)
|
||||
&& !(opts->x_ix86_isa_flags2_explicit & OPTION_MASK_ISA2_WIDEKL))
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_WIDEKL;
|
||||
|
||||
if ((processor_alias_table[i].flags
|
||||
& (PTA_PREFETCH_SSE | PTA_SSE)) != 0)
|
||||
|
@ -213,6 +213,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
#define TARGET_UINTR_P(x) TARGET_ISA2_UINTR_P(x)
|
||||
#define TARGET_HRESET TARGET_ISA2_HRESET
|
||||
#define TARGET_HRESET_P(x) TARGET_ISA2_HRESET_P(x)
|
||||
#define TARGET_KL TARGET_ISA2_KL
|
||||
#define TARGET_KL_P(x) TARGET_ISA2_KL_P(x)
|
||||
#define TARGET_WIDEKL TARGET_ISA2_WIDEKL
|
||||
#define TARGET_WIDEKL_P(x) TARGET_ISA2_WIDEKL_P(x)
|
||||
|
||||
#define TARGET_LP64 TARGET_ABI_64
|
||||
#define TARGET_LP64_P(x) TARGET_ABI_64_P(x)
|
||||
@ -2485,6 +2489,8 @@ const wide_int_bitmask PTA_AMX_INT8(0, HOST_WIDE_INT_1U << 20);
|
||||
const wide_int_bitmask PTA_AMX_BF16(0, HOST_WIDE_INT_1U << 21);
|
||||
const wide_int_bitmask PTA_UINTR (0, HOST_WIDE_INT_1U << 22);
|
||||
const wide_int_bitmask PTA_HRESET(0, HOST_WIDE_INT_1U << 23);
|
||||
const wide_int_bitmask PTA_KL (0, HOST_WIDE_INT_1U << 24);
|
||||
const wide_int_bitmask PTA_WIDEKL (0, HOST_WIDE_INT_1U << 25);
|
||||
|
||||
const wide_int_bitmask PTA_X86_64_BASELINE = PTA_64BIT | PTA_MMX | PTA_SSE
|
||||
| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR;
|
||||
@ -2525,13 +2531,13 @@ const wide_int_bitmask PTA_ICELAKE_CLIENT = PTA_CANNONLAKE | PTA_AVX512VNNI
|
||||
const wide_int_bitmask PTA_ICELAKE_SERVER = PTA_ICELAKE_CLIENT | PTA_PCONFIG
|
||||
| PTA_WBNOINVD | PTA_CLWB;
|
||||
const wide_int_bitmask PTA_TIGERLAKE = PTA_ICELAKE_CLIENT | PTA_MOVDIRI
|
||||
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT;
|
||||
| PTA_MOVDIR64B | PTA_CLWB | PTA_AVX512VP2INTERSECT | PTA_KL | PTA_WIDEKL;
|
||||
const wide_int_bitmask PTA_SAPPHIRERAPIDS = PTA_COOPERLAKE | PTA_MOVDIRI
|
||||
| PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_ENQCMD | PTA_CLDEMOTE
|
||||
| PTA_PTWRITE | PTA_WAITPKG | PTA_SERIALIZE | PTA_TSXLDTRK | PTA_AMX_TILE
|
||||
| PTA_AMX_INT8 | PTA_AMX_BF16 | PTA_UINTR;
|
||||
const wide_int_bitmask PTA_ALDERLAKE = PTA_SKYLAKE | PTA_CLDEMOTE | PTA_PTWRITE
|
||||
| PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET;
|
||||
| PTA_WAITPKG | PTA_SERIALIZE | PTA_HRESET | PTA_KL | PTA_WIDEKL;
|
||||
const wide_int_bitmask PTA_KNL = PTA_BROADWELL | PTA_AVX512PF | PTA_AVX512ER
|
||||
| PTA_AVX512F | PTA_AVX512CD;
|
||||
const wide_int_bitmask PTA_BONNELL = PTA_CORE2 | PTA_MOVBE;
|
||||
|
@ -1135,3 +1135,11 @@ Support AMX-BF16 built-in functions and code generation.
|
||||
mhreset
|
||||
Target Report Mask(ISA2_HRESET) Var(ix86_isa_flags2) Save
|
||||
Support HRESET built-in functions and code generation.
|
||||
|
||||
mkl
|
||||
Target Report Mask(ISA2_KL) Var(ix86_isa_flags2) Save
|
||||
Support KL built-in functions and code generation.
|
||||
|
||||
mwidekl
|
||||
Target Report Mask(ISA2_WIDEKL) Var(ix86_isa_flags2) Save
|
||||
Support WIDEKL built-in functions and code generation.
|
||||
|
@ -118,4 +118,6 @@
|
||||
|
||||
#include <prfchwintrin.h>
|
||||
|
||||
#include <keylockerintrin.h>
|
||||
|
||||
#endif /* _IMMINTRIN_H_INCLUDED */
|
||||
|
129
gcc/config/i386/keylockerintrin.h
Normal file
129
gcc/config/i386/keylockerintrin.h
Normal file
@ -0,0 +1,129 @@
|
||||
/* Copyright (C) 2018 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
# error "Never use <keylockerintrin.h> directly; include <x86intrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _KEYLOCKERINTRIN_H_INCLUDED
|
||||
#define _KEYLOCKERINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __KL__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("kl")
|
||||
#define __DISABLE_KL__
|
||||
#endif /* __KL__ */
|
||||
|
||||
|
||||
extern __inline
|
||||
void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_loadiwkey (unsigned int __I, __m128i __A, __m128i __B, __m128i __C)
|
||||
{
|
||||
__builtin_ia32_loadiwkey ((__v2di) __B, (__v2di) __C, (__v2di) __A, __I);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_encodekey128_u32 (unsigned int __I, __m128i __A, void * __P)
|
||||
{
|
||||
return __builtin_ia32_encodekey128_u32 (__I, (__v2di)__A, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_encodekey256_u32 (unsigned int __I, __m128i __A, __m128i __B, void * __P)
|
||||
{
|
||||
return __builtin_ia32_encodekey256_u32 (__I, (__v2di)__A, (__v2di)__B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdec128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdec128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdec256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdec256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesenc128kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesenc128kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesenc256kl_u8 (__m128i * __A, __m128i __B, const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesenc256kl_u8 ((__v2di *) __A, (__v2di) __B, __P);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_KL__
|
||||
#undef __DISABLE_KL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_KL__ */
|
||||
|
||||
#ifndef __WIDEKL__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("widekl")
|
||||
#define __DISABLE_WIDEKL__
|
||||
#endif /* __WIDEKL__ */
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdecwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdecwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesdecwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesdecwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesencwide128kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesencwide128kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
|
||||
extern __inline
|
||||
unsigned char __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_aesencwide256kl_u8(__m128i __A[8], const __m128i __B[8], const void * __P)
|
||||
{
|
||||
return __builtin_ia32_aesencwide256kl_u8 ((__v2di *) __A, (__v2di *) __B, __P);
|
||||
}
|
||||
#ifdef __DISABLE_WIDEKL__
|
||||
#undef __DISABLE_WIDEKL__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_WIDEKL__ */
|
||||
#endif /* _KEYLOCKERINTRIN_H_INCLUDED */
|
@ -1726,3 +1726,121 @@
|
||||
}
|
||||
return (i >= 12 && i <= 18);
|
||||
})
|
||||
|
||||
;; Keylocker specific predicates
|
||||
(define_predicate "encodekey128_operation"
|
||||
(match_code "parallel")
|
||||
{
|
||||
unsigned i;
|
||||
rtx elt;
|
||||
|
||||
if (XVECLEN (op, 0) != 8)
|
||||
return false;
|
||||
|
||||
for(i = 0; i < 3; i++)
|
||||
{
|
||||
elt = XVECEXP (op, 0, i + 1);
|
||||
if (GET_CODE (elt) != SET
|
||||
|| GET_CODE (SET_DEST (elt)) != REG
|
||||
|| GET_MODE (SET_DEST (elt)) != V2DImode
|
||||
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|
||||
|| GET_CODE (SET_SRC (elt)) != UNSPEC_VOLATILE
|
||||
|| GET_MODE (SET_SRC (elt)) != V2DImode
|
||||
|| XVECLEN(SET_SRC (elt), 0) != 1
|
||||
|| XVECEXP(SET_SRC (elt), 0, 0) != const0_rtx)
|
||||
return false;
|
||||
}
|
||||
|
||||
for(i = 4; i < 7; i++)
|
||||
{
|
||||
elt = XVECEXP (op, 0, i);
|
||||
if (GET_CODE (elt) != SET
|
||||
|| GET_CODE (SET_DEST (elt)) != REG
|
||||
|| GET_MODE (SET_DEST (elt)) != V2DImode
|
||||
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|
||||
|| SET_SRC (elt) != CONST0_RTX (V2DImode))
|
||||
return false;
|
||||
}
|
||||
|
||||
elt = XVECEXP (op, 0, 7);
|
||||
if (GET_CODE (elt) != CLOBBER
|
||||
|| GET_MODE (elt) != VOIDmode
|
||||
|| GET_CODE (XEXP (elt, 0)) != REG
|
||||
|| GET_MODE (XEXP (elt, 0)) != CCmode
|
||||
|| REGNO (XEXP (elt, 0)) != FLAGS_REG)
|
||||
return false;
|
||||
return true;
|
||||
})
|
||||
|
||||
(define_predicate "encodekey256_operation"
|
||||
(match_code "parallel")
|
||||
{
|
||||
unsigned i;
|
||||
rtx elt;
|
||||
|
||||
if (XVECLEN (op, 0) != 9)
|
||||
return false;
|
||||
|
||||
elt = SET_SRC (XVECEXP (op, 0, 0));
|
||||
elt = XVECEXP (elt, 0, 2);
|
||||
if (!REG_P (elt)
|
||||
|| REGNO(elt) != GET_SSE_REGNO (1))
|
||||
return false;
|
||||
|
||||
for(i = 0; i < 4; i++)
|
||||
{
|
||||
elt = XVECEXP (op, 0, i + 1);
|
||||
if (GET_CODE (elt) != SET
|
||||
|| GET_CODE (SET_DEST (elt)) != REG
|
||||
|| GET_MODE (SET_DEST (elt)) != V2DImode
|
||||
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|
||||
|| GET_CODE (SET_SRC (elt)) != UNSPEC_VOLATILE
|
||||
|| GET_MODE (SET_SRC (elt)) != V2DImode
|
||||
|| XVECLEN(SET_SRC (elt), 0) != 1
|
||||
|| XVECEXP(SET_SRC (elt), 0, 0) != const0_rtx)
|
||||
return false;
|
||||
}
|
||||
|
||||
for(i = 4; i < 7; i++)
|
||||
{
|
||||
elt = XVECEXP (op, 0, i + 1);
|
||||
if (GET_CODE (elt) != SET
|
||||
|| GET_CODE (SET_DEST (elt)) != REG
|
||||
|| GET_MODE (SET_DEST (elt)) != V2DImode
|
||||
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|
||||
|| SET_SRC (elt) != CONST0_RTX (V2DImode))
|
||||
return false;
|
||||
}
|
||||
|
||||
elt = XVECEXP (op, 0, 8);
|
||||
if (GET_CODE (elt) != CLOBBER
|
||||
|| GET_MODE (elt) != VOIDmode
|
||||
|| GET_CODE (XEXP (elt, 0)) != REG
|
||||
|| GET_MODE (XEXP (elt, 0)) != CCmode
|
||||
|| REGNO (XEXP (elt, 0)) != FLAGS_REG)
|
||||
return false;
|
||||
return true;
|
||||
})
|
||||
|
||||
|
||||
(define_predicate "aeswidekl_operation"
|
||||
(match_code "parallel")
|
||||
{
|
||||
unsigned i;
|
||||
rtx elt;
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
elt = XVECEXP (op, 0, i + 1);
|
||||
if (GET_CODE (elt) != SET
|
||||
|| GET_CODE (SET_DEST (elt)) != REG
|
||||
|| GET_MODE (SET_DEST (elt)) != V2DImode
|
||||
|| REGNO (SET_DEST (elt)) != GET_SSE_REGNO (i)
|
||||
|| GET_CODE (SET_SRC (elt)) != UNSPEC_VOLATILE
|
||||
|| GET_MODE (SET_SRC (elt)) != V2DImode
|
||||
|| XVECLEN (SET_SRC (elt), 0) != 1
|
||||
|| REGNO (XVECEXP (SET_SRC (elt), 0, 0)) != GET_SSE_REGNO (i))
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})
|
||||
|
@ -205,6 +205,19 @@
|
||||
UNSPECV_MWAIT
|
||||
UNSPECV_VZEROALL
|
||||
UNSPECV_VZEROUPPER
|
||||
|
||||
;; For KEYLOCKER
|
||||
UNSPECV_LOADIWKEY
|
||||
UNSPECV_AESDEC128KLU8
|
||||
UNSPECV_AESENC128KLU8
|
||||
UNSPECV_AESDEC256KLU8
|
||||
UNSPECV_AESENC256KLU8
|
||||
UNSPECV_AESDECWIDE128KLU8
|
||||
UNSPECV_AESENCWIDE128KLU8
|
||||
UNSPECV_AESDECWIDE256KLU8
|
||||
UNSPECV_AESENCWIDE256KLU8
|
||||
UNSPECV_ENCODEKEY128U32
|
||||
UNSPECV_ENCODEKEY256U32
|
||||
])
|
||||
|
||||
;; All vector modes including V?TImode, used in move patterns.
|
||||
@ -23316,3 +23329,208 @@
|
||||
(match_operand:<avx512fmaskhalfmode> 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512BF16"
|
||||
"vdpbf16ps\t{%3, %2, %0%{%4%}|%0%{%4%}, %2, %3}")
|
||||
|
||||
;; KEYLOCKER
|
||||
(define_insn "loadiwkey"
|
||||
[(unspec_volatile:V2DI [(match_operand:V2DI 0 "register_operand" "v")
|
||||
(match_operand:V2DI 1 "register_operand" "v")
|
||||
(match_operand:V2DI 2 "register_operand" "Yz")
|
||||
(match_operand:SI 3 "register_operand" "a")]
|
||||
UNSPECV_LOADIWKEY)
|
||||
(clobber (reg:CC FLAGS_REG))]
|
||||
"TARGET_KL"
|
||||
"loadiwkey\t{%0, %1|%1, %0}"
|
||||
[(set_attr "type" "other")])
|
||||
|
||||
(define_expand "encodekey128u32"
|
||||
[(match_par_dup 2
|
||||
[(set (match_operand:SI 0 "register_operand")
|
||||
(unspec_volatile:SI
|
||||
[(match_operand:SI 1 "register_operand")
|
||||
(reg:V2DI XMM0_REG)]
|
||||
UNSPECV_ENCODEKEY128U32))])]
|
||||
"TARGET_KL"
|
||||
{
|
||||
rtx xmm_regs[7];
|
||||
rtx tmp_unspec;
|
||||
unsigned i;
|
||||
|
||||
/* parallel rtx for encodekey128 predicate */
|
||||
operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (8));
|
||||
|
||||
for (i = 0; i < 7; i++)
|
||||
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
|
||||
|
||||
tmp_unspec
|
||||
= gen_rtx_UNSPEC_VOLATILE (SImode,
|
||||
gen_rtvec (2, operands[1], xmm_regs[0]),
|
||||
UNSPECV_ENCODEKEY128U32);
|
||||
|
||||
XVECEXP (operands[2], 0, 0)
|
||||
= gen_rtx_SET (operands[0], tmp_unspec);
|
||||
|
||||
tmp_unspec
|
||||
= gen_rtx_UNSPEC_VOLATILE (V2DImode,
|
||||
gen_rtvec (1, const0_rtx),
|
||||
UNSPECV_ENCODEKEY128U32);
|
||||
|
||||
for (i = 0; i < 3; i++)
|
||||
XVECEXP (operands[2], 0, i + 1)
|
||||
= gen_rtx_SET (xmm_regs[i], tmp_unspec);
|
||||
|
||||
for (i = 4; i < 7; i++)
|
||||
XVECEXP (operands[2], 0, i)
|
||||
= gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
|
||||
|
||||
XVECEXP (operands[2], 0, 7)
|
||||
= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
})
|
||||
|
||||
(define_insn "*encodekey128u32"
|
||||
[(match_parallel 2 "encodekey128_operation"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(unspec_volatile:SI
|
||||
[(match_operand:SI 1 "register_operand" "r")
|
||||
(reg:V2DI XMM0_REG)]
|
||||
UNSPECV_ENCODEKEY128U32))])]
|
||||
"TARGET_KL"
|
||||
"encodekey128\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "other")])
|
||||
|
||||
(define_expand "encodekey256u32"
|
||||
[(match_par_dup 2
|
||||
[(set (match_operand:SI 0 "register_operand")
|
||||
(unspec_volatile:SI
|
||||
[(match_operand:SI 1 "register_operand")
|
||||
(reg:V2DI XMM0_REG)
|
||||
(reg:V2DI XMM1_REG)]
|
||||
UNSPECV_ENCODEKEY256U32))])]
|
||||
"TARGET_KL"
|
||||
{
|
||||
rtx xmm_regs[7];
|
||||
rtx tmp_unspec;
|
||||
unsigned i;
|
||||
|
||||
/* parallel rtx for encodekey256 predicate */
|
||||
operands[2] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
|
||||
|
||||
for (i = 0; i < 7; i++)
|
||||
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
|
||||
|
||||
tmp_unspec
|
||||
= gen_rtx_UNSPEC_VOLATILE (SImode,
|
||||
gen_rtvec (3, operands[1],
|
||||
xmm_regs[0], xmm_regs[1]),
|
||||
UNSPECV_ENCODEKEY256U32);
|
||||
|
||||
XVECEXP (operands[2], 0, 0)
|
||||
= gen_rtx_SET (operands[0], tmp_unspec);
|
||||
|
||||
tmp_unspec
|
||||
= gen_rtx_UNSPEC_VOLATILE (V2DImode,
|
||||
gen_rtvec (1, const0_rtx),
|
||||
UNSPECV_ENCODEKEY256U32);
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
XVECEXP (operands[2], 0, i + 1)
|
||||
= gen_rtx_SET (xmm_regs[i], tmp_unspec);
|
||||
|
||||
for (i = 4; i < 7; i++)
|
||||
XVECEXP (operands[2], 0, i + 1)
|
||||
= gen_rtx_SET (xmm_regs[i], CONST0_RTX (V2DImode));
|
||||
|
||||
XVECEXP (operands[2], 0, 8)
|
||||
= gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
|
||||
})
|
||||
|
||||
(define_insn "*encodekey256u32"
|
||||
[(match_parallel 2 "encodekey256_operation"
|
||||
[(set (match_operand:SI 0 "register_operand" "=r")
|
||||
(unspec_volatile:SI
|
||||
[(match_operand:SI 1 "register_operand" "r")
|
||||
(reg:V2DI XMM0_REG)
|
||||
(reg:V2DI XMM1_REG)]
|
||||
UNSPECV_ENCODEKEY256U32))])]
|
||||
"TARGET_KL"
|
||||
"encodekey256\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "other")])
|
||||
|
||||
(define_int_iterator AESDECENCKL [UNSPECV_AESDEC128KLU8 UNSPECV_AESDEC256KLU8
|
||||
UNSPECV_AESENC128KLU8 UNSPECV_AESENC256KLU8])
|
||||
|
||||
(define_int_attr aesklvariant [(UNSPECV_AESDEC128KLU8 "dec128kl")
|
||||
(UNSPECV_AESDEC256KLU8 "dec256kl")
|
||||
(UNSPECV_AESENC128KLU8 "enc128kl")
|
||||
(UNSPECV_AESENC256KLU8 "enc256kl")])
|
||||
|
||||
(define_insn "aes<aesklvariant>u8"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=v")
|
||||
(unspec_volatile:V2DI [(match_operand:V2DI 1 "register_operand" "0")
|
||||
(match_operand:BLK 2 "memory_operand" "m")]
|
||||
AESDECENCKL))
|
||||
(set (reg:CCZ FLAGS_REG)
|
||||
(unspec_volatile:CCZ [(match_dup 1) (match_dup 2)] AESDECENCKL))]
|
||||
"TARGET_KL"
|
||||
"aes<aesklvariant>\t{%2, %0|%0, %2}"
|
||||
[(set_attr "type" "other")])
|
||||
|
||||
(define_int_iterator AESDECENCWIDEKL [UNSPECV_AESDECWIDE128KLU8 UNSPECV_AESDECWIDE256KLU8
|
||||
UNSPECV_AESENCWIDE128KLU8 UNSPECV_AESENCWIDE256KLU8])
|
||||
|
||||
(define_int_attr aeswideklvariant [(UNSPECV_AESDECWIDE128KLU8 "decwide128kl")
|
||||
(UNSPECV_AESDECWIDE256KLU8 "decwide256kl")
|
||||
(UNSPECV_AESENCWIDE128KLU8 "encwide128kl")
|
||||
(UNSPECV_AESENCWIDE256KLU8 "encwide256kl")])
|
||||
|
||||
(define_int_attr AESWIDEKLVARIANT [(UNSPECV_AESDECWIDE128KLU8 "AESDECWIDE128KLU8")
|
||||
(UNSPECV_AESDECWIDE256KLU8 "AESDECWIDE256KLU8")
|
||||
(UNSPECV_AESENCWIDE128KLU8 "AESENCWIDE128KLU8")
|
||||
(UNSPECV_AESENCWIDE256KLU8 "AESENCWIDE256KLU8")])
|
||||
|
||||
(define_expand "aes<aeswideklvariant>u8"
|
||||
[(match_par_dup 1
|
||||
[(set (reg:CCZ FLAGS_REG)
|
||||
(unspec_volatile:CCZ
|
||||
[(match_operand:BLK 0 "memory_operand")]
|
||||
AESDECENCWIDEKL))])]
|
||||
"TARGET_WIDEKL"
|
||||
{
|
||||
rtx xmm_regs[8];
|
||||
rtx tmp_unspec;
|
||||
unsigned i;
|
||||
|
||||
/* parallel rtx for widekl predicate */
|
||||
operands[1] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (9));
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
xmm_regs[i] = gen_rtx_REG (V2DImode, GET_SSE_REGNO (i));
|
||||
|
||||
tmp_unspec
|
||||
= gen_rtx_UNSPEC_VOLATILE (CCZmode,
|
||||
gen_rtvec (1, operands[0]),
|
||||
UNSPECV_<AESWIDEKLVARIANT>);
|
||||
|
||||
XVECEXP (operands[1], 0, 0)
|
||||
= gen_rtx_SET (gen_rtx_REG (CCZmode, FLAGS_REG),
|
||||
tmp_unspec);
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
{
|
||||
tmp_unspec
|
||||
= gen_rtx_UNSPEC_VOLATILE (V2DImode,
|
||||
gen_rtvec (1, xmm_regs[i]),
|
||||
UNSPECV_<AESWIDEKLVARIANT>);
|
||||
XVECEXP (operands[1], 0, i + 1)
|
||||
= gen_rtx_SET (xmm_regs[i], tmp_unspec);
|
||||
}
|
||||
})
|
||||
|
||||
(define_insn "*aes<aeswideklvariant>u8"
|
||||
[(match_parallel 1 "aeswidekl_operation"
|
||||
[(set (reg:CCZ FLAGS_REG)
|
||||
(unspec_volatile:CCZ
|
||||
[(match_operand:BLK 0 "memory_operand" "m")]
|
||||
AESDECENCWIDEKL))])]
|
||||
"TARGET_WIDEKL"
|
||||
"aes<aeswideklvariant>\t{%0}"
|
||||
[(set_attr "type" "other")])
|
||||
|
@ -6652,6 +6652,16 @@ Enable/disable the generation of the UINTR instructions.
|
||||
@cindex @code{target("hreset")} function attribute, x86
|
||||
Enable/disable the generation of the HRESET instruction.
|
||||
|
||||
@item kl
|
||||
@itemx no-kl
|
||||
@cindex @code{target("kl")} function attribute, x86
|
||||
Enable/disable the generation of the KEYLOCKER instructions.
|
||||
|
||||
@item widekl
|
||||
@itemx no-widekl
|
||||
@cindex @code{target("widekl")} function attribute, x86
|
||||
Enable/disable the generation of the WIDEKL instructions.
|
||||
|
||||
@item cld
|
||||
@itemx no-cld
|
||||
@cindex @code{target("cld")} function attribute, x86
|
||||
|
@ -1369,6 +1369,7 @@ See RS/6000 and PowerPC Options.
|
||||
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset@gol
|
||||
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
||||
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||||
-mkl -mwidekl @gol
|
||||
-mmemcpy-strategy=@var{strategy} -mmemset-strategy=@var{strategy} @gol
|
||||
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
|
||||
-m96bit-long-double -mlong-double-64 -mlong-double-80 -mlong-double-128 @gol
|
||||
@ -30365,6 +30366,11 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@need 200
|
||||
@itemx -mhreset
|
||||
@opindex mhreset
|
||||
@itemx -mkl
|
||||
@opindex mkl
|
||||
@need 200
|
||||
@itemx -mwidekl
|
||||
@opindex mwidekl
|
||||
These switches enable the use of instructions in the MMX, SSE,
|
||||
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
|
||||
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
||||
@ -30374,8 +30380,9 @@ WBNOINVD, FMA4, PREFETCHW, RDPID, PREFETCHWT1, RDSEED, SGX, XOP, LWP,
|
||||
XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
||||
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
||||
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
||||
UINTR, HRESET or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
@option{-mno-} option to disable use of these instructions.
|
||||
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL or CLDEMOTE extended
|
||||
instruction sets. Each has a corresponding @option{-mno-} option to disable
|
||||
use of these instructions.
|
||||
|
||||
These extensions are also available as built-in functions: see
|
||||
@ref{x86 Built-in Functions}, for details of the functions enabled and
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
@ -76,6 +76,8 @@ extern void test_amx_int8 (void) __attribute__((__target__("amx-int8")));
|
||||
extern void test_amx_bf16 (void) __attribute__((__target__("amx-bf16")));
|
||||
extern void test_uintr (void) __attribute__((__target__("uintr")));
|
||||
extern void test_hreset (void) __attribute__((__target__("hreset")));
|
||||
extern void test_keylocker (void) __attribute__((__target__("kl")));
|
||||
extern void test_widekl (void) __attribute__((__target__("widekl")));
|
||||
|
||||
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
||||
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
||||
@ -153,6 +155,8 @@ extern void test_no_amx_int8 (void) __attribute__((__target__("no-amx-int8")));
|
||||
extern void test_no_amx_bf16 (void) __attribute__((__target__("no-amx-bf16")));
|
||||
extern void test_no_uintr (void) __attribute__((__target__("no-uintr")));
|
||||
extern void test_no_hreset (void) __attribute__((__target__("no-hreset")));
|
||||
extern void test_no_keylocker (void) __attribute__((__target__("no-kl")));
|
||||
extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
|
||||
|
||||
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
||||
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
||||
|
17
gcc/testsuite/gcc.target/i386/keylocker-aesdec128kl.c
Normal file
17
gcc/testsuite/gcc.target/i386/keylocker-aesdec128kl.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mkl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "aesdec128kl\[ \\t\]+\[^\n\]*h1\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "sete" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*k1" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m128i k1, k2;
|
||||
const char h1[48];
|
||||
|
||||
unsigned char
|
||||
test_keylocker_1 (void)
|
||||
{
|
||||
return _mm_aesdec128kl_u8 (&k1, k2, h1);
|
||||
}
|
17
gcc/testsuite/gcc.target/i386/keylocker-aesdec256kl.c
Normal file
17
gcc/testsuite/gcc.target/i386/keylocker-aesdec256kl.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mkl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "aesdec256kl\[ \\t\]+\[^\n\]*h1\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "sete" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*k1" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m128i k1, k2;
|
||||
const char h1[48];
|
||||
|
||||
unsigned char
|
||||
test_keylocker_3 (void)
|
||||
{
|
||||
return _mm_aesdec256kl_u8 (&k1, k2, h1);
|
||||
}
|
32
gcc/testsuite/gcc.target/i386/keylocker-aesdecwide128kl.c
Normal file
32
gcc/testsuite/gcc.target/i386/keylocker-aesdecwide128kl.c
Normal file
@ -0,0 +1,32 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mwidekl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+16(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+32(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+48(\\(%rip\\))?\[^\n\r]*%xmm3" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+64(\\(%rip\\))?\[^\n\r]*%xmm4" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+80(\\(%rip\\))?\[^\n\r]*%xmm5" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+96(\\(%rip\\))?\[^\n\r]*%xmm6" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+112(\\(%rip\\))?\[^\n\r]*%xmm7" } } */
|
||||
/* { dg-final { scan-assembler "aesdecwide128kl\[ \\t\]+\[^\n\]*h1" } } */
|
||||
/* { dg-final { scan-assembler "sete" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*odata(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*odata\\+16(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*odata\\+32(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*odata\\+48(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm4\[^\n\r]*odata\\+64(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm5\[^\n\r]*odata\\+80(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm6\[^\n\r]*odata\\+96(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm7\[^\n\r]*odata\\+112(\\(%rip\\))?" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
const char h1[48];
|
||||
const __m128i idata[8];
|
||||
__m128i odata[8];
|
||||
|
||||
unsigned char
|
||||
test_keylocker_5 (void)
|
||||
{
|
||||
return _mm_aesdecwide128kl_u8 (odata, idata, h1);
|
||||
}
|
32
gcc/testsuite/gcc.target/i386/keylocker-aesdecwide256kl.c
Normal file
32
gcc/testsuite/gcc.target/i386/keylocker-aesdecwide256kl.c
Normal file
@ -0,0 +1,32 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mwidekl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+16(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+32(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+48(\\(%rip\\))?\[^\n\r]*%xmm3" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+64(\\(%rip\\))?\[^\n\r]*%xmm4" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+80(\\(%rip\\))?\[^\n\r]*%xmm5" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+96(\\(%rip\\))?\[^\n\r]*%xmm6" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+112(\\(%rip\\))?\[^\n\r]*%xmm7" } } */
|
||||
/* { dg-final { scan-assembler "aesdecwide256kl\[ \\t\]+\[^\n\]*h1" } } */
|
||||
/* { dg-final { scan-assembler "sete" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*odata(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*odata\\+16(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*odata\\+32(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*odata\\+48(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm4\[^\n\r]*odata\\+64(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm5\[^\n\r]*odata\\+80(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm6\[^\n\r]*odata\\+96(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm7\[^\n\r]*odata\\+112(\\(%rip\\))?" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
const char h1[48];
|
||||
const __m128i idata[8];
|
||||
__m128i odata[8];
|
||||
|
||||
unsigned char
|
||||
test_keylocker_6 (void)
|
||||
{
|
||||
return _mm_aesdecwide256kl_u8 (odata, idata, h1);
|
||||
}
|
17
gcc/testsuite/gcc.target/i386/keylocker-aesenc128kl.c
Normal file
17
gcc/testsuite/gcc.target/i386/keylocker-aesenc128kl.c
Normal file
@ -0,0 +1,17 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mkl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "aesenc128kl\[ \\t\]+\[^\n\]*h1\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "sete" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*k1" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
__m128i k1, k2;
|
||||
const char h1[48];
|
||||
|
||||
unsigned char
|
||||
test_keylocker_2 (void)
|
||||
{
|
||||
return _mm_aesenc128kl_u8 (&k1, k2, h1);
|
||||
}
|
32
gcc/testsuite/gcc.target/i386/keylocker-aesencwide128kl.c
Normal file
32
gcc/testsuite/gcc.target/i386/keylocker-aesencwide128kl.c
Normal file
@ -0,0 +1,32 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mwidekl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+16(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+32(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+48(\\(%rip\\))?\[^\n\r]*%xmm3" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+64(\\(%rip\\))?\[^\n\r]*%xmm4" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+80(\\(%rip\\))?\[^\n\r]*%xmm5" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+96(\\(%rip\\))?\[^\n\r]*%xmm6" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+112(\\(%rip\\))?\[^\n\r]*%xmm7" } } */
|
||||
/* { dg-final { scan-assembler "aesencwide128kl\[ \\t\]+\[^\n\]*h1(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "sete" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*odata(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*odata\\+16(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*odata\\+32(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*odata\\+48(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm4\[^\n\r]*odata\\+64(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm5\[^\n\r]*odata\\+80(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm6\[^\n\r]*odata\\+96(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm7\[^\n\r]*odata\\+112(\\(%rip\\))?" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
const char h1[48];
|
||||
const __m128i idata[8];
|
||||
__m128i odata[8];
|
||||
|
||||
unsigned char
|
||||
test_keylocker_7 (void)
|
||||
{
|
||||
return _mm_aesencwide128kl_u8 (odata, idata, h1);
|
||||
}
|
33
gcc/testsuite/gcc.target/i386/keylocker-aesencwide256kl.c
Normal file
33
gcc/testsuite/gcc.target/i386/keylocker-aesencwide256kl.c
Normal file
@ -0,0 +1,33 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mwidekl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+16(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+32(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+48(\\(%rip\\))?\[^\n\r]*%xmm3" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+64(\\(%rip\\))?\[^\n\r]*%xmm4" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+80(\\(%rip\\))?\[^\n\r]*%xmm5" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+96(\\(%rip\\))?\[^\n\r]*%xmm6" } } */
|
||||
/* { dg-final { scan-assembler "movdqu\[ \\t\]+\[^\n\]*idata\\+112(\\(%rip\\))?\[^\n\r]*%xmm7" } } */
|
||||
/* { dg-final { scan-assembler "aesencwide256kl\[ \\t\]+\[^\n\]*h1(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "sete" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*odata(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*odata\\+16(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*odata\\+32(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*odata\\+48(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm4\[^\n\r]*odata\\+64(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm5\[^\n\r]*odata\\+80(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm6\[^\n\r]*odata\\+96(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm7\[^\n\r]*odata\\+112(\\(%rip\\))?" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
const char h1[48];
|
||||
const __m128i idata[8];
|
||||
__m128i odata[8];
|
||||
|
||||
unsigned char
|
||||
test_keylocker_8 (void)
|
||||
{
|
||||
return _mm_aesencwide256kl_u8 (odata, idata, h1);
|
||||
}
|
||||
|
29
gcc/testsuite/gcc.target/i386/keylocker-encodekey128.c
Normal file
29
gcc/testsuite/gcc.target/i386/keylocker-encodekey128.c
Normal file
@ -0,0 +1,29 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mkl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k1(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "movl\[ \\t\]+\[^\n\]*ctrl(\\(%rip\\))?\[^\n\r]*%eax" } } */
|
||||
/* { dg-final { scan-assembler "encodekey128\[ \\t\]+\[^\n\]*%eax\[^\n\r]*%eax" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*h2(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*h2\\+16(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*h2\\+32(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqa|movaps)\[ \\t\]+\[^\n\]*%xmm\[4-6\]\[^\n\r]*k2(\\(%rip\\))?" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
unsigned int ctrl;
|
||||
char h2[48];
|
||||
__m128i k1, k2;
|
||||
|
||||
unsigned int
|
||||
test_keylocker_9 (void)
|
||||
{
|
||||
unsigned int ret;
|
||||
|
||||
ret = _mm_encodekey128_u32 (ctrl, k1, h2);
|
||||
|
||||
if (ret)
|
||||
k2 = (__m128i){0};
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
30
gcc/testsuite/gcc.target/i386/keylocker-encodekey256.c
Normal file
30
gcc/testsuite/gcc.target/i386/keylocker-encodekey256.c
Normal file
@ -0,0 +1,30 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mkl -O2" } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k1(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
|
||||
/* { dg-final { scan-assembler "movl\[ \\t\]+\[^\n\]*ctrl(\\(%rip\\))?\[^\n\r]*%eax" } } */
|
||||
/* { dg-final { scan-assembler "encodekey256\[ \\t\]+\[^\n\]*%eax\[^\n\r]*%eax" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm0\[^\n\r]*h2(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*h2\\+16(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm2\[^\n\r]*h2\\+32(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqu|movups)\[ \\t\]+\[^\n\]*%xmm3\[^\n\r]*h2\\+48(\\(%rip\\))?" } } */
|
||||
/* { dg-final { scan-assembler "(?:movdqa|movaps)\[ \\t\]+\[^\n\]*%xmm\[4-6\]\[^\n\r]*k3(\\(%rip\\))?" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
unsigned int ctrl;
|
||||
char h2[48];
|
||||
__m128i k1, k2, k3;
|
||||
|
||||
unsigned int
|
||||
test_keylocker_10 (void)
|
||||
{
|
||||
unsigned int ret;
|
||||
ret = _mm_encodekey256_u32 (ctrl, k1, k2, h2);
|
||||
|
||||
if (ret)
|
||||
k3 = (__m128i){0};
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
19
gcc/testsuite/gcc.target/i386/keylocker-loadiwkey.c
Normal file
19
gcc/testsuite/gcc.target/i386/keylocker-loadiwkey.c
Normal file
@ -0,0 +1,19 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mkl -O2" } */
|
||||
/* { dg-final { scan-assembler "movl\[ \\t\]+\[^\n\]*ctrl(\\(%rip\\))?\[^\n\r]*%eax" } } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k2(\\(%rip\\))?\[^\n\r]*%xmm1" } } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k3(\\(%rip\\))?\[^\n\r]*%xmm2" } } */
|
||||
/* { dg-final { scan-assembler "movdqa\[ \\t\]+\[^\n\]*k1(\\(%rip\\))?\[^\n\r]*%xmm0" } } */
|
||||
/* { dg-final { scan-assembler "loadiwkey\[ \\t\]+\[^\n\]*%xmm1\[^\n\r]*%xmm2" } } */
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
unsigned int ctrl;
|
||||
__m128i k1, k2, k3;
|
||||
|
||||
void
|
||||
test_keylocker_11 (void)
|
||||
{
|
||||
_mm_loadiwkey (ctrl, k1, k2, k3);
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
|
||||
with -O -std=c89 -pedantic-errors. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16" } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
@ -103,7 +103,7 @@
|
||||
|
||||
|
||||
#ifndef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
|
||||
#endif
|
||||
|
||||
/* Following intrinsics require immediate arguments. They
|
||||
@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
|
||||
|
||||
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
|
||||
#ifdef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
|
||||
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
test_1 (_cvtss_sh, unsigned short, float, 1)
|
||||
|
@ -708,6 +708,6 @@
|
||||
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
||||
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
||||
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl")
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
Loading…
x
Reference in New Issue
Block a user