mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-21 14:30:59 +08:00
Support Intel AVX-VNNI-INT8
gcc/ChangeLog * common/config/i386/cpuinfo.h (get_available_features): Detect avxvnniint8. * common/config/i386/i386-common.cc (OPTION_MASK_ISA2_AVXVNNIINT8_SET): New. (OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto. (ix86_handle_option): Handle -mavxvnniint8. * common/config/i386/i386-cpuinfo.h (enum processor_features): Add FEATURE_AVXVNNIINT8. * common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for avxvnniint8. * config.gcc: Add avxvnniint8intrin.h. * config/i386/avxvnniint8intrin.h: New file. * config/i386/cpuid.h (bit_AVXVNNIINT8): New. * config/i386/i386-builtin.def: Add new builtins. * config/i386/i386-c.cc (ix86_target_macros_internal): Define __AVXVNNIINT8__. * config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8. (ix86_valid_target_attribute_inner_p): Handle avxvnniint8. * config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8) New.. * config/i386/i386.opt: Add option -mavxvnniint8. * config/i386/immintrin.h: Include avxvnniint8intrin.h. * config/i386/sse.md (UNSPEC_VPMADDUBSWACCD UNSPEC_VPMADDUBSWACCSSD,UNSPEC_VPMADDWDACCD, UNSPEC_VPMADDWDACCSSD): Rename according to new style. (vpdp<vpdotprodtype>_<mode>): New define_insn. * doc/extend.texi: Document avxvnniint8. * doc/invoke.texi: Document -mavxvnniint8. * doc/sourcebuild.texi: Document target avxvnniint8. gcc/testsuite/ChangeLog * g++.dg/other/i386-2.C: Add -mavxvnniint8. * g++.dg/other/i386-3.C: Ditto. * gcc.target/i386/avx-check.h: Add avxvnniint8 check. * gcc.target/i386/sse-12.c: Add -mavxvnniint8. * gcc.target/i386/sse-13.c: Ditto. * gcc.target/i386/sse-14.c: Ditto. * gcc.target/i386/sse-22.c: Ditto. * gcc.target/i386/sse-23.c: Ditto. * gcc.target/i386/funcspec-56.inc: Add new target attribute. * lib/target-supports.exp (check_effective_target_avxvnniint8): New. * gcc.target/i386/avxvnniint8-1.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto. * gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto. Co-authored-by: Hongyu Wang <hongyu.wang@intel.com> Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
This commit is contained in:
parent
825d004138
commit
406675947d
@ -795,6 +795,8 @@ get_available_features (struct __processor_model *cpu_model,
|
||||
set_feature (FEATURE_AVXVNNI);
|
||||
if (eax & bit_AVXIFMA)
|
||||
set_feature (FEATURE_AVXIFMA);
|
||||
if (edx & bit_AVXVNNIINT8)
|
||||
set_feature (FEATURE_AVXVNNIINT8);
|
||||
}
|
||||
if (avx512_usable)
|
||||
{
|
||||
|
@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
|
||||
#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
|
||||
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
|
||||
#define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
|
||||
as -msse4.2. */
|
||||
@ -214,7 +215,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
(OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
|
||||
#define OPTION_MASK_ISA2_AVX2_UNSET \
|
||||
(OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
|
||||
| OPTION_MASK_ISA2_AVX512F_UNSET)
|
||||
| OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
|
||||
#define OPTION_MASK_ISA_AVX512F_UNSET \
|
||||
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
|
||||
| OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
|
||||
@ -278,6 +279,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define OPTION_MASK_ISA2_KL_UNSET \
|
||||
(OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
|
||||
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
|
||||
#define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET OPTION_MASK_ISA2_AVXVNNIINT8
|
||||
|
||||
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
|
||||
as -mno-sse4.1. */
|
||||
@ -1142,6 +1144,24 @@ ix86_handle_option (struct gcc_options *opts,
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mavxvnniint8:
|
||||
if (value)
|
||||
{
|
||||
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNIINT8_SET;
|
||||
opts->x_ix86_isa_flags2_explicit |=
|
||||
OPTION_MASK_ISA2_AVXVNNIINT8_SET;
|
||||
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
|
||||
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
|
||||
}
|
||||
else
|
||||
{
|
||||
opts->x_ix86_isa_flags2 &=
|
||||
~OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
|
||||
opts->x_ix86_isa_flags2_explicit |=
|
||||
OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_mfma:
|
||||
if (value)
|
||||
{
|
||||
|
@ -241,6 +241,7 @@ enum processor_features
|
||||
FEATURE_X86_64_V3,
|
||||
FEATURE_X86_64_V4,
|
||||
FEATURE_AVXIFMA,
|
||||
FEATURE_AVXVNNIINT8,
|
||||
CPU_FEATURE_MAX
|
||||
};
|
||||
|
||||
|
@ -176,4 +176,6 @@ ISA_NAMES_TABLE_START
|
||||
ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
|
||||
ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma")
|
||||
ISA_NAMES_TABLE_ENTRY("avxvnniint8", FEATURE_AVXVNNIINT8,
|
||||
P_NONE, "-mavxvnniint8")
|
||||
ISA_NAMES_TABLE_END
|
||||
|
@ -422,7 +422,7 @@ i[34567]86-*-* | x86_64-*-*)
|
||||
amxbf16intrin.h x86gprintrin.h uintrintrin.h
|
||||
hresetintrin.h keylockerintrin.h avxvnniintrin.h
|
||||
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
|
||||
avxifmaintrin.h"
|
||||
avxifmaintrin.h avxvnniint8intrin.h"
|
||||
;;
|
||||
ia64-*-*)
|
||||
extra_headers=ia64intrin.h
|
||||
|
138
gcc/config/i386/avxvnniint8intrin.h
Normal file
138
gcc/config/i386/avxvnniint8intrin.h
Normal file
@ -0,0 +1,138 @@
|
||||
/* Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 3, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
Under Section 7 of GPL version 3, you are granted additional
|
||||
permissions described in the GCC Runtime Library Exception, version
|
||||
3.1, as published by the Free Software Foundation.
|
||||
|
||||
You should have received a copy of the GNU General Public License and
|
||||
a copy of the GCC Runtime Library Exception along with this program;
|
||||
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#if !defined _IMMINTRIN_H_INCLUDED
|
||||
#error "Never use <avxvnniint8vlintrin.h> directly; include <immintrin.h> instead."
|
||||
#endif
|
||||
|
||||
#ifndef _AVXVNNIINT8INTRIN_H_INCLUDED
|
||||
#define _AVXVNNIINT8INTRIN_H_INCLUDED
|
||||
|
||||
#if !defined(__AVXVNNIINT8__)
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avxvnniint8")
|
||||
#define __DISABLE_AVXVNNIINT8__
|
||||
#endif /* __AVXVNNIINT8__ */
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbssd_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbssd128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbssds_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbssds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbsud_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbsud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbsuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbsuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbuud_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbuud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m128i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_dpbuuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
|
||||
{
|
||||
return (__m128i)
|
||||
__builtin_ia32_vpdpbuuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbssd_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbssd256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbssds_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbssds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbsud_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbsud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbsuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbsuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbuud_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbuud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
extern __inline __m256i
|
||||
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_dpbuuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
|
||||
{
|
||||
return (__m256i)
|
||||
__builtin_ia32_vpdpbuuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
|
||||
}
|
||||
|
||||
#ifdef __DISABLE_AVXVNNIINT8__
|
||||
#undef __DISABLE_AVXVNNIINT8__
|
||||
#pragma GCC pop_options
|
||||
#endif /* __DISABLE_AVXVNNIINT8__ */
|
||||
|
||||
#endif /* __AVXVNNIINT8INTRIN_H_INCLUDED */
|
@ -49,6 +49,7 @@
|
||||
#define bit_RDRND (1 << 30)
|
||||
|
||||
/* %edx */
|
||||
#define bit_AVXVNNIINT8 (1 << 4)
|
||||
#define bit_CMPXCHG8B (1 << 8)
|
||||
#define bit_CMOV (1 << 15)
|
||||
#define bit_MMX (1 << 23)
|
||||
|
@ -2694,6 +2694,20 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
|
||||
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
|
||||
|
||||
/* AVXVNNIINT8 */
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v8si, "__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v8si, "__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v8si, "__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v8si, "__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v8si, "__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v8si, "__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v4si, "__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v4si, "__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v4si, "__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v4si, "__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v4si, "__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v4si, "__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
|
||||
|
||||
/* VPCLMULQDQ */
|
||||
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
|
||||
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)
|
||||
|
@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__AVXVNNI__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA)
|
||||
def_or_undef (parse_in, "__AVXIFMA__");
|
||||
if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8)
|
||||
def_or_undef (parse_in, "__AVXVNNIINT8__");
|
||||
if (TARGET_IAMCU)
|
||||
{
|
||||
def_or_undef (parse_in, "__iamcu");
|
||||
|
@ -110,3 +110,4 @@ DEF_PTA(WIDEKL)
|
||||
DEF_PTA(AVXVNNI)
|
||||
DEF_PTA(AVX512FP16)
|
||||
DEF_PTA(AVXIFMA)
|
||||
DEF_PTA(AVXVNNIINT8)
|
||||
|
@ -227,7 +227,8 @@ static struct ix86_target_opts isa2_opts[] =
|
||||
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
|
||||
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
|
||||
{ "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
|
||||
{ "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }
|
||||
{ "-mavxifma", OPTION_MASK_ISA2_AVXIFMA },
|
||||
{ "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 }
|
||||
};
|
||||
static struct ix86_target_opts isa_opts[] =
|
||||
{
|
||||
@ -1074,6 +1075,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
|
||||
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
|
||||
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
|
||||
IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
|
||||
IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
|
||||
|
||||
/* enum options */
|
||||
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
|
||||
|
@ -1219,3 +1219,8 @@ mavxifma
|
||||
Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
|
||||
AVXIFMA built-in functions and code generation.
|
||||
|
||||
mavxvnniint8
|
||||
Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and
|
||||
AVXVNNIINT8 built-in functions and code generation.
|
||||
|
@ -46,6 +46,8 @@
|
||||
|
||||
#include <avxifmaintrin.h>
|
||||
|
||||
#include <avxvnniint8intrin.h>
|
||||
|
||||
#include <avx2intrin.h>
|
||||
|
||||
#include <avx512fintrin.h>
|
||||
|
@ -166,10 +166,10 @@
|
||||
UNSPEC_VPSHLDV
|
||||
|
||||
;; For AVX512VNNI support
|
||||
UNSPEC_VPMADDUBSWACCD
|
||||
UNSPEC_VPMADDUBSWACCSSD
|
||||
UNSPEC_VPMADDWDACCD
|
||||
UNSPEC_VPMADDWDACCSSD
|
||||
UNSPEC_VPDPBUSD
|
||||
UNSPEC_VPDPBUSDS
|
||||
UNSPEC_VPDPWSSD
|
||||
UNSPEC_VPDPWSSDS
|
||||
|
||||
;; For VAES support
|
||||
UNSPEC_VAESDEC
|
||||
@ -200,6 +200,13 @@
|
||||
UNSPEC_COMPLEX_FCMUL
|
||||
UNSPEC_COMPLEX_MASK
|
||||
|
||||
;; For AVX-VNNI-INT8 support
|
||||
UNSPEC_VPDPBSSD
|
||||
UNSPEC_VPDPBSSDS
|
||||
UNSPEC_VPDPBSUD
|
||||
UNSPEC_VPDPBSUDS
|
||||
UNSPEC_VPDPBUUD
|
||||
UNSPEC_VPDPBUUDS
|
||||
])
|
||||
|
||||
(define_c_enum "unspecv" [
|
||||
@ -28535,7 +28542,7 @@
|
||||
[(match_operand:V16SI 1 "register_operand" "0")
|
||||
(match_operand:V16SI 2 "register_operand" "v")
|
||||
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDUBSWACCD))]
|
||||
UNSPEC_VPDPBUSD))]
|
||||
"TARGET_AVX512VNNI"
|
||||
"vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
|
||||
[(set_attr ("prefix") ("evex"))])
|
||||
@ -28546,7 +28553,7 @@
|
||||
[(match_operand:VI4_AVX2 1 "register_operand" "0,0")
|
||||
(match_operand:VI4_AVX2 2 "register_operand" "x,v")
|
||||
(match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
|
||||
UNSPEC_VPMADDUBSWACCD))]
|
||||
UNSPEC_VPDPBUSD))]
|
||||
"TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
|
||||
"@
|
||||
%{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
|
||||
@ -28561,7 +28568,7 @@
|
||||
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
|
||||
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDUBSWACCD)
|
||||
UNSPEC_VPDPBUSD)
|
||||
(match_dup 1)
|
||||
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512VNNI"
|
||||
@ -28590,7 +28597,7 @@
|
||||
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
|
||||
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")
|
||||
] UNSPEC_VPMADDUBSWACCD)
|
||||
] UNSPEC_VPDPBUSD)
|
||||
(match_operand:VI4_AVX512VL 4 "const0_operand")
|
||||
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512VNNI"
|
||||
@ -28603,7 +28610,7 @@
|
||||
[(match_operand:V16SI 1 "register_operand" "0")
|
||||
(match_operand:V16SI 2 "register_operand" "v")
|
||||
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDUBSWACCSSD))]
|
||||
UNSPEC_VPDPBUSDS))]
|
||||
"TARGET_AVX512VNNI"
|
||||
"vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
|
||||
[(set_attr ("prefix") ("evex"))])
|
||||
@ -28614,7 +28621,7 @@
|
||||
[(match_operand:VI4_AVX2 1 "register_operand" "0,0")
|
||||
(match_operand:VI4_AVX2 2 "register_operand" "x,v")
|
||||
(match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
|
||||
UNSPEC_VPMADDUBSWACCSSD))]
|
||||
UNSPEC_VPDPBUSDS))]
|
||||
"TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
|
||||
"@
|
||||
%{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
|
||||
@ -28629,7 +28636,7 @@
|
||||
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
|
||||
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDUBSWACCSSD)
|
||||
UNSPEC_VPDPBUSDS)
|
||||
(match_dup 1)
|
||||
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512VNNI"
|
||||
@ -28658,7 +28665,7 @@
|
||||
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
|
||||
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDUBSWACCSSD)
|
||||
UNSPEC_VPDPBUSDS)
|
||||
(match_operand:VI4_AVX512VL 4 "const0_operand")
|
||||
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512VNNI"
|
||||
@ -28671,7 +28678,7 @@
|
||||
[(match_operand:V16SI 1 "register_operand" "0")
|
||||
(match_operand:V16SI 2 "register_operand" "v")
|
||||
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDWDACCD))]
|
||||
UNSPEC_VPDPWSSD))]
|
||||
"TARGET_AVX512VNNI"
|
||||
"vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
|
||||
[(set_attr ("prefix") ("evex"))])
|
||||
@ -28682,7 +28689,7 @@
|
||||
[(match_operand:VI4_AVX2 1 "register_operand" "0,0")
|
||||
(match_operand:VI4_AVX2 2 "register_operand" "x,v")
|
||||
(match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
|
||||
UNSPEC_VPMADDWDACCD))]
|
||||
UNSPEC_VPDPWSSD))]
|
||||
"TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
|
||||
"@
|
||||
%{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
|
||||
@ -28697,7 +28704,7 @@
|
||||
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
|
||||
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDWDACCD)
|
||||
UNSPEC_VPDPWSSD)
|
||||
(match_dup 1)
|
||||
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512VNNI"
|
||||
@ -28726,7 +28733,7 @@
|
||||
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
|
||||
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDWDACCD)
|
||||
UNSPEC_VPDPWSSD)
|
||||
(match_operand:VI4_AVX512VL 4 "const0_operand")
|
||||
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512VNNI"
|
||||
@ -28739,7 +28746,7 @@
|
||||
[(match_operand:V16SI 1 "register_operand" "0")
|
||||
(match_operand:V16SI 2 "register_operand" "v")
|
||||
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDWDACCSSD))]
|
||||
UNSPEC_VPDPWSSDS))]
|
||||
"TARGET_AVX512VNNI"
|
||||
"vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
|
||||
[(set_attr ("prefix") ("evex"))])
|
||||
@ -28750,7 +28757,7 @@
|
||||
[(match_operand:VI4_AVX2 1 "register_operand" "0,0")
|
||||
(match_operand:VI4_AVX2 2 "register_operand" "x,v")
|
||||
(match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
|
||||
UNSPEC_VPMADDWDACCSSD))]
|
||||
UNSPEC_VPDPWSSDS))]
|
||||
"TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
|
||||
"@
|
||||
%{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
|
||||
@ -28765,7 +28772,7 @@
|
||||
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
|
||||
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDWDACCSSD)
|
||||
UNSPEC_VPDPWSSDS)
|
||||
(match_dup 1)
|
||||
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512VNNI"
|
||||
@ -28794,7 +28801,7 @@
|
||||
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
|
||||
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
|
||||
UNSPEC_VPMADDWDACCSSD)
|
||||
UNSPEC_VPDPWSSDS)
|
||||
(match_operand:VI4_AVX512VL 4 "const0_operand")
|
||||
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
|
||||
"TARGET_AVX512VNNI"
|
||||
@ -29235,3 +29242,27 @@
|
||||
gcc_unreachable ();
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_int_iterator VPDOTPROD
|
||||
[UNSPEC_VPDPBSSD
|
||||
UNSPEC_VPDPBSSDS
|
||||
UNSPEC_VPDPBSUD
|
||||
UNSPEC_VPDPBSUDS
|
||||
UNSPEC_VPDPBUUD
|
||||
UNSPEC_VPDPBUUDS])
|
||||
|
||||
(define_int_attr vpdotprodtype
|
||||
[(UNSPEC_VPDPBSSD "bssd") (UNSPEC_VPDPBSSDS "bssds")
|
||||
(UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
|
||||
(UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
|
||||
|
||||
(define_insn "vpdp<vpdotprodtype>_<mode>"
|
||||
[(set (match_operand:VI4_AVX 0 "register_operand" "=x")
|
||||
(unspec:VI4_AVX
|
||||
[(match_operand:VI4_AVX 1 "register_operand" "0")
|
||||
(match_operand:VI4_AVX 2 "register_operand" "x")
|
||||
(match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")]
|
||||
VPDOTPROD))]
|
||||
"TARGET_AVXVNNIINT8"
|
||||
"vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}"
|
||||
[(set_attr "prefix" "vex")])
|
||||
|
@ -7065,6 +7065,11 @@ Enable/disable the generation of the AVXVNNI instructions.
|
||||
@cindex @code{target("avxifma")} function attribute, x86
|
||||
Enable/disable the generation of the AVXIFMA instructions.
|
||||
|
||||
@item avxvnniint8
|
||||
@itemx no-avxvnniint8
|
||||
@cindex @code{target("avxvnniint8")} function attribute, x86
|
||||
Enable/disable the generation of the AVXVNNIINT8 instructions.
|
||||
|
||||
@item cld
|
||||
@itemx no-cld
|
||||
@cindex @code{target("cld")} function attribute, x86
|
||||
|
@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options.
|
||||
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
|
||||
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
|
||||
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
|
||||
-mavx512fp16 -mavxifma @gol
|
||||
-mavx512fp16 -mavxifma -mavxvnniint8 @gol
|
||||
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
|
||||
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
|
||||
-mkl -mwidekl @gol
|
||||
@ -32907,6 +32907,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@need 200
|
||||
@itemx -mavxifma
|
||||
@opindex mavxifma
|
||||
@need 200
|
||||
@itemx -mavxvnniint8
|
||||
@opindex mavxvnniint8
|
||||
These switches enable the use of instructions in the MMX, SSE,
|
||||
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
|
||||
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
|
||||
@ -32917,8 +32920,8 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
|
||||
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
|
||||
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
|
||||
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
|
||||
AVXIFMA or CLDEMOTE extended instruction sets. Each has a corresponding
|
||||
@option{-mno-} option to disable use of these instructions.
|
||||
AVXIFMA, AVXVNNIINT8 or CLDEMOTE extended instruction sets. Each has a
|
||||
corresponding @option{-mno-} option to disable use of these instructions.
|
||||
|
||||
These extensions are also available as built-in functions: see
|
||||
@ref{x86 Built-in Functions}, for details of the functions enabled and
|
||||
|
@ -2493,6 +2493,9 @@ Target supports the execution of @code{avx512vp2intersect} instructions.
|
||||
@item avxifma
|
||||
Target supports the execution of @code{avxifma} instructions.
|
||||
|
||||
@item avxvnniint8
|
||||
Target supports the execution of @code{avxvnniint8} instructions.
|
||||
|
||||
@item amx_tile
|
||||
Target supports the execution of @code{amx-tile} instructions.
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
|
||||
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
|
||||
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
|
||||
|
||||
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
|
||||
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,
|
||||
|
@ -25,6 +25,9 @@ main ()
|
||||
&& avx_os_support ()
|
||||
#ifdef AVXIFMA
|
||||
&& __builtin_cpu_supports ("avxifma")
|
||||
#endif
|
||||
#ifdef AVXVNNIINT8
|
||||
&& __builtin_cpu_supports ("avxvnniint8")
|
||||
#endif
|
||||
)
|
||||
{
|
||||
|
43
gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
Normal file
43
gcc/testsuite/gcc.target/i386/avxvnniint8-1.c
Normal file
@ -0,0 +1,43 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-mavxvnniint8 -O2" } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
volatile __m256i x,y,z;
|
||||
volatile __m128i x_,y_,z_;
|
||||
volatile __mmask8 m;
|
||||
|
||||
void extern
|
||||
avxvnniint8_test (void)
|
||||
{
|
||||
x = _mm256_dpbssd_epi32 (x, y, z);
|
||||
x_ = _mm_dpbssd_epi32 (x_, y_, z_);
|
||||
|
||||
x = _mm256_dpbssds_epi32 (x, y, z);
|
||||
x_ = _mm_dpbssds_epi32 (x_, y_, z_);
|
||||
|
||||
x = _mm256_dpbsud_epi32 (x, y, z);
|
||||
x_ = _mm_dpbsud_epi32 (x_, y_, z_);
|
||||
|
||||
x = _mm256_dpbsuds_epi32 (x, y, z);
|
||||
x_ = _mm_dpbsuds_epi32 (x_, y_, z_);
|
||||
|
||||
x = _mm256_dpbuud_epi32 (x, y, z);
|
||||
x_ = _mm_dpbuud_epi32 (x_, y_, z_);
|
||||
|
||||
x = _mm256_dpbuuds_epi32 (x, y, z);
|
||||
x_ = _mm_dpbuuds_epi32 (x_, y_, z_);
|
||||
}
|
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
Normal file
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssd-2.c
Normal file
@ -0,0 +1,72 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavxvnniint8" } */
|
||||
/* { dg-require-effective-target avxvnniint8 } */
|
||||
#define AVXVNNIINT8
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
|
||||
static void
|
||||
CALC (int *r, int *dst, char *s1, char *s2, int size)
|
||||
{
|
||||
short tempres[32];
|
||||
for (int i = 0; i < size; i++) {
|
||||
tempres[i] = (short) s1[i] * (short) s2[i];
|
||||
}
|
||||
for (int i = 0; i < size / 4; i++) {
|
||||
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
|
||||
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
|
||||
r[i] = test;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
union256i_d res_256;
|
||||
union256i_b src2_256;
|
||||
union256i_b src1_256;
|
||||
int res_ref_256[8];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_256.a[i] = 10 + 3 * i + sign;
|
||||
src2_256.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
res_256.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
|
||||
res_256.x = _mm256_dpbssd_epi32 (res_256.x, src1_256.x, src2_256.x);
|
||||
if (check_union256i_d (res_256, res_ref_256))
|
||||
abort ();
|
||||
|
||||
union128i_d res_128;
|
||||
union128i_b src2_128;
|
||||
union128i_b src1_128;
|
||||
int res_ref_128[4];
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_128.a[i] = 10 + 3 * i * i + sign;
|
||||
src2_128.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
res_128.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
|
||||
res_128.x = _mm_dpbssd_epi32 (res_128.x, src1_128.x, src2_128.x);
|
||||
if (check_union128i_d (res_128, res_ref_128))
|
||||
abort ();
|
||||
}
|
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
Normal file
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbssds-2.c
Normal file
@ -0,0 +1,72 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavxvnniint8" } */
|
||||
/* { dg-require-effective-target avxvnniint8 } */
|
||||
#define AVXVNNIINT8
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
|
||||
static void
|
||||
CALC (int *r, int *dst, char *s1, char *s2, int size)
|
||||
{
|
||||
short tempres[32];
|
||||
for (int i = 0; i < size; i++) {
|
||||
tempres[i] = (short) s1[i] * (short) s2[i];
|
||||
}
|
||||
for (int i = 0; i < size / 4; i++) {
|
||||
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
|
||||
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
|
||||
r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
union256i_d res_256;
|
||||
union256i_b src2_256;
|
||||
union256i_b src1_256;
|
||||
int res_ref_256[8];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_256.a[i] = 10 + 3 * i + sign;
|
||||
src2_256.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
res_256.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
|
||||
res_256.x = _mm256_dpbssds_epi32 (res_256.x, src1_256.x, src2_256.x);
|
||||
if (check_union256i_d (res_256, res_ref_256))
|
||||
abort ();
|
||||
|
||||
union128i_d res_128;
|
||||
union128i_b src2_128;
|
||||
union128i_b src1_128;
|
||||
int res_ref_128[4];
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_128.a[i] = 10 + 3 * i * i + sign;
|
||||
src2_128.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
res_128.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
|
||||
res_128.x = _mm_dpbssds_epi32 (res_128.x, src1_128.x, src2_128.x);
|
||||
if (check_union128i_d (res_128, res_ref_128))
|
||||
abort ();
|
||||
}
|
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
Normal file
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsud-2.c
Normal file
@ -0,0 +1,72 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavxvnniint8" } */
|
||||
/* { dg-require-effective-target avxvnniint8 } */
|
||||
#define AVXVNNIINT8
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
|
||||
static void
|
||||
CALC (int *r, int *dst, char *s1, unsigned char *s2, int size)
|
||||
{
|
||||
short tempres[32];
|
||||
for (int i = 0; i < size; i++) {
|
||||
tempres[i] = (short) s1[i] * (unsigned short) s2[i];
|
||||
}
|
||||
for (int i = 0; i < size / 4; i++) {
|
||||
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
|
||||
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
|
||||
r[i] = test;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
union256i_d res_256;
|
||||
union256i_b src1_256;
|
||||
union256i_ub src2_256;
|
||||
int res_ref_256[8];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_256.a[i] = 10 + 3 * i + sign;
|
||||
src2_256.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
res_256.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
|
||||
res_256.x = _mm256_dpbsud_epi32 (res_256.x, src1_256.x, src2_256.x);
|
||||
if (check_union256i_d (res_256, res_ref_256))
|
||||
abort ();
|
||||
|
||||
union128i_d res_128;
|
||||
union128i_b src1_128;
|
||||
union128i_ub src2_128;
|
||||
int res_ref_128[4];
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_128.a[i] = 10 + 3 * i * i + sign;
|
||||
src2_128.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
res_128.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
|
||||
res_128.x = _mm_dpbsud_epi32 (res_128.x, src1_128.x, src2_128.x);
|
||||
if (check_union128i_d (res_128, res_ref_128))
|
||||
abort ();
|
||||
}
|
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
Normal file
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbsuds-2.c
Normal file
@ -0,0 +1,72 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavxvnniint8" } */
|
||||
/* { dg-require-effective-target avxvnniint8 } */
|
||||
#define AVXVNNIINT8
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
|
||||
static void
|
||||
CALC (int *r, int *dst, char *s1, unsigned char *s2, int size)
|
||||
{
|
||||
short tempres[32];
|
||||
for (int i = 0; i < size; i++) {
|
||||
tempres[i] = (short) s1[i] * (unsigned short) s2[i];
|
||||
}
|
||||
for (int i = 0; i < size / 4; i++) {
|
||||
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
|
||||
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
|
||||
r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
union256i_d res_256;
|
||||
union256i_b src1_256;
|
||||
union256i_ub src2_256;
|
||||
int res_ref_256[8];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_256.a[i] = 10 + 3 * i + sign;
|
||||
src2_256.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
res_256.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
|
||||
res_256.x = _mm256_dpbsuds_epi32 (res_256.x, src1_256.x, src2_256.x);
|
||||
if (check_union256i_d (res_256, res_ref_256))
|
||||
abort ();
|
||||
|
||||
union128i_d res_128;
|
||||
union128i_b src1_128;
|
||||
union128i_ub src2_128;
|
||||
int res_ref_128[4];
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_128.a[i] = 10 + 3 * i * i + sign;
|
||||
src2_128.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
res_128.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
|
||||
res_128.x = _mm_dpbsuds_epi32 (res_128.x, src1_128.x, src2_128.x);
|
||||
if (check_union128i_d (res_128, res_ref_128))
|
||||
abort ();
|
||||
}
|
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
Normal file
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuud-2.c
Normal file
@ -0,0 +1,72 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavxvnniint8" } */
|
||||
/* { dg-require-effective-target avxvnniint8 } */
|
||||
#define AVXVNNIINT8
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
|
||||
static void
|
||||
CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned char *s2, int size)
|
||||
{
|
||||
unsigned short tempres[32];
|
||||
for (int i = 0; i < size; i++) {
|
||||
tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
|
||||
}
|
||||
for (int i = 0; i < size / 4; i++) {
|
||||
unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
|
||||
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
|
||||
r[i] = test;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
union256i_ud res_256;
|
||||
union256i_ub src2_256;
|
||||
union256i_ub src1_256;
|
||||
unsigned int res_ref_256[8];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_256.a[i] = 10 + 3 * i + sign;
|
||||
src2_256.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
res_256.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
|
||||
res_256.x = _mm256_dpbuud_epi32 (res_256.x, src1_256.x, src2_256.x);
|
||||
if (check_union256i_ud (res_256, res_ref_256))
|
||||
abort ();
|
||||
|
||||
union128i_ud res_128;
|
||||
union128i_ub src2_128;
|
||||
union128i_ub src1_128;
|
||||
unsigned int res_ref_128[4];
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_128.a[i] = 10 + 3 * i * i + sign;
|
||||
src2_128.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
res_128.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
|
||||
res_128.x = _mm_dpbuud_epi32 (res_128.x, src1_128.x, src2_128.x);
|
||||
if (check_union128i_ud (res_128, res_ref_128))
|
||||
abort ();
|
||||
}
|
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
Normal file
72
gcc/testsuite/gcc.target/i386/avxvnniint8-vpdpbuuds-2.c
Normal file
@ -0,0 +1,72 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-options "-O2 -mavxvnniint8" } */
|
||||
/* { dg-require-effective-target avxvnniint8 } */
|
||||
#define AVXVNNIINT8
|
||||
#ifndef CHECK
|
||||
#define CHECK "avx-check.h"
|
||||
#endif
|
||||
|
||||
#ifndef TEST
|
||||
#define TEST avx_test
|
||||
#endif
|
||||
|
||||
#include CHECK
|
||||
|
||||
static void
|
||||
CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned char *s2, int size)
|
||||
{
|
||||
unsigned short tempres[32];
|
||||
for (int i = 0; i < size; i++) {
|
||||
tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
|
||||
}
|
||||
for (int i = 0; i < size / 4; i++) {
|
||||
unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
|
||||
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
|
||||
r[i] = test > 0xFFFFFFFF ? 0xFFFFFFFF : test;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
TEST (void)
|
||||
{
|
||||
int i;
|
||||
union256i_ud res_256;
|
||||
union256i_ub src2_256;
|
||||
union256i_ub src1_256;
|
||||
unsigned int res_ref_256[8];
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_256.a[i] = 10 + 3 * i + sign;
|
||||
src2_256.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 8; i++)
|
||||
res_256.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
|
||||
res_256.x = _mm256_dpbuuds_epi32 (res_256.x, src1_256.x, src2_256.x);
|
||||
if (check_union256i_ud (res_256, res_ref_256))
|
||||
abort ();
|
||||
|
||||
union128i_ud res_128;
|
||||
union128i_ub src2_128;
|
||||
union128i_ub src1_128;
|
||||
unsigned int res_ref_128[4];
|
||||
|
||||
for (i = 0; i < 16; i++)
|
||||
{
|
||||
int sign = i % 2 ? 1 : -1;
|
||||
src1_128.a[i] = 10 + 3 * i * i + sign;
|
||||
src2_128.a[i] = sign * 10 * i * i;
|
||||
}
|
||||
|
||||
for (i = 0; i < 4; i++)
|
||||
res_128.a[i] = 0x7fffffff;
|
||||
|
||||
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
|
||||
res_128.x = _mm_dpbuuds_epi32 (res_128.x, src1_128.x, src2_128.x);
|
||||
if (check_union128i_ud (res_128, res_ref_128))
|
||||
abort ();
|
||||
}
|
@ -81,6 +81,7 @@ extern void test_widekl (void) __attribute__((__target__("widekl")));
|
||||
extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
|
||||
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
|
||||
extern void test_avxifma (void) __attribute__((__target__("avxifma")));
|
||||
extern void test_avxvnniint8 (void) __attribute__((__target__("avxvnniint8")));
|
||||
|
||||
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
|
||||
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
|
||||
@ -163,6 +164,7 @@ extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
|
||||
extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
|
||||
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
|
||||
extern void test_no_avxifma (void) __attribute__((__target__("no-avxifma")));
|
||||
extern void test_no_avxvnniint8 (void) __attribute__((__target__("no-avxvnniint8")));
|
||||
|
||||
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
|
||||
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));
|
||||
|
@ -3,7 +3,7 @@
|
||||
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
|
||||
with -O -std=c89 -pedantic-errors. */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */
|
||||
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
|
||||
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
|
||||
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
|
||||
/* { dg-add-options bind_pic_locally } */
|
||||
|
||||
#include <mm_malloc.h>
|
||||
|
@ -103,7 +103,7 @@
|
||||
|
||||
|
||||
#ifndef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
|
||||
#endif
|
||||
|
||||
/* Following intrinsics require immediate arguments. They
|
||||
@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
|
||||
|
||||
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
|
||||
#ifdef DIFFERENT_PRAGMAS
|
||||
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
|
||||
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
|
||||
#endif
|
||||
#include <immintrin.h>
|
||||
test_1 (_cvtss_sh, unsigned short, float, 1)
|
||||
|
@ -843,6 +843,6 @@
|
||||
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
|
||||
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
|
||||
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
|
||||
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
@ -9534,6 +9534,18 @@ proc check_effective_target_avxifma { } {
|
||||
} "-O0 -mavxifma" ]
|
||||
}
|
||||
|
||||
# Return 1 if avxvnniint8 instructions can be compiled.
|
||||
proc check_effective_target_avxvnniint8 { } {
|
||||
return [check_no_compiler_messages avxvnniint8 object {
|
||||
typedef int __v8si __attribute__ ((__vector_size__ (32)));
|
||||
__v8si
|
||||
_mm256_dpbssd_epi32 (__v8si __A, __v8si __B, __v8si __C)
|
||||
{
|
||||
return __builtin_ia32_vpdpbssd256 (__A, __B, __C);
|
||||
}
|
||||
} "-O0 -mavxvnniint8" ]
|
||||
}
|
||||
|
||||
# Return 1 if sse instructions can be compiled.
|
||||
proc check_effective_target_sse { } {
|
||||
return [check_no_compiler_messages sse object {
|
||||
|
Loading…
x
Reference in New Issue
Block a user