Support Intel AVX-VNNI-INT8

gcc/ChangeLog

	* common/config/i386/cpuinfo.h (get_available_features): Detect
	avxvnniint8.
	* common/config/i386/i386-common.cc
	(OPTION_MASK_ISA2_AVXVNNIINT8_SET): New.
	(OPTION_MASK_ISA2_AVXVNNIINT8_UNSET): Ditto.
	(ix86_handle_option): Handle -mavxvnniint8.
	* common/config/i386/i386-cpuinfo.h (enum processor_features):
	Add FEATURE_AVXVNNIINT8.
	* common/config/i386/i386-isas.h: Add ISA_NAME_TABLE_ENTRY for
	avxvnniint8.
	* config.gcc: Add avxvnniint8intrin.h.
	* config/i386/avxvnniint8intrin.h: New file.
	* config/i386/cpuid.h (bit_AVXVNNIINT8): New.
	* config/i386/i386-builtin.def: Add new builtins.
	* config/i386/i386-c.cc (ix86_target_macros_internal): Define
	__AVXVNNIINT8__.
	* config/i386/i386-options.cc (isa2_opts): Add -mavxvnniint8.
	(ix86_valid_target_attribute_inner_p): Handle avxvnniint8.
	* config/i386/i386-isa.def: Add DEF_PTA(AVXVNNIINT8) New..
	* config/i386/i386.opt: Add option -mavxvnniint8.
	* config/i386/immintrin.h: Include avxvnniint8intrin.h.
	* config/i386/sse.md (UNSPEC_VPMADDUBSWACCD
	UNSPEC_VPMADDUBSWACCSSD,UNSPEC_VPMADDWDACCD,
	UNSPEC_VPMADDWDACCSSD): Rename according to new style.
	(vpdp<vpdotprodtype>_<mode>): New define_insn.
	* doc/extend.texi: Document avxvnniint8.
	* doc/invoke.texi: Document -mavxvnniint8.
	* doc/sourcebuild.texi: Document target avxvnniint8.

gcc/testsuite/ChangeLog

	* g++.dg/other/i386-2.C: Add -mavxvnniint8.
	* g++.dg/other/i386-3.C: Ditto.
	* gcc.target/i386/avx-check.h: Add avxvnniint8 check.
	* gcc.target/i386/sse-12.c: Add -mavxvnniint8.
	* gcc.target/i386/sse-13.c: Ditto.
	* gcc.target/i386/sse-14.c: Ditto.
	* gcc.target/i386/sse-22.c: Ditto.
	* gcc.target/i386/sse-23.c: Ditto.
	* gcc.target/i386/funcspec-56.inc: Add new target attribute.
	* lib/target-supports.exp
	(check_effective_target_avxvnniint8): New.
	* gcc.target/i386/avxvnniint8-1.c: Ditto.
	* gcc.target/i386/avxvnniint8-vpdpbssd-2.c: Ditto.
	* gcc.target/i386/avxvnniint8-vpdpbssds-2.c: Ditto.
	* gcc.target/i386/avxvnniint8-vpdpbsud-2.c: Ditto.
	* gcc.target/i386/avxvnniint8-vpdpbsuds-2.c: Ditto.
	* gcc.target/i386/avxvnniint8-vpdpbuud-2.c: Ditto.
	* gcc.target/i386/avxvnniint8-vpdpbuuds-2.c: Ditto.

Co-authored-by: Hongyu Wang <hongyu.wang@intel.com>
Co-authored-by: Haochen Jiang <haochen.jiang@intel.com>
This commit is contained in:
Kong Lingling 2021-09-29 09:48:20 +08:00 committed by Haochen Jiang
parent 825d004138
commit 406675947d
34 changed files with 758 additions and 34 deletions

View File

@ -795,6 +795,8 @@ get_available_features (struct __processor_model *cpu_model,
set_feature (FEATURE_AVXVNNI);
if (eax & bit_AVXIFMA)
set_feature (FEATURE_AVXIFMA);
if (edx & bit_AVXVNNIINT8)
set_feature (FEATURE_AVXVNNIINT8);
}
if (avx512_usable)
{

View File

@ -108,6 +108,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_AMX_TILE_SET OPTION_MASK_ISA2_AMX_TILE
#define OPTION_MASK_ISA2_AMX_INT8_SET OPTION_MASK_ISA2_AMX_INT8
#define OPTION_MASK_ISA2_AMX_BF16_SET OPTION_MASK_ISA2_AMX_BF16
#define OPTION_MASK_ISA2_AVXVNNIINT8_SET OPTION_MASK_ISA2_AVXVNNIINT8
/* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
as -msse4.2. */
@ -214,7 +215,7 @@ along with GCC; see the file COPYING3. If not see
(OPTION_MASK_ISA_AVX2 | OPTION_MASK_ISA_AVX512F_UNSET)
#define OPTION_MASK_ISA2_AVX2_UNSET \
(OPTION_MASK_ISA2_AVXIFMA_UNSET | OPTION_MASK_ISA2_AVXVNNI_UNSET \
| OPTION_MASK_ISA2_AVX512F_UNSET)
| OPTION_MASK_ISA2_AVXVNNIINT8_UNSET | OPTION_MASK_ISA2_AVX512F_UNSET)
#define OPTION_MASK_ISA_AVX512F_UNSET \
(OPTION_MASK_ISA_AVX512F | OPTION_MASK_ISA_AVX512CD_UNSET \
| OPTION_MASK_ISA_AVX512PF_UNSET | OPTION_MASK_ISA_AVX512ER_UNSET \
@ -278,6 +279,7 @@ along with GCC; see the file COPYING3. If not see
#define OPTION_MASK_ISA2_KL_UNSET \
(OPTION_MASK_ISA2_KL | OPTION_MASK_ISA2_WIDEKL_UNSET)
#define OPTION_MASK_ISA2_WIDEKL_UNSET OPTION_MASK_ISA2_WIDEKL
#define OPTION_MASK_ISA2_AVXVNNIINT8_UNSET OPTION_MASK_ISA2_AVXVNNIINT8
/* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same
as -mno-sse4.1. */
@ -1142,6 +1144,24 @@ ix86_handle_option (struct gcc_options *opts,
}
return true;
case OPT_mavxvnniint8:
if (value)
{
opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA2_AVXVNNIINT8_SET;
opts->x_ix86_isa_flags2_explicit |=
OPTION_MASK_ISA2_AVXVNNIINT8_SET;
opts->x_ix86_isa_flags |= OPTION_MASK_ISA_AVX2_SET;
opts->x_ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX2_SET;
}
else
{
opts->x_ix86_isa_flags2 &=
~OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
opts->x_ix86_isa_flags2_explicit |=
OPTION_MASK_ISA2_AVXVNNIINT8_UNSET;
}
return true;
case OPT_mfma:
if (value)
{

View File

@ -241,6 +241,7 @@ enum processor_features
FEATURE_X86_64_V3,
FEATURE_X86_64_V4,
FEATURE_AVXIFMA,
FEATURE_AVXVNNIINT8,
CPU_FEATURE_MAX
};

View File

@ -176,4 +176,6 @@ ISA_NAMES_TABLE_START
ISA_NAMES_TABLE_ENTRY("x86-64-v3", FEATURE_X86_64_V3, P_X86_64_V3, NULL)
ISA_NAMES_TABLE_ENTRY("x86-64-v4", FEATURE_X86_64_V4, P_X86_64_V4, NULL)
ISA_NAMES_TABLE_ENTRY("avxifma", FEATURE_AVXIFMA, P_NONE, "-mavxifma")
ISA_NAMES_TABLE_ENTRY("avxvnniint8", FEATURE_AVXVNNIINT8,
P_NONE, "-mavxvnniint8")
ISA_NAMES_TABLE_END

View File

@ -422,7 +422,7 @@ i[34567]86-*-* | x86_64-*-*)
amxbf16intrin.h x86gprintrin.h uintrintrin.h
hresetintrin.h keylockerintrin.h avxvnniintrin.h
mwaitintrin.h avx512fp16intrin.h avx512fp16vlintrin.h
avxifmaintrin.h"
avxifmaintrin.h avxvnniint8intrin.h"
;;
ia64-*-*)
extra_headers=ia64intrin.h

View File

@ -0,0 +1,138 @@
/* Copyright (C) 2020 Free Software Foundation, Inc.
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.
You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
#if !defined _IMMINTRIN_H_INCLUDED
#error "Never use <avxvnniint8vlintrin.h> directly; include <immintrin.h> instead."
#endif
#ifndef _AVXVNNIINT8INTRIN_H_INCLUDED
#define _AVXVNNIINT8INTRIN_H_INCLUDED
#if !defined(__AVXVNNIINT8__)
#pragma GCC push_options
#pragma GCC target("avxvnniint8")
#define __DISABLE_AVXVNNIINT8__
#endif /* __AVXVNNIINT8__ */
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbssd_epi32 (__m128i __W, __m128i __A, __m128i __B)
{
return (__m128i)
__builtin_ia32_vpdpbssd128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbssds_epi32 (__m128i __W, __m128i __A, __m128i __B)
{
return (__m128i)
__builtin_ia32_vpdpbssds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbsud_epi32 (__m128i __W, __m128i __A, __m128i __B)
{
return (__m128i)
__builtin_ia32_vpdpbsud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbsuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
{
return (__m128i)
__builtin_ia32_vpdpbsuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbuud_epi32 (__m128i __W, __m128i __A, __m128i __B)
{
return (__m128i)
__builtin_ia32_vpdpbuud128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
}
extern __inline __m128i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_dpbuuds_epi32 (__m128i __W, __m128i __A, __m128i __B)
{
return (__m128i)
__builtin_ia32_vpdpbuuds128 ((__v4si) __W, (__v4si) __A, (__v4si) __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbssd_epi32 (__m256i __W, __m256i __A, __m256i __B)
{
return (__m256i)
__builtin_ia32_vpdpbssd256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbssds_epi32 (__m256i __W, __m256i __A, __m256i __B)
{
return (__m256i)
__builtin_ia32_vpdpbssds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbsud_epi32 (__m256i __W, __m256i __A, __m256i __B)
{
return (__m256i)
__builtin_ia32_vpdpbsud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbsuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
{
return (__m256i)
__builtin_ia32_vpdpbsuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbuud_epi32 (__m256i __W, __m256i __A, __m256i __B)
{
return (__m256i)
__builtin_ia32_vpdpbuud256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
}
extern __inline __m256i
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_dpbuuds_epi32 (__m256i __W, __m256i __A, __m256i __B)
{
return (__m256i)
__builtin_ia32_vpdpbuuds256 ((__v8si) __W, (__v8si) __A, (__v8si) __B);
}
#ifdef __DISABLE_AVXVNNIINT8__
#undef __DISABLE_AVXVNNIINT8__
#pragma GCC pop_options
#endif /* __DISABLE_AVXVNNIINT8__ */
#endif /* __AVXVNNIINT8INTRIN_H_INCLUDED */

View File

@ -49,6 +49,7 @@
#define bit_RDRND (1 << 30)
/* %edx */
#define bit_AVXVNNIINT8 (1 << 4)
#define bit_CMPXCHG8B (1 << 8)
#define bit_CMOV (1 << 15)
#define bit_MMX (1 << 23)

View File

@ -2694,6 +2694,20 @@ BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_A
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_mask, "__builtin_ia32_vpdpwssds_v4si_mask", IX86_BUILTIN_VPDPWSSDSV4SI_MASK, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
BDESC (OPTION_MASK_ISA_AVX512VNNI | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpdpwssds_v4si_maskz, "__builtin_ia32_vpdpwssds_v4si_maskz", IX86_BUILTIN_VPDPWSSDSV4SI_MASKZ, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI_UQI)
/* AVXVNNIINT8 */
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v8si, "__builtin_ia32_vpdpbssd256", IX86_BUILTIN_VPDPBSSDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v8si, "__builtin_ia32_vpdpbssds256", IX86_BUILTIN_VPDPBSSDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v8si, "__builtin_ia32_vpdpbsud256", IX86_BUILTIN_VPDPBSUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v8si, "__builtin_ia32_vpdpbsuds256", IX86_BUILTIN_VPDPBSUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v8si, "__builtin_ia32_vpdpbuud256", IX86_BUILTIN_VPDPBUUDV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v8si, "__builtin_ia32_vpdpbuuds256", IX86_BUILTIN_VPDPBUUDSV8SI, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_V8SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssd_v4si, "__builtin_ia32_vpdpbssd128", IX86_BUILTIN_VPDPBSSDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbssds_v4si, "__builtin_ia32_vpdpbssds128", IX86_BUILTIN_VPDPBSSDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsud_v4si, "__builtin_ia32_vpdpbsud128", IX86_BUILTIN_VPDPBSUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbsuds_v4si, "__builtin_ia32_vpdpbsuds128", IX86_BUILTIN_VPDPBSUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuud_v4si, "__builtin_ia32_vpdpbuud128", IX86_BUILTIN_VPDPBUUDV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_AVXVNNIINT8, CODE_FOR_vpdpbuuds_v4si, "__builtin_ia32_vpdpbuuds128", IX86_BUILTIN_VPDPBUUDSV4SI, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_V4SI)
/* VPCLMULQDQ */
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX512VL, 0, CODE_FOR_vpclmulqdq_v2di, "__builtin_ia32_vpclmulqdq_v2di", IX86_BUILTIN_VPCLMULQDQ2, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT)
BDESC (OPTION_MASK_ISA_VPCLMULQDQ | OPTION_MASK_ISA_AVX, 0, CODE_FOR_vpclmulqdq_v4di, "__builtin_ia32_vpclmulqdq_v4di", IX86_BUILTIN_VPCLMULQDQ4, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI_INT)

View File

@ -635,6 +635,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__AVXVNNI__");
if (isa_flag2 & OPTION_MASK_ISA2_AVXIFMA)
def_or_undef (parse_in, "__AVXIFMA__");
if (isa_flag2 & OPTION_MASK_ISA2_AVXVNNIINT8)
def_or_undef (parse_in, "__AVXVNNIINT8__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");

View File

@ -110,3 +110,4 @@ DEF_PTA(WIDEKL)
DEF_PTA(AVXVNNI)
DEF_PTA(AVX512FP16)
DEF_PTA(AVXIFMA)
DEF_PTA(AVXVNNIINT8)

View File

@ -227,7 +227,8 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mwidekl", OPTION_MASK_ISA2_WIDEKL },
{ "-mavxvnni", OPTION_MASK_ISA2_AVXVNNI },
{ "-mavx512fp16", OPTION_MASK_ISA2_AVX512FP16 },
{ "-mavxifma", OPTION_MASK_ISA2_AVXIFMA }
{ "-mavxifma", OPTION_MASK_ISA2_AVXIFMA },
{ "-mavxvnniint8", OPTION_MASK_ISA2_AVXVNNIINT8 }
};
static struct ix86_target_opts isa_opts[] =
{
@ -1074,6 +1075,7 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("avxvnni", OPT_mavxvnni),
IX86_ATTR_ISA ("avx512fp16", OPT_mavx512fp16),
IX86_ATTR_ISA ("avxifma", OPT_mavxifma),
IX86_ATTR_ISA ("avxvnniint8", OPT_mavxvnniint8),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),

View File

@ -1219,3 +1219,8 @@ mavxifma
Target Mask(ISA2_AVXIFMA) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, and
AVXIFMA built-in functions and code generation.
mavxvnniint8
Target Mask(ISA2_AVXVNNIINT8) Var(ix86_isa_flags2) Save
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2 and
AVXVNNIINT8 built-in functions and code generation.

View File

@ -46,6 +46,8 @@
#include <avxifmaintrin.h>
#include <avxvnniint8intrin.h>
#include <avx2intrin.h>
#include <avx512fintrin.h>

View File

@ -166,10 +166,10 @@
UNSPEC_VPSHLDV
;; For AVX512VNNI support
UNSPEC_VPMADDUBSWACCD
UNSPEC_VPMADDUBSWACCSSD
UNSPEC_VPMADDWDACCD
UNSPEC_VPMADDWDACCSSD
UNSPEC_VPDPBUSD
UNSPEC_VPDPBUSDS
UNSPEC_VPDPWSSD
UNSPEC_VPDPWSSDS
;; For VAES support
UNSPEC_VAESDEC
@ -200,6 +200,13 @@
UNSPEC_COMPLEX_FCMUL
UNSPEC_COMPLEX_MASK
;; For AVX-VNNI-INT8 support
UNSPEC_VPDPBSSD
UNSPEC_VPDPBSSDS
UNSPEC_VPDPBSUD
UNSPEC_VPDPBSUDS
UNSPEC_VPDPBUUD
UNSPEC_VPDPBUUDS
])
(define_c_enum "unspecv" [
@ -28535,7 +28542,7 @@
[(match_operand:V16SI 1 "register_operand" "0")
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDUBSWACCD))]
UNSPEC_VPDPBUSD))]
"TARGET_AVX512VNNI"
"vpdpbusd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@ -28546,7 +28553,7 @@
[(match_operand:VI4_AVX2 1 "register_operand" "0,0")
(match_operand:VI4_AVX2 2 "register_operand" "x,v")
(match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
UNSPEC_VPMADDUBSWACCD))]
UNSPEC_VPDPBUSD))]
"TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
"@
%{vex%} vpdpbusd\t{%3, %2, %0|%0, %2, %3}
@ -28561,7 +28568,7 @@
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDUBSWACCD)
UNSPEC_VPDPBUSD)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512VNNI"
@ -28590,7 +28597,7 @@
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")
] UNSPEC_VPMADDUBSWACCD)
] UNSPEC_VPDPBUSD)
(match_operand:VI4_AVX512VL 4 "const0_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
"TARGET_AVX512VNNI"
@ -28603,7 +28610,7 @@
[(match_operand:V16SI 1 "register_operand" "0")
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDUBSWACCSSD))]
UNSPEC_VPDPBUSDS))]
"TARGET_AVX512VNNI"
"vpdpbusds\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@ -28614,7 +28621,7 @@
[(match_operand:VI4_AVX2 1 "register_operand" "0,0")
(match_operand:VI4_AVX2 2 "register_operand" "x,v")
(match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
UNSPEC_VPMADDUBSWACCSSD))]
UNSPEC_VPDPBUSDS))]
"TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
"@
%{vex%} vpdpbusds\t{%3, %2, %0|%0, %2, %3}
@ -28629,7 +28636,7 @@
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDUBSWACCSSD)
UNSPEC_VPDPBUSDS)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512VNNI"
@ -28658,7 +28665,7 @@
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDUBSWACCSSD)
UNSPEC_VPDPBUSDS)
(match_operand:VI4_AVX512VL 4 "const0_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
"TARGET_AVX512VNNI"
@ -28671,7 +28678,7 @@
[(match_operand:V16SI 1 "register_operand" "0")
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDWDACCD))]
UNSPEC_VPDPWSSD))]
"TARGET_AVX512VNNI"
"vpdpwssd\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@ -28682,7 +28689,7 @@
[(match_operand:VI4_AVX2 1 "register_operand" "0,0")
(match_operand:VI4_AVX2 2 "register_operand" "x,v")
(match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
UNSPEC_VPMADDWDACCD))]
UNSPEC_VPDPWSSD))]
"TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
"@
%{vex%} vpdpwssd\t{%3, %2, %0|%0, %2, %3}
@ -28697,7 +28704,7 @@
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDWDACCD)
UNSPEC_VPDPWSSD)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512VNNI"
@ -28726,7 +28733,7 @@
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDWDACCD)
UNSPEC_VPDPWSSD)
(match_operand:VI4_AVX512VL 4 "const0_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
"TARGET_AVX512VNNI"
@ -28739,7 +28746,7 @@
[(match_operand:V16SI 1 "register_operand" "0")
(match_operand:V16SI 2 "register_operand" "v")
(match_operand:V16SI 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDWDACCSSD))]
UNSPEC_VPDPWSSDS))]
"TARGET_AVX512VNNI"
"vpdpwssds\t{%3, %2, %0|%0, %2, %3}"
[(set_attr ("prefix") ("evex"))])
@ -28750,7 +28757,7 @@
[(match_operand:VI4_AVX2 1 "register_operand" "0,0")
(match_operand:VI4_AVX2 2 "register_operand" "x,v")
(match_operand:VI4_AVX2 3 "nonimmediate_operand" "xm,vm")]
UNSPEC_VPMADDWDACCSSD))]
UNSPEC_VPDPWSSDS))]
"TARGET_AVXVNNI || (TARGET_AVX512VNNI && TARGET_AVX512VL)"
"@
%{vex%} vpdpwssds\t{%3, %2, %0|%0, %2, %3}
@ -28765,7 +28772,7 @@
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDWDACCSSD)
UNSPEC_VPDPWSSDS)
(match_dup 1)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512VNNI"
@ -28794,7 +28801,7 @@
[(match_operand:VI4_AVX512VL 1 "register_operand" "0")
(match_operand:VI4_AVX512VL 2 "register_operand" "v")
(match_operand:VI4_AVX512VL 3 "nonimmediate_operand" "vm")]
UNSPEC_VPMADDWDACCSSD)
UNSPEC_VPDPWSSDS)
(match_operand:VI4_AVX512VL 4 "const0_operand")
(match_operand:<avx512fmaskmode> 5 "register_operand" "Yk")))]
"TARGET_AVX512VNNI"
@ -29235,3 +29242,27 @@
gcc_unreachable ();
DONE;
})
(define_int_iterator VPDOTPROD
[UNSPEC_VPDPBSSD
UNSPEC_VPDPBSSDS
UNSPEC_VPDPBSUD
UNSPEC_VPDPBSUDS
UNSPEC_VPDPBUUD
UNSPEC_VPDPBUUDS])
(define_int_attr vpdotprodtype
[(UNSPEC_VPDPBSSD "bssd") (UNSPEC_VPDPBSSDS "bssds")
(UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
(UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
(define_insn "vpdp<vpdotprodtype>_<mode>"
[(set (match_operand:VI4_AVX 0 "register_operand" "=x")
(unspec:VI4_AVX
[(match_operand:VI4_AVX 1 "register_operand" "0")
(match_operand:VI4_AVX 2 "register_operand" "x")
(match_operand:VI4_AVX 3 "nonimmediate_operand" "xm")]
VPDOTPROD))]
"TARGET_AVXVNNIINT8"
"vpdp<vpdotprodtype>\t{%3, %2, %0|%0, %2, %3}"
[(set_attr "prefix" "vex")])

View File

@ -7065,6 +7065,11 @@ Enable/disable the generation of the AVXVNNI instructions.
@cindex @code{target("avxifma")} function attribute, x86
Enable/disable the generation of the AVXIFMA instructions.
@item avxvnniint8
@itemx no-avxvnniint8
@cindex @code{target("avxvnniint8")} function attribute, x86
Enable/disable the generation of the AVXVNNIINT8 instructions.
@item cld
@itemx no-cld
@cindex @code{target("cld")} function attribute, x86

View File

@ -1436,7 +1436,7 @@ See RS/6000 and PowerPC Options.
-mavx5124fmaps -mavx512vnni -mavx5124vnniw -mprfchw -mrdpid @gol
-mrdseed -msgx -mavx512vp2intersect -mserialize -mtsxldtrk@gol
-mamx-tile -mamx-int8 -mamx-bf16 -muintr -mhreset -mavxvnni@gol
-mavx512fp16 -mavxifma @gol
-mavx512fp16 -mavxifma -mavxvnniint8 @gol
-mcldemote -mms-bitfields -mno-align-stringops -minline-all-stringops @gol
-minline-stringops-dynamically -mstringop-strategy=@var{alg} @gol
-mkl -mwidekl @gol
@ -32907,6 +32907,9 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
@need 200
@itemx -mavxifma
@opindex mavxifma
@need 200
@itemx -mavxvnniint8
@opindex mavxvnniint8
These switches enable the use of instructions in the MMX, SSE,
SSE2, SSE3, SSSE3, SSE4, SSE4A, SSE4.1, SSE4.2, AVX, AVX2, AVX512F, AVX512PF,
AVX512ER, AVX512CD, AVX512VL, AVX512BW, AVX512DQ, AVX512IFMA, AVX512VBMI, SHA,
@ -32917,8 +32920,8 @@ XSAVEOPT, XSAVEC, XSAVES, RTM, HLE, TBM, MWAITX, CLZERO, PKU, AVX512VBMI2,
GFNI, VAES, WAITPKG, VPCLMULQDQ, AVX512BITALG, MOVDIRI, MOVDIR64B, AVX512BF16,
ENQCMD, AVX512VPOPCNTDQ, AVX5124FMAPS, AVX512VNNI, AVX5124VNNIW, SERIALIZE,
UINTR, HRESET, AMXTILE, AMXINT8, AMXBF16, KL, WIDEKL, AVXVNNI, AVX512FP16,
AVXIFMA or CLDEMOTE extended instruction sets. Each has a corresponding
@option{-mno-} option to disable use of these instructions.
AVXIFMA, AVXVNNIINT8 or CLDEMOTE extended instruction sets. Each has a
corresponding @option{-mno-} option to disable use of these instructions.
These extensions are also available as built-in functions: see
@ref{x86 Built-in Functions}, for details of the functions enabled and

View File

@ -2493,6 +2493,9 @@ Target supports the execution of @code{avx512vp2intersect} instructions.
@item avxifma
Target supports the execution of @code{avxifma} instructions.
@item avxvnniint8
Target supports the execution of @code{avxvnniint8} instructions.
@item amx_tile
Target supports the execution of @code{amx-tile} instructions.

View File

@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
/* { dg-options "-O -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,

View File

@ -1,5 +1,5 @@
/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
/* { dg-options "-O -fkeep-inline-functions -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
/* Test that {,x,e,p,t,s,w,a,b,i}mmintrin.h, mm3dnow.h, fma4intrin.h,
xopintrin.h, abmintrin.h, bmiintrin.h, tbmintrin.h, lwpintrin.h,

View File

@ -25,6 +25,9 @@ main ()
&& avx_os_support ()
#ifdef AVXIFMA
&& __builtin_cpu_supports ("avxifma")
#endif
#ifdef AVXVNNIINT8
&& __builtin_cpu_supports ("avxvnniint8")
#endif
)
{

View File

@ -0,0 +1,43 @@
/* { dg-do compile } */
/* { dg-options "-mavxvnniint8 -O2" } */
/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbssd\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbssds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbsud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbsuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbuud\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+\[^\n\r]*%ymm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
/* { dg-final { scan-assembler-times "vpdpbuuds\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+\[^\n\r]*%xmm\[0-9\]+(?:\n|\[ \\t\]+#)" 1 } } */
#include <immintrin.h>
volatile __m256i x,y,z;
volatile __m128i x_,y_,z_;
volatile __mmask8 m;
void extern
avxvnniint8_test (void)
{
x = _mm256_dpbssd_epi32 (x, y, z);
x_ = _mm_dpbssd_epi32 (x_, y_, z_);
x = _mm256_dpbssds_epi32 (x, y, z);
x_ = _mm_dpbssds_epi32 (x_, y_, z_);
x = _mm256_dpbsud_epi32 (x, y, z);
x_ = _mm_dpbsud_epi32 (x_, y_, z_);
x = _mm256_dpbsuds_epi32 (x, y, z);
x_ = _mm_dpbsuds_epi32 (x_, y_, z_);
x = _mm256_dpbuud_epi32 (x, y, z);
x_ = _mm_dpbuud_epi32 (x_, y_, z_);
x = _mm256_dpbuuds_epi32 (x, y, z);
x_ = _mm_dpbuuds_epi32 (x_, y_, z_);
}

View File

@ -0,0 +1,72 @@
/* { dg-do run } */
/* { dg-options "-O2 -mavxvnniint8" } */
/* { dg-require-effective-target avxvnniint8 } */
#define AVXVNNIINT8
#ifndef CHECK
#define CHECK "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#include CHECK
static void
CALC (int *r, int *dst, char *s1, char *s2, int size)
{
short tempres[32];
for (int i = 0; i < size; i++) {
tempres[i] = (short) s1[i] * (short) s2[i];
}
for (int i = 0; i < size / 4; i++) {
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
r[i] = test;
}
}
void
TEST (void)
{
int i;
union256i_d res_256;
union256i_b src2_256;
union256i_b src1_256;
int res_ref_256[8];
for (i = 0; i < 32; i++)
{
int sign = i % 2 ? 1 : -1;
src1_256.a[i] = 10 + 3 * i + sign;
src2_256.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 8; i++)
res_256.a[i] = 0x7fffffff;
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
res_256.x = _mm256_dpbssd_epi32 (res_256.x, src1_256.x, src2_256.x);
if (check_union256i_d (res_256, res_ref_256))
abort ();
union128i_d res_128;
union128i_b src2_128;
union128i_b src1_128;
int res_ref_128[4];
for (i = 0; i < 16; i++)
{
int sign = i % 2 ? 1 : -1;
src1_128.a[i] = 10 + 3 * i * i + sign;
src2_128.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 4; i++)
res_128.a[i] = 0x7fffffff;
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
res_128.x = _mm_dpbssd_epi32 (res_128.x, src1_128.x, src2_128.x);
if (check_union128i_d (res_128, res_ref_128))
abort ();
}

View File

@ -0,0 +1,72 @@
/* { dg-do run } */
/* { dg-options "-O2 -mavxvnniint8" } */
/* { dg-require-effective-target avxvnniint8 } */
#define AVXVNNIINT8
#ifndef CHECK
#define CHECK "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#include CHECK
static void
CALC (int *r, int *dst, char *s1, char *s2, int size)
{
short tempres[32];
for (int i = 0; i < size; i++) {
tempres[i] = (short) s1[i] * (short) s2[i];
}
for (int i = 0; i < size / 4; i++) {
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
}
}
void
TEST (void)
{
int i;
union256i_d res_256;
union256i_b src2_256;
union256i_b src1_256;
int res_ref_256[8];
for (i = 0; i < 32; i++)
{
int sign = i % 2 ? 1 : -1;
src1_256.a[i] = 10 + 3 * i + sign;
src2_256.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 8; i++)
res_256.a[i] = 0x7fffffff;
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
res_256.x = _mm256_dpbssds_epi32 (res_256.x, src1_256.x, src2_256.x);
if (check_union256i_d (res_256, res_ref_256))
abort ();
union128i_d res_128;
union128i_b src2_128;
union128i_b src1_128;
int res_ref_128[4];
for (i = 0; i < 16; i++)
{
int sign = i % 2 ? 1 : -1;
src1_128.a[i] = 10 + 3 * i * i + sign;
src2_128.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 4; i++)
res_128.a[i] = 0x7fffffff;
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
res_128.x = _mm_dpbssds_epi32 (res_128.x, src1_128.x, src2_128.x);
if (check_union128i_d (res_128, res_ref_128))
abort ();
}

View File

@ -0,0 +1,72 @@
/* { dg-do run } */
/* { dg-options "-O2 -mavxvnniint8" } */
/* { dg-require-effective-target avxvnniint8 } */
#define AVXVNNIINT8
#ifndef CHECK
#define CHECK "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#include CHECK
static void
CALC (int *r, int *dst, char *s1, unsigned char *s2, int size)
{
short tempres[32];
for (int i = 0; i < size; i++) {
tempres[i] = (short) s1[i] * (unsigned short) s2[i];
}
for (int i = 0; i < size / 4; i++) {
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
r[i] = test;
}
}
void
TEST (void)
{
int i;
union256i_d res_256;
union256i_b src1_256;
union256i_ub src2_256;
int res_ref_256[8];
for (i = 0; i < 32; i++)
{
int sign = i % 2 ? 1 : -1;
src1_256.a[i] = 10 + 3 * i + sign;
src2_256.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 8; i++)
res_256.a[i] = 0x7fffffff;
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
res_256.x = _mm256_dpbsud_epi32 (res_256.x, src1_256.x, src2_256.x);
if (check_union256i_d (res_256, res_ref_256))
abort ();
union128i_d res_128;
union128i_b src1_128;
union128i_ub src2_128;
int res_ref_128[4];
for (i = 0; i < 16; i++)
{
int sign = i % 2 ? 1 : -1;
src1_128.a[i] = 10 + 3 * i * i + sign;
src2_128.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 4; i++)
res_128.a[i] = 0x7fffffff;
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
res_128.x = _mm_dpbsud_epi32 (res_128.x, src1_128.x, src2_128.x);
if (check_union128i_d (res_128, res_ref_128))
abort ();
}

View File

@ -0,0 +1,72 @@
/* { dg-do run } */
/* { dg-options "-O2 -mavxvnniint8" } */
/* { dg-require-effective-target avxvnniint8 } */
#define AVXVNNIINT8
#ifndef CHECK
#define CHECK "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#include CHECK
static void
CALC (int *r, int *dst, char *s1, unsigned char *s2, int size)
{
short tempres[32];
for (int i = 0; i < size; i++) {
tempres[i] = (short) s1[i] * (unsigned short) s2[i];
}
for (int i = 0; i < size / 4; i++) {
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
r[i] = test > 0x7FFFFFFF ? 0x7FFFFFFF : test;
}
}
void
TEST (void)
{
int i;
union256i_d res_256;
union256i_b src1_256;
union256i_ub src2_256;
int res_ref_256[8];
for (i = 0; i < 32; i++)
{
int sign = i % 2 ? 1 : -1;
src1_256.a[i] = 10 + 3 * i + sign;
src2_256.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 8; i++)
res_256.a[i] = 0x7fffffff;
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
res_256.x = _mm256_dpbsuds_epi32 (res_256.x, src1_256.x, src2_256.x);
if (check_union256i_d (res_256, res_ref_256))
abort ();
union128i_d res_128;
union128i_b src1_128;
union128i_ub src2_128;
int res_ref_128[4];
for (i = 0; i < 16; i++)
{
int sign = i % 2 ? 1 : -1;
src1_128.a[i] = 10 + 3 * i * i + sign;
src2_128.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 4; i++)
res_128.a[i] = 0x7fffffff;
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
res_128.x = _mm_dpbsuds_epi32 (res_128.x, src1_128.x, src2_128.x);
if (check_union128i_d (res_128, res_ref_128))
abort ();
}

View File

@ -0,0 +1,72 @@
/* { dg-do run } */
/* { dg-options "-O2 -mavxvnniint8" } */
/* { dg-require-effective-target avxvnniint8 } */
#define AVXVNNIINT8
#ifndef CHECK
#define CHECK "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#include CHECK
static void
CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned char *s2, int size)
{
unsigned short tempres[32];
for (int i = 0; i < size; i++) {
tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
}
for (int i = 0; i < size / 4; i++) {
unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
r[i] = test;
}
}
void
TEST (void)
{
int i;
union256i_ud res_256;
union256i_ub src2_256;
union256i_ub src1_256;
unsigned int res_ref_256[8];
for (i = 0; i < 32; i++)
{
int sign = i % 2 ? 1 : -1;
src1_256.a[i] = 10 + 3 * i + sign;
src2_256.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 8; i++)
res_256.a[i] = 0x7fffffff;
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
res_256.x = _mm256_dpbuud_epi32 (res_256.x, src1_256.x, src2_256.x);
if (check_union256i_ud (res_256, res_ref_256))
abort ();
union128i_ud res_128;
union128i_ub src2_128;
union128i_ub src1_128;
unsigned int res_ref_128[4];
for (i = 0; i < 16; i++)
{
int sign = i % 2 ? 1 : -1;
src1_128.a[i] = 10 + 3 * i * i + sign;
src2_128.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 4; i++)
res_128.a[i] = 0x7fffffff;
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
res_128.x = _mm_dpbuud_epi32 (res_128.x, src1_128.x, src2_128.x);
if (check_union128i_ud (res_128, res_ref_128))
abort ();
}

View File

@ -0,0 +1,72 @@
/* { dg-do run } */
/* { dg-options "-O2 -mavxvnniint8" } */
/* { dg-require-effective-target avxvnniint8 } */
#define AVXVNNIINT8
#ifndef CHECK
#define CHECK "avx-check.h"
#endif
#ifndef TEST
#define TEST avx_test
#endif
#include CHECK
static void
CALC (unsigned int *r, unsigned int *dst, unsigned char *s1, unsigned char *s2, int size)
{
unsigned short tempres[32];
for (int i = 0; i < size; i++) {
tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
}
for (int i = 0; i < size / 4; i++) {
unsigned int test = (unsigned int) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
r[i] = test > 0xFFFFFFFF ? 0xFFFFFFFF : test;
}
}
void
TEST (void)
{
int i;
union256i_ud res_256;
union256i_ub src2_256;
union256i_ub src1_256;
unsigned int res_ref_256[8];
for (i = 0; i < 32; i++)
{
int sign = i % 2 ? 1 : -1;
src1_256.a[i] = 10 + 3 * i + sign;
src2_256.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 8; i++)
res_256.a[i] = 0x7fffffff;
CALC (res_ref_256, res_256.a, src1_256.a, src2_256.a, 32);
res_256.x = _mm256_dpbuuds_epi32 (res_256.x, src1_256.x, src2_256.x);
if (check_union256i_ud (res_256, res_ref_256))
abort ();
union128i_ud res_128;
union128i_ub src2_128;
union128i_ub src1_128;
unsigned int res_ref_128[4];
for (i = 0; i < 16; i++)
{
int sign = i % 2 ? 1 : -1;
src1_128.a[i] = 10 + 3 * i * i + sign;
src2_128.a[i] = sign * 10 * i * i;
}
for (i = 0; i < 4; i++)
res_128.a[i] = 0x7fffffff;
CALC (res_ref_128, res_128.a, src1_128.a, src2_128.a, 16);
res_128.x = _mm_dpbuuds_epi32 (res_128.x, src1_128.x, src2_128.x);
if (check_union128i_ud (res_128, res_ref_128))
abort ();
}

View File

@ -81,6 +81,7 @@ extern void test_widekl (void) __attribute__((__target__("widekl")));
extern void test_avxvnni (void) __attribute__((__target__("avxvnni")));
extern void test_avx512fp16 (void) __attribute__((__target__("avx512fp16")));
extern void test_avxifma (void) __attribute__((__target__("avxifma")));
extern void test_avxvnniint8 (void) __attribute__((__target__("avxvnniint8")));
extern void test_no_sgx (void) __attribute__((__target__("no-sgx")));
extern void test_no_avx5124fmaps(void) __attribute__((__target__("no-avx5124fmaps")));
@ -163,6 +164,7 @@ extern void test_no_widekl (void) __attribute__((__target__("no-widekl")));
extern void test_no_avxvnni (void) __attribute__((__target__("no-avxvnni")));
extern void test_no_avx512fp16 (void) __attribute__((__target__("no-avx512fp16")));
extern void test_no_avxifma (void) __attribute__((__target__("no-avxifma")));
extern void test_no_avxvnniint8 (void) __attribute__((__target__("no-avxvnniint8")));
extern void test_arch_nocona (void) __attribute__((__target__("arch=nocona")));
extern void test_arch_core2 (void) __attribute__((__target__("arch=core2")));

View File

@ -3,7 +3,7 @@
popcntintrin.h gfniintrin.h and mm_malloc.h are usable
with -O -std=c89 -pedantic-errors. */
/* { dg-do compile } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma" } */
/* { dg-options "-O -std=c89 -pedantic-errors -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512bw -mavx512dq -mavx512vl -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavxifma -mavxvnniint8" } */
#include <x86intrin.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
/* { dg-options "-O2 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512vl -mavx512dq -mavx512bw -mavx512vbmi -mavx512vbmi2 -mavx512ifma -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mavx512vp2intersect -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mavx512bitalg -mpconfig -mwbnoinvd -mavx512bf16 -menqcmd -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>

View File

@ -1,5 +1,5 @@
/* { dg-do compile } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma" } */
/* { dg-options "-O0 -Werror-implicit-function-declaration -march=k8 -msse4a -m3dnow -mavx -mavx2 -mfma4 -mxop -maes -mpclmul -mpopcnt -mabm -mlzcnt -mbmi -mbmi2 -mtbm -mlwp -mfsgsbase -mrdrnd -mf16c -mfma -mrtm -mrdseed -mprfchw -madx -mfxsr -mxsaveopt -mavx512f -mavx512er -mavx512cd -mavx512pf -msha -mprefetchwt1 -mxsavec -mxsaves -mclflushopt -mavx512dq -mavx512bw -mavx512vl -mavx512ifma -mavx512vbmi -mavx512vbmi2 -mavx5124fmaps -mavx5124vnniw -mavx512vpopcntdq -mclwb -mmwaitx -mclzero -mpku -msgx -mrdpid -mgfni -mpconfig -mwbnoinvd -mavx512vl -mavx512bf16 -menqcmd -mavx512vp2intersect -mserialize -mtsxldtrk -mamx-tile -mamx-int8 -mamx-bf16 -mkl -mwidekl -mavxvnni -mavx512fp16 -mavxifma -mavxvnniint8" } */
/* { dg-add-options bind_pic_locally } */
#include <mm_malloc.h>

View File

@ -103,7 +103,7 @@
#ifndef DIFFERENT_PRAGMAS
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,avx512vl,avx512bw,avx512dq,avx512vbmi,avx512vbmi2,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
#endif
/* Following intrinsics require immediate arguments. They
@ -220,7 +220,7 @@ test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1)
/* immintrin.h (AVX/AVX2/RDRND/FSGSBASE/F16C/RTM/AVX512F/SHA) */
#ifdef DIFFERENT_PRAGMAS
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
#pragma GCC target ("avx,avx2,rdrnd,fsgsbase,f16c,rtm,avx512f,avx512er,avx512cd,avx512pf,sha,avx512vl,avx512bw,avx512dq,avx512ifma,avx512vbmi,avx512vbmi2,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,gfni,avx512bitalg,avx512bf16,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
#endif
#include <immintrin.h>
test_1 (_cvtss_sh, unsigned short, float, 1)

View File

@ -843,6 +843,6 @@
#define __builtin_ia32_vpclmulqdq_v2di(A, B, C) __builtin_ia32_vpclmulqdq_v2di(A, B, 1)
#define __builtin_ia32_vpclmulqdq_v8di(A, B, C) __builtin_ia32_vpclmulqdq_v8di(A, B, 1)
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma")
#pragma GCC target ("sse4a,3dnow,avx,avx2,fma4,xop,aes,pclmul,popcnt,abm,lzcnt,bmi,bmi2,tbm,lwp,fsgsbase,rdrnd,f16c,fma,rtm,rdseed,prfchw,adx,fxsr,xsaveopt,avx512f,avx512er,avx512cd,avx512pf,sha,prefetchwt1,xsavec,xsaves,clflushopt,avx512bw,avx512dq,avx512vl,avx512vbmi,avx512ifma,avx5124fmaps,avx5124vnniw,avx512vpopcntdq,clwb,mwaitx,clzero,pku,sgx,rdpid,gfni,avx512vbmi2,vpclmulqdq,avx512bitalg,pconfig,wbnoinvd,avx512bf16,enqcmd,avx512vp2intersect,serialize,tsxldtrk,amx-tile,amx-int8,amx-bf16,kl,widekl,avxvnni,avx512fp16,avxifma,avxvnniint8")
#include <x86intrin.h>

View File

@ -9534,6 +9534,18 @@ proc check_effective_target_avxifma { } {
} "-O0 -mavxifma" ]
}
# Return 1 if avxvnniint8 instructions can be compiled.
proc check_effective_target_avxvnniint8 { } {
return [check_no_compiler_messages avxvnniint8 object {
typedef int __v8si __attribute__ ((__vector_size__ (32)));
__v8si
_mm256_dpbssd_epi32 (__v8si __A, __v8si __B, __v8si __C)
{
return __builtin_ia32_vpdpbssd256 (__A, __B, __C);
}
} "-O0 -mavxvnniint8" ]
}
# Return 1 if sse instructions can be compiled.
proc check_effective_target_sse { } {
return [check_no_compiler_messages sse object {