mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 05:10:33 +08:00
config.gcc (i[34567]86-*-*): Add smmintrin.h to extra_headers.
2007-05-22 H.J. Lu <hongjiu.lu@intel.com> Richard Henderson <rth@redhat.com> * config.gcc (i[34567]86-*-*): Add smmintrin.h to extra_headers. (x86_64-*-*): Likewise. * i386/i386-modes.def (V2QI): New. * config/i386/i386.c (ix86_handle_option): Handle SSE4.1 and SSE4A. (override_options): Support SSE4.1. (IX86_BUILTIN_BLENDPD): New for SSE4.1. (IX86_BUILTIN_BLENDPS): Likewise. (IX86_BUILTIN_BLENDVPD): Likewise. (IX86_BUILTIN_BLENDVPS): Likewise. (IX86_BUILTIN_PBLENDVB128): Likewise. (IX86_BUILTIN_PBLENDW128): Likewise. (IX86_BUILTIN_DPPD): Likewise. (IX86_BUILTIN_DPPS): Likewise. (IX86_BUILTIN_INSERTPS128): Likewise. (IX86_BUILTIN_MOVNTDQA): Likewise. (IX86_BUILTIN_MPSADBW128): Likewise. (IX86_BUILTIN_PACKUSDW128): Likewise. (IX86_BUILTIN_PCMPEQQ): Likewise. (IX86_BUILTIN_PHMINPOSUW128): Likewise. (IX86_BUILTIN_PMAXSB128): Likewise. (IX86_BUILTIN_PMAXSD128): Likewise. (IX86_BUILTIN_PMAXUD128): Likewise. (IX86_BUILTIN_PMAXUW128): Likewise. (IX86_BUILTIN_PMINSB128): Likewise. (IX86_BUILTIN_PMINSD128): Likewise. (IX86_BUILTIN_PMINUD128): Likewise. (IX86_BUILTIN_PMINUW128): Likewise. (IX86_BUILTIN_PMOVSXBW128): Likewise. (IX86_BUILTIN_PMOVSXBD128): Likewise. (IX86_BUILTIN_PMOVSXBQ128): Likewise. (IX86_BUILTIN_PMOVSXWD128): Likewise. (IX86_BUILTIN_PMOVSXWQ128): Likewise. (IX86_BUILTIN_PMOVSXDQ128): Likewise. (IX86_BUILTIN_PMOVZXBW128): Likewise. (IX86_BUILTIN_PMOVZXBD128): Likewise. (IX86_BUILTIN_PMOVZXBQ128): Likewise. (IX86_BUILTIN_PMOVZXWD128): Likewise. (IX86_BUILTIN_PMOVZXWQ128): Likewise. (IX86_BUILTIN_PMOVZXDQ128): Likewise. (IX86_BUILTIN_PMULDQ128): Likewise. (IX86_BUILTIN_PMULLD128): Likewise. (IX86_BUILTIN_ROUNDPD): Likewise. (IX86_BUILTIN_ROUNDPS): Likewise. (IX86_BUILTIN_ROUNDSD): Likewise. (IX86_BUILTIN_ROUNDSS): Likewise. (IX86_BUILTIN_PTESTZ): Likewise. (IX86_BUILTIN_PTESTC): Likewise. (IX86_BUILTIN_PTESTNZC): Likewise. (IX86_BUILTIN_VEC_EXT_V16QI): Likewise. (IX86_BUILTIN_VEC_SET_V2DI): Likewise. (IX86_BUILTIN_VEC_SET_V4SF): Likewise. 
(IX86_BUILTIN_VEC_SET_V4SI): Likewise. (IX86_BUILTIN_VEC_SET_V16QI): Likewise. (bdesc_ptest): New. (bdesc_sse_3arg): Likewise. (bdesc_2arg): Likewise. (bdesc_1arg): Likewise. (ix86_init_mmx_sse_builtins): Support SSE4.1. Handle SSE builtins with 3 args. (ix86_expand_sse_4_operands_builtin): New. (ix86_expand_unop_builtin): Support 2 arg builtins with a constant smaller than 8 bits as the 2nd arg. (ix86_expand_sse_ptest): New. (ix86_expand_builtin): Support SSE4.1. Support 3 arg SSE builtins. (ix86_expand_vector_set): Support SSE4.1. (ix86_expand_vector_extract): Likewise. * config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Define __SSE4_1__ for -msse4.1. * config/i386/i386.md (UNSPEC_BLENDV): New for SSE4.1. (UNSPEC_INSERTPS): Likewise. (UNSPEC_DP): Likewise. (UNSPEC_MOVNTDQA): Likewise. (UNSPEC_MPSADBW): Likewise. (UNSPEC_PHMINPOSUW): Likewise. (UNSPEC_PTEST): Likewise. (UNSPEC_ROUNDP): Likewise. (UNSPEC_ROUNDS): Likewise. * config/i386/i386.opt (msse4.1): New for SSE4.1. * config/i386/predicates.md (const_pow2_1_to_2_operand): New. (const_pow2_1_to_32768_operand): Likewise. * config/i386/smmintrin.h: New. The SSE4.1 intrinsic header file. * config/i386/sse.md (*vec_setv4sf_sse4_1): New pattern for SSE4.1. (sse4_1_insertps): Likewise. (*sse4_1_extractps): Likewise. (sse4_1_ptest): Likewise. (sse4_1_mulv2siv2di3): Likewise. (*sse4_1_mulv4si3): Likewise. (*sse4_1_smax<mode>3): Likewise. (*sse4_1_umax<mode>3): Likewise. (*sse4_1_smin<mode>3): Likewise. (*sse4_1_umin<mode>3): Likewise. (sse4_1_eqv2di3): Likewise. (*sse4_1_pinsrb): Likewise. (*sse4_1_pinsrd): Likewise. (*sse4_1_pinsrq): Likewise. (*sse4_1_pextrb): Likewise. (*sse4_1_pextrb_memory): Likewise. (*sse4_1_pextrw_memory): Likewise. (*sse4_1_pextrq): Likewise. (sse4_1_blendpd): Likewise. (sse4_1_blendps): Likewise. (sse4_1_blendvpd): Likewise. (sse4_1_blendvps): Likewise. (sse4_1_dppd): Likewise. (sse4_1_dpps): Likewise. (sse4_1_movntdqa): Likewise. (sse4_1_mpsadbw): Likewise. (sse4_1_packusdw): Likewise. 
(sse4_1_pblendvb): Likewise. (sse4_1_pblendw): Likewise. (sse4_1_phminposuw): Likewise. (sse4_1_extendv8qiv8hi2): Likewise. (*sse4_1_extendv8qiv8hi2): Likewise. (sse4_1_extendv4qiv4si2): Likewise. (*sse4_1_extendv4qiv4si2): Likewise. (sse4_1_extendv2qiv2di2): Likewise. (*sse4_1_extendv2qiv2di2): Likewise. (sse4_1_extendv4hiv4si2): Likewise. (*sse4_1_extendv4hiv4si2): Likewise. (sse4_1_extendv2hiv2di2): Likewise. (*sse4_1_extendv2hiv2di2): Likewise. (sse4_1_extendv2siv2di2): Likewise. (*sse4_1_extendv2siv2di2): Likewise. (sse4_1_zero_extendv8qiv8hi2): Likewise. (*sse4_1_zero_extendv8qiv8hi2): Likewise. (sse4_1_zero_extendv4qiv4si2): Likewise. (*sse4_1_zero_extendv4qiv4si2): Likewise. (sse4_1_zero_extendv2qiv2di2): Likewise. (*sse4_1_zero_extendv2qiv2di2): Likewise. (sse4_1_zero_extendv4hiv4si2): Likewise. (*sse4_1_zero_extendv4hiv4si2): Likewise. (sse4_1_zero_extendv2hiv2di2): Likewise. (*sse4_1_zero_extendv2hiv2di2): Likewise. (sse4_1_zero_extendv2siv2di2): Likewise. (*sse4_1_zero_extendv2siv2di2): Likewise. (sse4_1_roundpd): Likewise. (sse4_1_roundps): Likewise. (sse4_1_roundsd): Likewise. (sse4_1_roundss): Likewise. (mulv4si3): Don't expand for SSE4.1. (smax<mode>3): Likewise. (umaxv4si3): Likewise. (uminv16qi3): Likewise. (umin<mode>3): Likewise. (umaxv8hi3): Rewrite. Only enabled for SSE4.1. * doc/extend.texi: Document SSE4.1 built-in functions. * doc/invoke.texi: Document -msse4.1. Co-Authored-By: Richard Henderson <rth@redhat.com> From-SVN: r124945
This commit is contained in:
parent
c099916d62
commit
9a5cee0228
165
gcc/ChangeLog
165
gcc/ChangeLog
@ -1,3 +1,168 @@
|
||||
2007-05-22 H.J. Lu <hongjiu.lu@intel.com>
|
||||
Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config.gcc (i[34567]86-*-*): Add smmintrin.h to
|
||||
extra_headers.
|
||||
(x86_64-*-*): Likewise.
|
||||
|
||||
* i386/i386-modes.def (V2QI): New.
|
||||
|
||||
* config/i386/i386.c (ix86_handle_option): Handle SSE4.1 and
|
||||
SSE4A.
|
||||
(override_options): Support SSE4.1.
|
||||
(IX86_BUILTIN_BLENDPD): New for SSE4.1.
|
||||
(IX86_BUILTIN_BLENDPS): Likewise.
|
||||
(IX86_BUILTIN_BLENDVPD): Likewise.
|
||||
(IX86_BUILTIN_BLENDVPS): Likewise.
|
||||
(IX86_BUILTIN_PBLENDVB128): Likewise.
|
||||
(IX86_BUILTIN_PBLENDW128): Likewise.
|
||||
(IX86_BUILTIN_DPPD): Likewise.
|
||||
(IX86_BUILTIN_DPPS): Likewise.
|
||||
(IX86_BUILTIN_INSERTPS128): Likewise.
|
||||
(IX86_BUILTIN_MOVNTDQA): Likewise.
|
||||
(IX86_BUILTIN_MPSADBW128): Likewise.
|
||||
(IX86_BUILTIN_PACKUSDW128): Likewise.
|
||||
(IX86_BUILTIN_PCMPEQQ): Likewise.
|
||||
(IX86_BUILTIN_PHMINPOSUW128): Likewise.
|
||||
(IX86_BUILTIN_PMAXSB128): Likewise.
|
||||
(IX86_BUILTIN_PMAXSD128): Likewise.
|
||||
(IX86_BUILTIN_PMAXUD128): Likewise.
|
||||
(IX86_BUILTIN_PMAXUW128): Likewise.
|
||||
(IX86_BUILTIN_PMINSB128): Likewise.
|
||||
(IX86_BUILTIN_PMINSD128): Likewise.
|
||||
(IX86_BUILTIN_PMINUD128): Likewise.
|
||||
(IX86_BUILTIN_PMINUW128): Likewise.
|
||||
(IX86_BUILTIN_PMOVSXBW128): Likewise.
|
||||
(IX86_BUILTIN_PMOVSXBD128): Likewise.
|
||||
(IX86_BUILTIN_PMOVSXBQ128): Likewise.
|
||||
(IX86_BUILTIN_PMOVSXWD128): Likewise.
|
||||
(IX86_BUILTIN_PMOVSXWQ128): Likewise.
|
||||
(IX86_BUILTIN_PMOVSXDQ128): Likewise.
|
||||
(IX86_BUILTIN_PMOVZXBW128): Likewise.
|
||||
(IX86_BUILTIN_PMOVZXBD128): Likewise.
|
||||
(IX86_BUILTIN_PMOVZXBQ128): Likewise.
|
||||
(IX86_BUILTIN_PMOVZXWD128): Likewise.
|
||||
(IX86_BUILTIN_PMOVZXWQ128): Likewise.
|
||||
(IX86_BUILTIN_PMOVZXDQ128): Likewise.
|
||||
(IX86_BUILTIN_PMULDQ128): Likewise.
|
||||
(IX86_BUILTIN_PMULLD128): Likewise.
|
||||
(IX86_BUILTIN_ROUNDPD): Likewise.
|
||||
(IX86_BUILTIN_ROUNDPS): Likewise.
|
||||
(IX86_BUILTIN_ROUNDSD): Likewise.
|
||||
(IX86_BUILTIN_ROUNDSS): Likewise.
|
||||
(IX86_BUILTIN_PTESTZ): Likewise.
|
||||
(IX86_BUILTIN_PTESTC): Likewise.
|
||||
(IX86_BUILTIN_PTESTNZC): Likewise.
|
||||
(IX86_BUILTIN_VEC_EXT_V16QI): Likewise.
|
||||
(IX86_BUILTIN_VEC_SET_V2DI): Likewise.
|
||||
(IX86_BUILTIN_VEC_SET_V4SF): Likewise.
|
||||
(IX86_BUILTIN_VEC_SET_V4SI): Likewise.
|
||||
(IX86_BUILTIN_VEC_SET_V16QI): Likewise.
|
||||
(bdesc_ptest): New.
|
||||
(bdesc_sse_3arg): Likewise.
|
||||
(bdesc_2arg): Likewise.
|
||||
(bdesc_1arg): Likewise.
|
||||
(ix86_init_mmx_sse_builtins): Support SSE4.1. Handle SSE builtins
|
||||
with 3 args.
|
||||
(ix86_expand_sse_4_operands_builtin): New.
|
||||
(ix86_expand_unop_builtin): Support 2 arg builtins with a constant
|
||||
smaller than 8 bits as the 2nd arg.
|
||||
(ix86_expand_sse_ptest): New.
|
||||
(ix86_expand_builtin): Support SSE4.1. Support 3 arg SSE builtins.
|
||||
(ix86_expand_vector_set): Support SSE4.1.
|
||||
(ix86_expand_vector_extract): Likewise.
|
||||
|
||||
* config/i386/i386.h (TARGET_CPU_CPP_BUILTINS): Define
|
||||
__SSE4_1__ for -msse4.1.
|
||||
|
||||
* config/i386/i386.md (UNSPEC_BLENDV): New for SSE4.1.
|
||||
(UNSPEC_INSERTPS): Likewise.
|
||||
(UNSPEC_DP): Likewise.
|
||||
(UNSPEC_MOVNTDQA): Likewise.
|
||||
(UNSPEC_MPSADBW): Likewise.
|
||||
(UNSPEC_PHMINPOSUW): Likewise.
|
||||
(UNSPEC_PTEST): Likewise.
|
||||
(UNSPEC_ROUNDP): Likewise.
|
||||
(UNSPEC_ROUNDS): Likewise.
|
||||
|
||||
* config/i386/i386.opt (msse4.1): New for SSE4.1.
|
||||
|
||||
* config/i386/predicates.md (const_pow2_1_to_2_operand): New.
|
||||
(const_pow2_1_to_32768_operand): Likewise.
|
||||
|
||||
* config/i386/smmintrin.h: New. The SSE4.1 intrinsic header
|
||||
file.
|
||||
|
||||
* config/i386/sse.md (*vec_setv4sf_sse4_1): New pattern for
|
||||
SSE4.1.
|
||||
(sse4_1_insertps): Likewise.
|
||||
(*sse4_1_extractps): Likewise.
|
||||
(sse4_1_ptest): Likewise.
|
||||
(sse4_1_mulv2siv2di3): Likewise.
|
||||
(*sse4_1_mulv4si3): Likewise.
|
||||
(*sse4_1_smax<mode>3): Likewise.
|
||||
(*sse4_1_umax<mode>3): Likewise.
|
||||
(*sse4_1_smin<mode>3): Likewise.
|
||||
(*sse4_1_umin<mode>3): Likewise.
|
||||
(sse4_1_eqv2di3): Likewise.
|
||||
(*sse4_1_pinsrb): Likewise.
|
||||
(*sse4_1_pinsrd): Likewise.
|
||||
(*sse4_1_pinsrq): Likewise.
|
||||
(*sse4_1_pextrb): Likewise.
|
||||
(*sse4_1_pextrb_memory): Likewise.
|
||||
(*sse4_1_pextrw_memory): Likewise.
|
||||
(*sse4_1_pextrq): Likewise.
|
||||
(sse4_1_blendpd): Likewise.
|
||||
(sse4_1_blendps): Likewise.
|
||||
(sse4_1_blendvpd): Likewise.
|
||||
(sse4_1_blendvps): Likewise.
|
||||
(sse4_1_dppd): Likewise.
|
||||
(sse4_1_dpps): Likewise.
|
||||
(sse4_1_movntdqa): Likewise.
|
||||
(sse4_1_mpsadbw): Likewise.
|
||||
(sse4_1_packusdw): Likewise.
|
||||
(sse4_1_pblendvb): Likewise.
|
||||
(sse4_1_pblendw): Likewise.
|
||||
(sse4_1_phminposuw): Likewise.
|
||||
(sse4_1_extendv8qiv8hi2): Likewise.
|
||||
(*sse4_1_extendv8qiv8hi2): Likewise.
|
||||
(sse4_1_extendv4qiv4si2): Likewise.
|
||||
(*sse4_1_extendv4qiv4si2): Likewise.
|
||||
(sse4_1_extendv2qiv2di2): Likewise.
|
||||
(*sse4_1_extendv2qiv2di2): Likewise.
|
||||
(sse4_1_extendv4hiv4si2): Likewise.
|
||||
(*sse4_1_extendv4hiv4si2): Likewise.
|
||||
(sse4_1_extendv2hiv2di2): Likewise.
|
||||
(*sse4_1_extendv2hiv2di2): Likewise.
|
||||
(sse4_1_extendv2siv2di2): Likewise.
|
||||
(*sse4_1_extendv2siv2di2): Likewise.
|
||||
(sse4_1_zero_extendv8qiv8hi2): Likewise.
|
||||
(*sse4_1_zero_extendv8qiv8hi2): Likewise.
|
||||
(sse4_1_zero_extendv4qiv4si2): Likewise.
|
||||
(*sse4_1_zero_extendv4qiv4si2): Likewise.
|
||||
(sse4_1_zero_extendv2qiv2di2): Likewise.
|
||||
(*sse4_1_zero_extendv2qiv2di2): Likewise.
|
||||
(sse4_1_zero_extendv4hiv4si2): Likewise.
|
||||
(*sse4_1_zero_extendv4hiv4si2): Likewise.
|
||||
(sse4_1_zero_extendv2hiv2di2): Likewise.
|
||||
(*sse4_1_zero_extendv2hiv2di2): Likewise.
|
||||
(sse4_1_zero_extendv2siv2di2): Likewise.
|
||||
(*sse4_1_zero_extendv2siv2di2): Likewise.
|
||||
(sse4_1_roundpd): Likewise.
|
||||
(sse4_1_roundps): Likewise.
|
||||
(sse4_1_roundsd): Likewise.
|
||||
(sse4_1_roundss): Likewise.
|
||||
(mulv4si3): Don't expand for SSE4.1.
|
||||
(smax<mode>3): Likewise.
|
||||
(umaxv4si3): Likewise.
|
||||
(uminv16qi3): Likewise.
|
||||
(umin<mode>3): Likewise.
|
||||
(umaxv8hi3): Rewrite. Only enabled for SSE4.1.
|
||||
|
||||
* doc/extend.texi: Document SSE4.1 built-in functions.
|
||||
|
||||
* doc/invoke.texi: Document -msse4.1.
|
||||
|
||||
2007-05-22 Nathan Sidwell <nathan@codesourcery.com>
|
||||
|
||||
* config/m68k/linux.h (ASM_SPEC): Add asm_pcrel_spec.
|
||||
|
@ -276,12 +276,12 @@ xscale-*-*)
|
||||
i[34567]86-*-*)
|
||||
cpu_type=i386
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h ammintrin.h"
|
||||
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h"
|
||||
;;
|
||||
x86_64-*-*)
|
||||
cpu_type=i386
|
||||
extra_headers="mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
|
||||
pmmintrin.h tmmintrin.h ammintrin.h"
|
||||
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h"
|
||||
need_64bit_hwint=yes
|
||||
;;
|
||||
ia64-*-*)
|
||||
|
@ -68,6 +68,7 @@ VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI */
|
||||
VECTOR_MODES (INT, 16); /* V16QI V8HI V4SI V2DI */
|
||||
VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */
|
||||
VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */
|
||||
VECTOR_MODE (INT, QI, 2); /* V2QI */
|
||||
VECTOR_MODE (INT, DI, 4); /* V4DI */
|
||||
VECTOR_MODE (INT, SI, 8); /* V8SI */
|
||||
VECTOR_MODE (INT, HI, 16); /* V16HI */
|
||||
|
@ -1594,6 +1594,14 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
||||
return true;
|
||||
|
||||
case OPT_mssse3:
|
||||
if (!value)
|
||||
{
|
||||
target_flags &= ~(MASK_SSE4_1 | MASK_SSE4A);
|
||||
target_flags_explicit |= MASK_SSE4_1 | MASK_SSE4A;
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_msse4_1:
|
||||
if (!value)
|
||||
{
|
||||
target_flags &= ~MASK_SSE4A;
|
||||
@ -1601,6 +1609,14 @@ ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
|
||||
}
|
||||
return true;
|
||||
|
||||
case OPT_msse4a:
|
||||
if (!value)
|
||||
{
|
||||
target_flags &= ~MASK_SSE4_1;
|
||||
target_flags_explicit |= MASK_SSE4_1;
|
||||
}
|
||||
return true;
|
||||
|
||||
default:
|
||||
return true;
|
||||
}
|
||||
@ -1674,7 +1690,8 @@ override_options (void)
|
||||
PTA_POPCNT = 1 << 10,
|
||||
PTA_ABM = 1 << 11,
|
||||
PTA_SSE4A = 1 << 12,
|
||||
PTA_NO_SAHF = 1 << 13
|
||||
PTA_NO_SAHF = 1 << 13,
|
||||
PTA_SSE4_1 = 1 << 14
|
||||
} flags;
|
||||
}
|
||||
const processor_alias_table[] =
|
||||
@ -1936,6 +1953,9 @@ override_options (void)
|
||||
if (processor_alias_table[i].flags & PTA_SSSE3
|
||||
&& !(target_flags_explicit & MASK_SSSE3))
|
||||
target_flags |= MASK_SSSE3;
|
||||
if (processor_alias_table[i].flags & PTA_SSE4_1
|
||||
&& !(target_flags_explicit & MASK_SSE4_1))
|
||||
target_flags |= MASK_SSE4_1;
|
||||
if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
|
||||
x86_prefetch_sse = true;
|
||||
if (processor_alias_table[i].flags & PTA_CX16)
|
||||
@ -2141,6 +2161,10 @@ override_options (void)
|
||||
if (!TARGET_80387)
|
||||
target_flags |= MASK_NO_FANCY_MATH_387;
|
||||
|
||||
/* Turn on SSSE3 builtins for -msse4.1. */
|
||||
if (TARGET_SSE4_1)
|
||||
target_flags |= MASK_SSSE3;
|
||||
|
||||
/* Turn on SSE3 builtins for -mssse3. */
|
||||
if (TARGET_SSSE3)
|
||||
target_flags |= MASK_SSE3;
|
||||
@ -16412,6 +16436,61 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_INSERTQI,
|
||||
IX86_BUILTIN_INSERTQ,
|
||||
|
||||
/* SSE4.1. */
|
||||
IX86_BUILTIN_BLENDPD,
|
||||
IX86_BUILTIN_BLENDPS,
|
||||
IX86_BUILTIN_BLENDVPD,
|
||||
IX86_BUILTIN_BLENDVPS,
|
||||
IX86_BUILTIN_PBLENDVB128,
|
||||
IX86_BUILTIN_PBLENDW128,
|
||||
|
||||
IX86_BUILTIN_DPPD,
|
||||
IX86_BUILTIN_DPPS,
|
||||
|
||||
IX86_BUILTIN_INSERTPS128,
|
||||
|
||||
IX86_BUILTIN_MOVNTDQA,
|
||||
IX86_BUILTIN_MPSADBW128,
|
||||
IX86_BUILTIN_PACKUSDW128,
|
||||
IX86_BUILTIN_PCMPEQQ,
|
||||
IX86_BUILTIN_PHMINPOSUW128,
|
||||
|
||||
IX86_BUILTIN_PMAXSB128,
|
||||
IX86_BUILTIN_PMAXSD128,
|
||||
IX86_BUILTIN_PMAXUD128,
|
||||
IX86_BUILTIN_PMAXUW128,
|
||||
|
||||
IX86_BUILTIN_PMINSB128,
|
||||
IX86_BUILTIN_PMINSD128,
|
||||
IX86_BUILTIN_PMINUD128,
|
||||
IX86_BUILTIN_PMINUW128,
|
||||
|
||||
IX86_BUILTIN_PMOVSXBW128,
|
||||
IX86_BUILTIN_PMOVSXBD128,
|
||||
IX86_BUILTIN_PMOVSXBQ128,
|
||||
IX86_BUILTIN_PMOVSXWD128,
|
||||
IX86_BUILTIN_PMOVSXWQ128,
|
||||
IX86_BUILTIN_PMOVSXDQ128,
|
||||
|
||||
IX86_BUILTIN_PMOVZXBW128,
|
||||
IX86_BUILTIN_PMOVZXBD128,
|
||||
IX86_BUILTIN_PMOVZXBQ128,
|
||||
IX86_BUILTIN_PMOVZXWD128,
|
||||
IX86_BUILTIN_PMOVZXWQ128,
|
||||
IX86_BUILTIN_PMOVZXDQ128,
|
||||
|
||||
IX86_BUILTIN_PMULDQ128,
|
||||
IX86_BUILTIN_PMULLD128,
|
||||
|
||||
IX86_BUILTIN_ROUNDPD,
|
||||
IX86_BUILTIN_ROUNDPS,
|
||||
IX86_BUILTIN_ROUNDSD,
|
||||
IX86_BUILTIN_ROUNDSS,
|
||||
|
||||
IX86_BUILTIN_PTESTZ,
|
||||
IX86_BUILTIN_PTESTC,
|
||||
IX86_BUILTIN_PTESTNZC,
|
||||
|
||||
IX86_BUILTIN_VEC_INIT_V2SI,
|
||||
IX86_BUILTIN_VEC_INIT_V4HI,
|
||||
IX86_BUILTIN_VEC_INIT_V8QI,
|
||||
@ -16422,8 +16501,13 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_VEC_EXT_V8HI,
|
||||
IX86_BUILTIN_VEC_EXT_V2SI,
|
||||
IX86_BUILTIN_VEC_EXT_V4HI,
|
||||
IX86_BUILTIN_VEC_EXT_V16QI,
|
||||
IX86_BUILTIN_VEC_SET_V2DI,
|
||||
IX86_BUILTIN_VEC_SET_V4SF,
|
||||
IX86_BUILTIN_VEC_SET_V4SI,
|
||||
IX86_BUILTIN_VEC_SET_V8HI,
|
||||
IX86_BUILTIN_VEC_SET_V4HI,
|
||||
IX86_BUILTIN_VEC_SET_V16QI,
|
||||
|
||||
IX86_BUILTIN_MAX
|
||||
};
|
||||
@ -16508,6 +16592,33 @@ static const struct builtin_description bdesc_comi[] =
|
||||
{ MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
|
||||
};
|
||||
|
||||
static const struct builtin_description bdesc_ptest[] =
|
||||
{
|
||||
/* SSE4.1 */
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
|
||||
};
|
||||
|
||||
/* SSE builtins with 3 arguments and the last argument must be a 8 bit
|
||||
constant or xmm0. */
|
||||
static const struct builtin_description bdesc_sse_3arg[] =
|
||||
{
|
||||
/* SSE4.1 */
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, 0, 0 },
|
||||
};
|
||||
|
||||
static const struct builtin_description bdesc_2arg[] =
|
||||
{
|
||||
/* SSE */
|
||||
@ -16806,7 +16917,21 @@ static const struct builtin_description bdesc_2arg[] =
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
|
||||
{ MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 },
|
||||
|
||||
/* SSE4.1 */
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, 0, 0 },
|
||||
};
|
||||
|
||||
static const struct builtin_description bdesc_1arg[] =
|
||||
@ -16861,6 +16986,26 @@ static const struct builtin_description bdesc_1arg[] =
|
||||
{ MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
|
||||
{ MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
|
||||
|
||||
/* SSE4.1 */
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, 0, 0 },
|
||||
|
||||
/* Fake 1 arg builtins with a constant smaller than 8 bits as the
|
||||
2nd arg. */
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, 0, 0 },
|
||||
{ MASK_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, 0, 0 },
|
||||
};
|
||||
|
||||
/* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
|
||||
@ -17167,6 +17312,55 @@ ix86_init_mmx_sse_builtins (void)
|
||||
tree v2di_ftype_v2di_v16qi
|
||||
= build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
|
||||
NULL_TREE);
|
||||
tree v2df_ftype_v2df_v2df_v2df
|
||||
= build_function_type_list (V2DF_type_node,
|
||||
V2DF_type_node, V2DF_type_node,
|
||||
V2DF_type_node, NULL_TREE);
|
||||
tree v4sf_ftype_v4sf_v4sf_v4sf
|
||||
= build_function_type_list (V4SF_type_node,
|
||||
V4SF_type_node, V4SF_type_node,
|
||||
V4SF_type_node, NULL_TREE);
|
||||
tree v8hi_ftype_v16qi
|
||||
= build_function_type_list (V8HI_type_node, V16QI_type_node,
|
||||
NULL_TREE);
|
||||
tree v4si_ftype_v16qi
|
||||
= build_function_type_list (V4SI_type_node, V16QI_type_node,
|
||||
NULL_TREE);
|
||||
tree v2di_ftype_v16qi
|
||||
= build_function_type_list (V2DI_type_node, V16QI_type_node,
|
||||
NULL_TREE);
|
||||
tree v4si_ftype_v8hi
|
||||
= build_function_type_list (V4SI_type_node, V8HI_type_node,
|
||||
NULL_TREE);
|
||||
tree v2di_ftype_v8hi
|
||||
= build_function_type_list (V2DI_type_node, V8HI_type_node,
|
||||
NULL_TREE);
|
||||
tree v2di_ftype_v4si
|
||||
= build_function_type_list (V2DI_type_node, V4SI_type_node,
|
||||
NULL_TREE);
|
||||
tree v2di_ftype_pv2di
|
||||
= build_function_type_list (V2DI_type_node, pv2di_type_node,
|
||||
NULL_TREE);
|
||||
tree v16qi_ftype_v16qi_v16qi_int
|
||||
= build_function_type_list (V16QI_type_node, V16QI_type_node,
|
||||
V16QI_type_node, integer_type_node,
|
||||
NULL_TREE);
|
||||
tree v16qi_ftype_v16qi_v16qi_v16qi
|
||||
= build_function_type_list (V16QI_type_node, V16QI_type_node,
|
||||
V16QI_type_node, V16QI_type_node,
|
||||
NULL_TREE);
|
||||
tree v8hi_ftype_v8hi_v8hi_int
|
||||
= build_function_type_list (V8HI_type_node, V8HI_type_node,
|
||||
V8HI_type_node, integer_type_node,
|
||||
NULL_TREE);
|
||||
tree v4si_ftype_v4si_v4si_int
|
||||
= build_function_type_list (V4SI_type_node, V4SI_type_node,
|
||||
V4SI_type_node, integer_type_node,
|
||||
NULL_TREE);
|
||||
tree int_ftype_v2di_v2di
|
||||
= build_function_type_list (integer_type_node,
|
||||
V2DI_type_node, V2DI_type_node,
|
||||
NULL_TREE);
|
||||
|
||||
tree float80_type;
|
||||
tree float128_type;
|
||||
@ -17193,6 +17387,64 @@ ix86_init_mmx_sse_builtins (void)
|
||||
(*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
|
||||
}
|
||||
|
||||
/* Add all SSE builtins that are more or less simple operations on
|
||||
three operands. */
|
||||
for (i = 0, d = bdesc_sse_3arg;
|
||||
i < ARRAY_SIZE (bdesc_sse_3arg);
|
||||
i++, d++)
|
||||
{
|
||||
/* Use one of the operands; the target can have a different mode for
|
||||
mask-generating compares. */
|
||||
enum machine_mode mode;
|
||||
tree type;
|
||||
|
||||
if (d->name == 0)
|
||||
continue;
|
||||
mode = insn_data[d->icode].operand[1].mode;
|
||||
|
||||
switch (mode)
|
||||
{
|
||||
case V16QImode:
|
||||
type = v16qi_ftype_v16qi_v16qi_int;
|
||||
break;
|
||||
case V8HImode:
|
||||
type = v8hi_ftype_v8hi_v8hi_int;
|
||||
break;
|
||||
case V4SImode:
|
||||
type = v4si_ftype_v4si_v4si_int;
|
||||
break;
|
||||
case V2DImode:
|
||||
type = v2di_ftype_v2di_v2di_int;
|
||||
break;
|
||||
case V2DFmode:
|
||||
type = v2df_ftype_v2df_v2df_int;
|
||||
break;
|
||||
case V4SFmode:
|
||||
type = v4sf_ftype_v4sf_v4sf_int;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Override for variable blends. */
|
||||
switch (d->icode)
|
||||
{
|
||||
case CODE_FOR_sse4_1_blendvpd:
|
||||
type = v2df_ftype_v2df_v2df_v2df;
|
||||
break;
|
||||
case CODE_FOR_sse4_1_blendvps:
|
||||
type = v4sf_ftype_v4sf_v4sf_v4sf;
|
||||
break;
|
||||
case CODE_FOR_sse4_1_pblendvb:
|
||||
type = v16qi_ftype_v16qi_v16qi_v16qi;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
def_builtin (d->mask, d->name, type, d->code);
|
||||
}
|
||||
|
||||
/* Add all builtins that are more or less simple operations on two
|
||||
operands. */
|
||||
for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
|
||||
@ -17322,6 +17574,10 @@ ix86_init_mmx_sse_builtins (void)
|
||||
else
|
||||
def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
|
||||
|
||||
/* ptest insns. */
|
||||
for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
|
||||
def_builtin (d->mask, d->name, int_ftype_v2di_v2di, d->code);
|
||||
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
|
||||
@ -17495,6 +17751,44 @@ ix86_init_mmx_sse_builtins (void)
|
||||
def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
|
||||
IX86_BUILTIN_PALIGNR);
|
||||
|
||||
/* SSE4.1. */
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_movntdqa",
|
||||
v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbw128",
|
||||
v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbd128",
|
||||
v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxbq128",
|
||||
v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwd128",
|
||||
v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxwq128",
|
||||
v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovsxdq128",
|
||||
v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbw128",
|
||||
v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbd128",
|
||||
v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxbq128",
|
||||
v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwd128",
|
||||
v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxwq128",
|
||||
v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmovzxdq128",
|
||||
v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_pmuldq128",
|
||||
v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
|
||||
def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundpd",
|
||||
v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
|
||||
def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundps",
|
||||
v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
|
||||
def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundsd",
|
||||
v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
|
||||
def_builtin_const (MASK_SSE4_1, "__builtin_ia32_roundss",
|
||||
v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
|
||||
|
||||
/* AMDFAM10 SSE4A New built-ins */
|
||||
def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
|
||||
void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
|
||||
@ -17567,7 +17861,30 @@ ix86_init_mmx_sse_builtins (void)
|
||||
def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
|
||||
ftype, IX86_BUILTIN_VEC_EXT_V2SI);
|
||||
|
||||
ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
|
||||
integer_type_node, NULL_TREE);
|
||||
def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v16qi",
|
||||
ftype, IX86_BUILTIN_VEC_EXT_V16QI);
|
||||
|
||||
/* Access to the vec_set patterns. */
|
||||
ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
|
||||
intDI_type_node,
|
||||
integer_type_node, NULL_TREE);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v2di",
|
||||
ftype, IX86_BUILTIN_VEC_SET_V2DI);
|
||||
|
||||
ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
|
||||
float_type_node,
|
||||
integer_type_node, NULL_TREE);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4sf",
|
||||
ftype, IX86_BUILTIN_VEC_SET_V4SF);
|
||||
|
||||
ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
|
||||
intSI_type_node,
|
||||
integer_type_node, NULL_TREE);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v4si",
|
||||
ftype, IX86_BUILTIN_VEC_SET_V4SI);
|
||||
|
||||
ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
|
||||
intHI_type_node,
|
||||
integer_type_node, NULL_TREE);
|
||||
@ -17579,6 +17896,12 @@ ix86_init_mmx_sse_builtins (void)
|
||||
integer_type_node, NULL_TREE);
|
||||
def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
|
||||
ftype, IX86_BUILTIN_VEC_SET_V4HI);
|
||||
|
||||
ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
|
||||
intQI_type_node,
|
||||
integer_type_node, NULL_TREE);
|
||||
def_builtin (MASK_SSE4_1, "__builtin_ia32_vec_set_v16qi",
|
||||
ftype, IX86_BUILTIN_VEC_SET_V16QI);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -17599,6 +17922,74 @@ safe_vector_operand (rtx x, enum machine_mode mode)
|
||||
return x;
|
||||
}
|
||||
|
||||
/* Subroutine of ix86_expand_builtin to take care of SSE insns with
|
||||
4 operands. The third argument must be a constant smaller than 8
|
||||
bits or xmm0. */
|
||||
|
||||
static rtx
|
||||
ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
|
||||
rtx target)
|
||||
{
|
||||
rtx pat;
|
||||
tree arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
tree arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
tree arg2 = CALL_EXPR_ARG (exp, 2);
|
||||
rtx op0 = expand_normal (arg0);
|
||||
rtx op1 = expand_normal (arg1);
|
||||
rtx op2 = expand_normal (arg2);
|
||||
enum machine_mode tmode = insn_data[icode].operand[0].mode;
|
||||
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
|
||||
enum machine_mode mode1 = insn_data[icode].operand[2].mode;
|
||||
enum machine_mode mode2;
|
||||
rtx xmm0;
|
||||
|
||||
if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
|
||||
op0 = copy_to_mode_reg (mode0, op0);
|
||||
if ((optimize && !register_operand (op1, mode1))
|
||||
|| !(*insn_data[icode].operand[2].predicate) (op1, mode1))
|
||||
op1 = copy_to_mode_reg (mode1, op1);
|
||||
|
||||
switch (icode)
|
||||
{
|
||||
case CODE_FOR_sse4_1_blendvpd:
|
||||
case CODE_FOR_sse4_1_blendvps:
|
||||
case CODE_FOR_sse4_1_pblendvb:
|
||||
/* The third argument of variable blends must be xmm0. */
|
||||
xmm0 = gen_rtx_REG (tmode, FIRST_SSE_REG);
|
||||
emit_move_insn (xmm0, op2);
|
||||
op2 = xmm0;
|
||||
break;
|
||||
default:
|
||||
mode2 = insn_data[icode].operand[2].mode;
|
||||
if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
|
||||
{
|
||||
switch (icode)
|
||||
{
|
||||
case CODE_FOR_sse4_1_roundsd:
|
||||
case CODE_FOR_sse4_1_roundss:
|
||||
error ("the third argument must be a 4-bit immediate");
|
||||
break;
|
||||
default:
|
||||
error ("the third argument must be a 8-bit immediate");
|
||||
break;
|
||||
}
|
||||
return const0_rtx;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (optimize
|
||||
|| target == 0
|
||||
|| GET_MODE (target) != tmode
|
||||
|| ! (*insn_data[icode].operand[0].predicate) (target, tmode))
|
||||
target = gen_reg_rtx (tmode);
|
||||
pat = GEN_FCN (icode) (target, op0, op1, op2);
|
||||
if (! pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
}
|
||||
|
||||
/* Subroutine of ix86_expand_builtin to take care of binop insns. */
|
||||
|
||||
static rtx
|
||||
@ -17720,7 +18111,28 @@ ix86_expand_unop_builtin (enum insn_code icode, tree exp,
|
||||
op0 = copy_to_mode_reg (mode0, op0);
|
||||
}
|
||||
|
||||
pat = GEN_FCN (icode) (target, op0);
|
||||
switch (icode)
|
||||
{
|
||||
case CODE_FOR_sse4_1_roundpd:
|
||||
case CODE_FOR_sse4_1_roundps:
|
||||
{
|
||||
tree arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
rtx op1 = expand_normal (arg1);
|
||||
enum machine_mode mode1 = insn_data[icode].operand[2].mode;
|
||||
|
||||
if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
|
||||
{
|
||||
error ("the second argument must be a 4-bit immediate");
|
||||
return const0_rtx;
|
||||
}
|
||||
pat = GEN_FCN (icode) (target, op0, op1);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
pat = GEN_FCN (icode) (target, op0);
|
||||
break;
|
||||
}
|
||||
|
||||
if (! pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
@ -17867,6 +18279,50 @@ ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
|
||||
return SUBREG_REG (target);
|
||||
}
|
||||
|
||||
/* Subroutine of ix86_expand_builtin to take care of ptest insns. */
|
||||
|
||||
static rtx
|
||||
ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
|
||||
rtx target)
|
||||
{
|
||||
rtx pat;
|
||||
tree arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
tree arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
rtx op0 = expand_normal (arg0);
|
||||
rtx op1 = expand_normal (arg1);
|
||||
enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
|
||||
enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
|
||||
enum rtx_code comparison = d->comparison;
|
||||
|
||||
if (VECTOR_MODE_P (mode0))
|
||||
op0 = safe_vector_operand (op0, mode0);
|
||||
if (VECTOR_MODE_P (mode1))
|
||||
op1 = safe_vector_operand (op1, mode1);
|
||||
|
||||
target = gen_reg_rtx (SImode);
|
||||
emit_move_insn (target, const0_rtx);
|
||||
target = gen_rtx_SUBREG (QImode, target, 0);
|
||||
|
||||
if ((optimize && !register_operand (op0, mode0))
|
||||
|| !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
|
||||
op0 = copy_to_mode_reg (mode0, op0);
|
||||
if ((optimize && !register_operand (op1, mode1))
|
||||
|| !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
|
||||
op1 = copy_to_mode_reg (mode1, op1);
|
||||
|
||||
pat = GEN_FCN (d->icode) (op0, op1);
|
||||
if (! pat)
|
||||
return 0;
|
||||
emit_insn (pat);
|
||||
emit_insn (gen_rtx_SET (VOIDmode,
|
||||
gen_rtx_STRICT_LOW_PART (VOIDmode, target),
|
||||
gen_rtx_fmt_ee (comparison, QImode,
|
||||
SET_DEST (pat),
|
||||
const0_rtx)));
|
||||
|
||||
return SUBREG_REG (target);
|
||||
}
|
||||
|
||||
/* Return the integer constant in ARG. Constrain it to be in the range
|
||||
of the subparts of VEC_TYPE; issue an error if not. */
|
||||
|
||||
@ -18522,6 +18978,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
||||
emit_insn (pat);
|
||||
return target;
|
||||
|
||||
case IX86_BUILTIN_MOVNTDQA:
|
||||
return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
|
||||
target, 1);
|
||||
|
||||
case IX86_BUILTIN_MOVNTSD:
|
||||
return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
|
||||
|
||||
@ -18642,16 +19102,28 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
||||
case IX86_BUILTIN_VEC_EXT_V8HI:
|
||||
case IX86_BUILTIN_VEC_EXT_V2SI:
|
||||
case IX86_BUILTIN_VEC_EXT_V4HI:
|
||||
case IX86_BUILTIN_VEC_EXT_V16QI:
|
||||
return ix86_expand_vec_ext_builtin (exp, target);
|
||||
|
||||
case IX86_BUILTIN_VEC_SET_V2DI:
|
||||
case IX86_BUILTIN_VEC_SET_V4SF:
|
||||
case IX86_BUILTIN_VEC_SET_V4SI:
|
||||
case IX86_BUILTIN_VEC_SET_V8HI:
|
||||
case IX86_BUILTIN_VEC_SET_V4HI:
|
||||
case IX86_BUILTIN_VEC_SET_V16QI:
|
||||
return ix86_expand_vec_set_builtin (exp);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0, d = bdesc_sse_3arg;
|
||||
i < ARRAY_SIZE (bdesc_sse_3arg);
|
||||
i++, d++)
|
||||
if (d->code == fcode)
|
||||
return ix86_expand_sse_4_operands_builtin (d->icode, exp,
|
||||
target);
|
||||
|
||||
for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
|
||||
if (d->code == fcode)
|
||||
{
|
||||
@ -18673,6 +19145,10 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
|
||||
if (d->code == fcode)
|
||||
return ix86_expand_sse_comi (d, exp, target);
|
||||
|
||||
for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
|
||||
if (d->code == fcode)
|
||||
return ix86_expand_sse_ptest (d, exp, target);
|
||||
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
@ -20877,8 +21353,12 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
||||
}
|
||||
break;
|
||||
|
||||
case V2DFmode:
|
||||
case V2DImode:
|
||||
use_vec_merge = TARGET_SSE4_1;
|
||||
if (use_vec_merge)
|
||||
break;
|
||||
|
||||
case V2DFmode:
|
||||
{
|
||||
rtx op0, op1;
|
||||
|
||||
@ -20899,6 +21379,10 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
||||
return;
|
||||
|
||||
case V4SFmode:
|
||||
use_vec_merge = TARGET_SSE4_1;
|
||||
if (use_vec_merge)
|
||||
break;
|
||||
|
||||
switch (elt)
|
||||
{
|
||||
case 0:
|
||||
@ -20946,6 +21430,10 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
||||
break;
|
||||
|
||||
case V4SImode:
|
||||
use_vec_merge = TARGET_SSE4_1;
|
||||
if (use_vec_merge)
|
||||
break;
|
||||
|
||||
/* Element 0 handled by vec_merge below. */
|
||||
if (elt == 0)
|
||||
{
|
||||
@ -20990,6 +21478,9 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
|
||||
break;
|
||||
|
||||
case V16QImode:
|
||||
use_vec_merge = TARGET_SSE4_1;
|
||||
break;
|
||||
|
||||
case V8QImode:
|
||||
default:
|
||||
break;
|
||||
@ -21036,6 +21527,10 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
||||
break;
|
||||
|
||||
case V4SFmode:
|
||||
use_vec_extr = TARGET_SSE4_1;
|
||||
if (use_vec_extr)
|
||||
break;
|
||||
|
||||
switch (elt)
|
||||
{
|
||||
case 0:
|
||||
@ -21064,6 +21559,10 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
||||
break;
|
||||
|
||||
case V4SImode:
|
||||
use_vec_extr = TARGET_SSE4_1;
|
||||
if (use_vec_extr)
|
||||
break;
|
||||
|
||||
if (TARGET_SSE2)
|
||||
{
|
||||
switch (elt)
|
||||
@ -21109,6 +21608,9 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
||||
break;
|
||||
|
||||
case V16QImode:
|
||||
use_vec_extr = TARGET_SSE4_1;
|
||||
break;
|
||||
|
||||
case V8QImode:
|
||||
/* ??? Could extract the appropriate HImode element and shift. */
|
||||
default:
|
||||
@ -21121,7 +21623,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
|
||||
tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
|
||||
|
||||
/* Let the rtl optimizers know about the zero extension performed. */
|
||||
if (inner_mode == HImode)
|
||||
if (inner_mode == QImode || inner_mode == HImode)
|
||||
{
|
||||
tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
|
||||
target = gen_lowpart (SImode, target);
|
||||
|
@ -540,6 +540,8 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|
||||
builtin_define ("__SSE3__"); \
|
||||
if (TARGET_SSSE3) \
|
||||
builtin_define ("__SSSE3__"); \
|
||||
if (TARGET_SSE4_1) \
|
||||
builtin_define ("__SSE4_1__"); \
|
||||
if (TARGET_SSE4A) \
|
||||
builtin_define ("__SSE4A__"); \
|
||||
if (TARGET_SSE_MATH && TARGET_SSE) \
|
||||
|
@ -162,6 +162,17 @@
|
||||
(UNSPEC_EXTRQ 131)
|
||||
(UNSPEC_INSERTQI 132)
|
||||
(UNSPEC_INSERTQ 133)
|
||||
|
||||
; For SSE4.1 support
|
||||
(UNSPEC_BLENDV 134)
|
||||
(UNSPEC_INSERTPS 135)
|
||||
(UNSPEC_DP 136)
|
||||
(UNSPEC_MOVNTDQA 137)
|
||||
(UNSPEC_MPSADBW 138)
|
||||
(UNSPEC_PHMINPOSUW 139)
|
||||
(UNSPEC_PTEST 140)
|
||||
(UNSPEC_ROUNDP 141)
|
||||
(UNSPEC_ROUNDS 142)
|
||||
])
|
||||
|
||||
(define_constants
|
||||
|
@ -187,6 +187,10 @@ mssse3
|
||||
Target Report Mask(SSSE3)
|
||||
Support MMX, SSE, SSE2, SSE3 and SSSE3 built-in functions and code generation
|
||||
|
||||
msse4.1
|
||||
Target Report Mask(SSE4_1)
|
||||
Support MMX, SSE, SSE2, SSE3, SSSE3 and SSE4.1 built-in functions and code generation
|
||||
|
||||
msse4a
|
||||
Target Report Mask(SSE4A)
|
||||
Support MMX, SSE, SSE2, SSE3 and SSE4A built-in functions and code generation
|
||||
|
@ -623,6 +623,11 @@
|
||||
(and (match_code "const_int")
|
||||
(match_test "IN_RANGE (INTVAL (op), 4, 7)")))
|
||||
|
||||
;; Match exactly one bit in 2-bit mask.
|
||||
(define_predicate "const_pow2_1_to_2_operand"
|
||||
(and (match_code "const_int")
|
||||
(match_test "INTVAL (op) == 1 || INTVAL (op) == 2")))
|
||||
|
||||
;; Match exactly one bit in 4-bit mask.
|
||||
(define_predicate "const_pow2_1_to_8_operand"
|
||||
(match_code "const_int")
|
||||
@ -639,6 +644,14 @@
|
||||
return log <= 7;
|
||||
})
|
||||
|
||||
;; Match exactly one bit in 16-bit mask.
|
||||
(define_predicate "const_pow2_1_to_32768_operand"
|
||||
(match_code "const_int")
|
||||
{
|
||||
unsigned int log = exact_log2 (INTVAL (op));
|
||||
return log <= 15;
|
||||
})
|
||||
|
||||
;; True if this is a constant appropriate for an increment or decrement.
|
||||
(define_predicate "incdec_operand"
|
||||
(match_code "const_int")
|
||||
|
578
gcc/config/i386/smmintrin.h
Normal file
578
gcc/config/i386/smmintrin.h
Normal file
@ -0,0 +1,578 @@
|
||||
/* Copyright (C) 2007 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU General Public License as published by
|
||||
the Free Software Foundation; either version 2, or (at your option)
|
||||
any later version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING. If not, write to
|
||||
the Free Software Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* As a special exception, if you include this header file into source
|
||||
files compiled by GCC, this header file does not by itself cause
|
||||
the resulting executable to be covered by the GNU General Public
|
||||
License. This exception does not however invalidate any other
|
||||
reasons why the executable file might be covered by the GNU General
|
||||
Public License. */
|
||||
|
||||
/* Implemented from the specification included in the Intel C++ Compiler
|
||||
User Guide and Reference, version 10.0. */
|
||||
|
||||
#ifndef _SMMINTRIN_H_INCLUDED
|
||||
#define _SMMINTRIN_H_INCLUDED
|
||||
|
||||
#ifndef __SSE4_1__
|
||||
# error "SSE4.1 instruction set not enabled"
|
||||
#else
|
||||
|
||||
/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
|
||||
files. */
|
||||
#include <tmmintrin.h>
|
||||
|
||||
/* SSE4.1 */
|
||||
|
||||
/* Rounding mode macros. */
|
||||
#define _MM_FROUND_TO_NEAREST_INT 0x00
|
||||
#define _MM_FROUND_TO_NEG_INF 0x01
|
||||
#define _MM_FROUND_TO_POS_INF 0x02
|
||||
#define _MM_FROUND_TO_ZERO 0x03
|
||||
#define _MM_FROUND_CUR_DIRECTION 0x04
|
||||
|
||||
#define _MM_FROUND_RAISE_EXC 0x00
|
||||
#define _MM_FROUND_NO_EXC 0x08
|
||||
|
||||
#define _MM_FROUND_NINT \
|
||||
(_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_FLOOR \
|
||||
(_MM_FROUND_TO_NEG_INF | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_CEIL \
|
||||
(_MM_FROUND_TO_POS_INF | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_TRUNC \
|
||||
(_MM_FROUND_TO_ZERO | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_RINT \
|
||||
(_MM_FROUND_CUR_DIRECTION | _MM_FROUND_RAISE_EXC)
|
||||
#define _MM_FROUND_NEARBYINT \
|
||||
(_MM_FROUND_CUR_DIRECTION | _MM_FROUND_NO_EXC)
|
||||
|
||||
/* Integer blend instructions - select data from 2 sources using
|
||||
constant/variable mask. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_blend_epi16 (__m128i __X, __m128i __Y, const int __M)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pblendw128 ((__v8hi)__X,
|
||||
(__v8hi)__Y,
|
||||
__M);
|
||||
}
|
||||
#else
|
||||
#define _mm_blend_epi16(X, Y, M) \
|
||||
((__m128i) __builtin_ia32_pblendw128 ((__v8hi)(X), (__v8hi)(Y), (M)))
|
||||
#endif
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_blendv_epi8 (__m128i __X, __m128i __Y, __m128i __M)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pblendvb128 ((__v16qi)__X,
|
||||
(__v16qi)__Y,
|
||||
(__v16qi)__M);
|
||||
}
|
||||
|
||||
/* Single precision floating point blend instructions - select data
|
||||
from 2 sources using constant/variable mask. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline __m128 __attribute__((__always_inline__))
|
||||
_mm_blend_ps (__m128 __X, __m128 __Y, const int __M)
|
||||
{
|
||||
return (__m128) __builtin_ia32_blendps ((__v4sf)__X,
|
||||
(__v4sf)__Y,
|
||||
__M);
|
||||
}
|
||||
#else
|
||||
#define _mm_blend_ps(X, Y, M) \
|
||||
((__m128) __builtin_ia32_blendps ((__v4sf)(X), (__v4sf)(Y), (M)))
|
||||
#endif
|
||||
|
||||
static __inline __m128 __attribute__((__always_inline__))
|
||||
_mm_blendv_ps (__m128 __X, __m128 __Y, __m128 __M)
|
||||
{
|
||||
return (__m128) __builtin_ia32_blendvps ((__v4sf)__X,
|
||||
(__v4sf)__Y,
|
||||
(__v4sf)__M);
|
||||
}
|
||||
|
||||
/* Double precision floating point blend instructions - select data
|
||||
from 2 sources using constant/variable mask. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline __m128d __attribute__((__always_inline__))
|
||||
_mm_blend_pd (__m128d __X, __m128d __Y, const int __M)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_blendpd ((__v2df)__X,
|
||||
(__v2df)__Y,
|
||||
__M);
|
||||
}
|
||||
#else
|
||||
#define _mm_blend_pd(X, Y, M) \
|
||||
((__m128d) __builtin_ia32_blendpd ((__v2df)(X), (__v2df)(Y), (M)))
|
||||
#endif
|
||||
|
||||
static __inline __m128d __attribute__((__always_inline__))
|
||||
_mm_blendv_pd (__m128d __X, __m128d __Y, __m128d __M)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_blendvpd ((__v2df)__X,
|
||||
(__v2df)__Y,
|
||||
(__v2df)__M);
|
||||
}
|
||||
|
||||
/* Dot product instructions with mask-defined summing and zeroing parts
|
||||
of result. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline __m128 __attribute__((__always_inline__))
|
||||
_mm_dp_ps (__m128 __X, __m128 __Y, const int __M)
|
||||
{
|
||||
return (__m128) __builtin_ia32_dpps ((__v4sf)__X,
|
||||
(__v4sf)__Y,
|
||||
__M);
|
||||
}
|
||||
|
||||
static __inline __m128d __attribute__((__always_inline__))
|
||||
_mm_dp_pd (__m128d __X, __m128d __Y, const int __M)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_dppd ((__v2df)__X,
|
||||
(__v2df)__Y,
|
||||
__M);
|
||||
}
|
||||
#else
|
||||
#define _mm_dp_ps(X, Y, M) \
|
||||
((__m128) __builtin_ia32_dpps ((__v4sf)(X), (__v4sf)(Y), (M)))
|
||||
|
||||
#define _mm_dp_pd(X, Y, M) \
|
||||
((__m128d) __builtin_ia32_dppd ((__v2df)(X), (__v2df)(Y), (M)))
|
||||
#endif
|
||||
|
||||
/* Packed integer 64-bit comparison, zeroing or filling with ones
|
||||
corresponding parts of result. */
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cmpeq_epi64 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pcmpeqq ((__v2di)__X, (__v2di)__Y);
|
||||
}
|
||||
|
||||
/* Min/max packed integer instructions. */
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_min_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pminsb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_max_epi8 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaxsb128 ((__v16qi)__X, (__v16qi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_min_epu16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pminuw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_max_epu16 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaxuw128 ((__v8hi)__X, (__v8hi)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_min_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pminsd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_max_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaxsd128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_min_epu32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pminud128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_max_epu32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmaxud128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
/* Packed integer 32-bit multiplication with truncation of upper
|
||||
halves of results. */
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_mullo_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmulld128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
/* Packed integer 32-bit multiplication of 2 pairs of operands
|
||||
with two 64-bit results. */
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_mul_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmuldq128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
/* Packed integer 128-bit bitwise comparison. Return 1 if
|
||||
(__V & __M) == 0. */
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_testz_si128 (__m128i __M, __m128i __V)
|
||||
{
|
||||
return __builtin_ia32_ptestz128 ((__v2di)__M, (__v2di)__V);
|
||||
}
|
||||
|
||||
/* Packed integer 128-bit bitwise comparison. Return 1 if
|
||||
(__V & ~__M) == 0. */
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_testc_si128 (__m128i __M, __m128i __V)
|
||||
{
|
||||
return __builtin_ia32_ptestc128 ((__v2di)__M, (__v2di)__V);
|
||||
}
|
||||
|
||||
/* Packed integer 128-bit bitwise comparison. Return 1 if
|
||||
(__V & __M) != 0 && (__V & ~__M) != 0. */
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_testnzc_si128 (__m128i __M, __m128i __V)
|
||||
{
|
||||
return __builtin_ia32_ptestnzc128 ((__v2di)__M, (__v2di)__V);
|
||||
}
|
||||
|
||||
/* Macros for packed integer 128-bit comparison intrinsics. */
|
||||
#define _mm_test_all_zeros(M, V) _mm_testz_si128 ((M), (V))
|
||||
|
||||
#define _mm_test_all_ones(V) \
|
||||
_mm_testc_si128 ((V), _mm_cmpeq_epi32 ((V), (V)))
|
||||
|
||||
#define _mm_test_mix_ones_zeros(M, V) _mm_testnzc_si128 ((M), (V))
|
||||
|
||||
/* Insert single precision float into packed single precision array
|
||||
element selected by index N. The bits [7-6] of N define S
|
||||
index, the bits [5-4] define D index, and bits [3-0] define
|
||||
zeroing mask for D. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline __m128 __attribute__((__always_inline__))
|
||||
_mm_insert_ps (__m128 __D, __m128 __S, const int __N)
|
||||
{
|
||||
return (__m128) __builtin_ia32_insertps128 ((__v4sf)__D,
|
||||
(__v4sf)__S,
|
||||
__N);
|
||||
}
|
||||
#else
|
||||
#define _mm_insert_ps(D, S, N) \
|
||||
((__m128) __builtin_ia32_insertps128 ((__v4sf)(D), (__v4sf)(S), (N)))
|
||||
#endif
|
||||
|
||||
/* Helper macro to create the N value for _mm_insert_ps. */
|
||||
#define _MM_MK_INSERTPS_NDX(S, D, M) (((S) << 6) | ((D) << 4) | (M))
|
||||
|
||||
/* Extract binary representation of single precision float from packed
|
||||
single precision array element of X selected by index N. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_extract_ps (__m128 __X, const int __N)
|
||||
{
|
||||
union { int i; float f; } __tmp;
|
||||
__tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)__X, __N);
|
||||
return __tmp.i;
|
||||
}
|
||||
#else
|
||||
#define _mm_extract_ps(X, N) \
|
||||
(__extension__ \
|
||||
({ \
|
||||
union { int i; float f; } __tmp; \
|
||||
__tmp.f = __builtin_ia32_vec_ext_v4sf ((__v4sf)(X), (N)); \
|
||||
__tmp.i; \
|
||||
}) \
|
||||
)
|
||||
#endif
|
||||
|
||||
/* Extract binary representation of single precision float into
|
||||
D from packed single precision array element of S selected
|
||||
by index N. */
|
||||
#define _MM_EXTRACT_FLOAT(D, S, N) \
|
||||
{ (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
|
||||
|
||||
/* Extract specified single precision float element into the lower
|
||||
part of __m128. */
|
||||
#define _MM_PICK_OUT_PS(X, N) \
|
||||
_mm_insert_ps (_mm_setzero_ps (), (X), \
|
||||
_MM_MK_INSERTPS_NDX ((N), 0, 0x0e))
|
||||
|
||||
/* Insert integer, S, into packed integer array element of D
|
||||
selected by index N. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_insert_epi8 (__m128i __D, int __S, const int __N)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)__D,
|
||||
__S, __N);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_insert_epi32 (__m128i __D, int __S, const int __N)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vec_set_v4si ((__v4si)__D,
|
||||
__S, __N);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_insert_epi64 (__m128i __D, long long __S, const int __N)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_vec_set_v2di ((__v2di)__D,
|
||||
__S, __N);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#define _mm_insert_epi8(D, S, N) \
|
||||
((__m128i) __builtin_ia32_vec_set_v16qi ((__v16qi)(D), (S), (N)))
|
||||
|
||||
#define _mm_insert_epi32(D, S, N) \
|
||||
((__m128i) __builtin_ia32_vec_set_v4si ((__v4si)(D), (S), (N)))
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define _mm_insert_epi64(D, S, N) \
|
||||
((__m128i) __builtin_ia32_vec_set_v2di ((__v2di)(D), (S), (N)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Extract integer from packed integer array element of X selected by
|
||||
index N. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_extract_epi8 (__m128i __X, const int __N)
|
||||
{
|
||||
return __builtin_ia32_vec_ext_v16qi ((__v16qi)__X, __N);
|
||||
}
|
||||
|
||||
static __inline int __attribute__((__always_inline__))
|
||||
_mm_extract_epi32 (__m128i __X, const int __N)
|
||||
{
|
||||
return __builtin_ia32_vec_ext_v4si ((__v4si)__X, __N);
|
||||
}
|
||||
|
||||
#ifdef __x86_64__
|
||||
static __inline long long __attribute__((__always_inline__))
|
||||
_mm_extract_epi64 (__m128i __X, const int __N)
|
||||
{
|
||||
return __builtin_ia32_vec_ext_v2di ((__v2di)__X, __N);
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
#define _mm_extract_epi8(X, N) \
|
||||
__builtin_ia32_vec_ext_v16qi ((__v16qi) X, (N))
|
||||
#define _mm_extract_epi32(X, N) \
|
||||
__builtin_ia32_vec_ext_v4si ((__v4si) X, (N))
|
||||
|
||||
#ifdef __x86_64__
|
||||
#define _mm_extract_epi64(X, N) \
|
||||
((long long) __builtin_ia32_vec_ext_v2di ((__v2di)(X), (N)))
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* Return horizontal packed word minimum and its index in bits [15:0]
|
||||
and bits [18:16] respectively. */
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_minpos_epu16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_phminposuw128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
/* Packed/scalar double precision floating point rounding. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline __m128d __attribute__((__always_inline__))
|
||||
_mm_round_pd (__m128d __V, const int __M)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_roundpd ((__v2df)__V, __M);
|
||||
}
|
||||
|
||||
static __inline __m128d __attribute__((__always_inline__))
|
||||
_mm_round_sd(__m128d __D, __m128d __V, const int __M)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_roundsd ((__v2df)__D,
|
||||
(__v2df)__V,
|
||||
__M);
|
||||
}
|
||||
#else
|
||||
#define _mm_round_pd(V, M) \
|
||||
((__m128d) __builtin_ia32_roundpd ((__v2df)(V), (M)))
|
||||
|
||||
#define _mm_round_sd(D, V, M) \
|
||||
((__m128d) __builtin_ia32_roundsd ((__v2df)(D), (__v2df)(V), (M)))
|
||||
#endif
|
||||
|
||||
/* Packed/scalar single precision floating point rounding. */
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
static __inline __m128 __attribute__((__always_inline__))
|
||||
_mm_round_ps (__m128 __V, const int __M)
|
||||
{
|
||||
return (__m128) __builtin_ia32_roundps ((__v4sf)__V, __M);
|
||||
}
|
||||
|
||||
static __inline __m128 __attribute__((__always_inline__))
|
||||
_mm_round_ss (__m128 __D, __m128 __V, const int __M)
|
||||
{
|
||||
return (__m128) __builtin_ia32_roundss ((__v4sf)__D,
|
||||
(__v4sf)__V,
|
||||
__M);
|
||||
}
|
||||
#else
|
||||
#define _mm_round_ps(V, M) \
|
||||
((__m128) __builtin_ia32_roundps ((__v4sf)(V), (M)))
|
||||
|
||||
#define _mm_round_ss(D, V, M) \
|
||||
((__m128) __builtin_ia32_roundss ((__v4sf)(D), (__v4sf)(V), (M)))
|
||||
#endif
|
||||
|
||||
/* Macros for ceil/floor intrinsics. */
|
||||
#define _mm_ceil_pd(V) _mm_round_pd ((V), _MM_FROUND_CEIL)
|
||||
#define _mm_ceil_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_CEIL)
|
||||
|
||||
#define _mm_floor_pd(V) _mm_round_pd((V), _MM_FROUND_FLOOR)
|
||||
#define _mm_floor_sd(D, V) _mm_round_sd ((D), (V), _MM_FROUND_FLOOR)
|
||||
|
||||
#define _mm_ceil_ps(V) _mm_round_ps ((V), _MM_FROUND_CEIL)
|
||||
#define _mm_ceil_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_CEIL)
|
||||
|
||||
#define _mm_floor_ps(V) _mm_round_ps ((V), _MM_FROUND_FLOOR)
|
||||
#define _mm_floor_ss(D, V) _mm_round_ss ((D), (V), _MM_FROUND_FLOOR)
|
||||
|
||||
/* Packed integer sign-extension. */
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepi8_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxbd128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepi16_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxwd128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepi8_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxbq128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepi32_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxdq128 ((__v4si)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepi16_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxwq128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepi8_epi16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovsxbw128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
/* Packed integer zero-extension. */
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepu8_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxbd128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepu16_epi32 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxwd128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepu8_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxbq128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepu32_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxdq128 ((__v4si)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepu16_epi64 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxwq128 ((__v8hi)__X);
|
||||
}
|
||||
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_cvtepu8_epi16 (__m128i __X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_pmovzxbw128 ((__v16qi)__X);
|
||||
}
|
||||
|
||||
/* Pack 8 double words from 2 operands into 8 words of result with
|
||||
unsigned saturation. */
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_packus_epi32 (__m128i __X, __m128i __Y)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_packusdw128 ((__v4si)__X, (__v4si)__Y);
|
||||
}
|
||||
|
||||
/* Sum absolute 8-bit integer difference of adjacent groups of 4
   byte integers in the first 2 operands.  Starting offsets within
   operands are determined by the 3rd mask operand.  */

#ifdef __OPTIMIZE__
static __inline __m128i __attribute__((__always_inline__))
_mm_mpsadbw_epu8 (__m128i __X, __m128i __Y, const int __M)
{
  __v16qi __a = (__v16qi) __X;
  __v16qi __b = (__v16qi) __Y;
  return (__m128i) __builtin_ia32_mpsadbw128 (__a, __b, __M);
}
#else
/* Without optimization the mask must reach the builtin as a literal,
   so fall back to a macro form.  */
#define _mm_mpsadbw_epu8(X, Y, M) \
  ((__m128i) __builtin_ia32_mpsadbw128 ((__v16qi)(X), (__v16qi)(Y), (M)))
#endif
|
||||
|
||||
/* Load double quadword using non-temporal aligned hint. */
|
||||
static __inline __m128i __attribute__((__always_inline__))
|
||||
_mm_stream_load_si128 (__m128i *__X)
|
||||
{
|
||||
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
|
||||
}
|
||||
|
||||
#endif /* __SSE4_1__ */
|
||||
|
||||
#endif /* _SMMINTRIN_H_INCLUDED */
|
File diff suppressed because it is too large
Load Diff
@ -7396,6 +7396,84 @@ v4si __builtin_ia32_pabsd128 (v4si)
|
||||
v8hi __builtin_ia32_pabsw128 (v8hi)
|
||||
@end smallexample
|
||||
|
||||
The following built-in functions are available when @option{-msse4.1} is
|
||||
used. All of them generate the machine instruction that is part of the
|
||||
name.
|
||||
|
||||
@smallexample
|
||||
v2df __builtin_ia32_blendpd (v2df, v2df, const int)
|
||||
v4sf __builtin_ia32_blendps (v4sf, v4sf, const int)
|
||||
v2df __builtin_ia32_blendvpd (v2df, v2df, v2df)
|
||||
v4sf __builtin_ia32_blendvps (v4sf, v4sf, v4sf)
|
||||
v2df __builtin_ia32_dppd (v2df, v2df, const int)
|
||||
v4sf __builtin_ia32_dpps (v4sf, v4sf, const int)
|
||||
v4sf __builtin_ia32_insertps128 (v4sf, v4sf, const int)
|
||||
v2di __builtin_ia32_movntdqa (v2di *)
|
||||
v16qi __builtin_ia32_mpsadbw128 (v16qi, v16qi, const int)
|
||||
v8hi __builtin_ia32_packusdw128 (v4si, v4si)
|
||||
v16qi __builtin_ia32_pblendvb128 (v16qi, v16qi, v16qi)
|
||||
v8hi __builtin_ia32_pblendw128 (v8hi, v8hi, const int)
|
||||
v2di __builtin_ia32_pcmpeqq (v2di, v2di)
|
||||
v8hi __builtin_ia32_phminposuw128 (v8hi)
|
||||
v16qi __builtin_ia32_pmaxsb128 (v16qi, v16qi)
|
||||
v4si __builtin_ia32_pmaxsd128 (v4si, v4si)
|
||||
v4si __builtin_ia32_pmaxud128 (v4si, v4si)
|
||||
v8hi __builtin_ia32_pmaxuw128 (v8hi, v8hi)
|
||||
v16qi __builtin_ia32_pminsb128 (v16qi, v16qi)
|
||||
v4si __builtin_ia32_pminsd128 (v4si, v4si)
|
||||
v4si __builtin_ia32_pminud128 (v4si, v4si)
|
||||
v8hi __builtin_ia32_pminuw128 (v8hi, v8hi)
|
||||
v4si __builtin_ia32_pmovsxbd128 (v16qi)
|
||||
v2di __builtin_ia32_pmovsxbq128 (v16qi)
|
||||
v8hi __builtin_ia32_pmovsxbw128 (v16qi)
|
||||
v2di __builtin_ia32_pmovsxdq128 (v4si)
|
||||
v4si __builtin_ia32_pmovsxwd128 (v8hi)
|
||||
v2di __builtin_ia32_pmovsxwq128 (v8hi)
|
||||
v4si __builtin_ia32_pmovzxbd128 (v16qi)
|
||||
v2di __builtin_ia32_pmovzxbq128 (v16qi)
|
||||
v8hi __builtin_ia32_pmovzxbw128 (v16qi)
|
||||
v2di __builtin_ia32_pmovzxdq128 (v4si)
|
||||
v4si __builtin_ia32_pmovzxwd128 (v8hi)
|
||||
v2di __builtin_ia32_pmovzxwq128 (v8hi)
|
||||
v2di __builtin_ia32_pmuldq128 (v4si, v4si)
|
||||
v4si __builtin_ia32_pmulld128 (v4si, v4si)
|
||||
int __builtin_ia32_ptestc128 (v2di, v2di)
|
||||
int __builtin_ia32_ptestnzc128 (v2di, v2di)
|
||||
int __builtin_ia32_ptestz128 (v2di, v2di)
|
||||
v2df __builtin_ia32_roundpd (v2df, const int)
|
||||
v4sf __builtin_ia32_roundps (v4sf, const int)
|
||||
v2df __builtin_ia32_roundsd (v2df, v2df, const int)
|
||||
v4sf __builtin_ia32_roundss (v4sf, v4sf, const int)
|
||||
@end smallexample
|
||||
|
||||
The following built-in functions are available when @option{-msse4.1} is
|
||||
used.
|
||||
|
||||
@table @code
|
||||
@item v4sf __builtin_ia32_vec_set_v4sf (v4sf, float, const int)
|
||||
Generates the @code{insertps} machine instruction.
|
||||
@item int __builtin_ia32_vec_ext_v16qi (v16qi, const int)
|
||||
Generates the @code{pextrb} machine instruction.
|
||||
@item v16qi __builtin_ia32_vec_set_v16qi (v16qi, int, const int)
|
||||
Generates the @code{pinsrb} machine instruction.
|
||||
@item v4si __builtin_ia32_vec_set_v4si (v4si, int, const int)
|
||||
Generates the @code{pinsrd} machine instruction.
|
||||
@item v2di __builtin_ia32_vec_set_v2di (v2di, long long, const int)
|
||||
Generates the @code{pinsrq} machine instruction in 64bit mode.
|
||||
@end table
|
||||
|
||||
The following built-in functions are changed to generate new SSE4.1
|
||||
instructions when @option{-msse4.1} is used.
|
||||
|
||||
@table @code
|
||||
@item float __builtin_ia32_vec_ext_v4sf (v4sf, const int)
|
||||
Generates the @code{extractps} machine instruction.
|
||||
@item int __builtin_ia32_vec_ext_v4si (v4si, const int)
|
||||
Generates the @code{pextrd} machine instruction.
|
||||
@item long long __builtin_ia32_vec_ext_v2di (v2di, const int)
|
||||
Generates the @code{pextrq} machine instruction in 64bit mode.
|
||||
@end table
|
||||
|
||||
The following built-in functions are available when @option{-msse4a} is used.
|
||||
|
||||
@smallexample
|
||||
|
@ -547,7 +547,8 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mno-fp-ret-in-387 -msoft-float @gol
|
||||
-mno-wide-multiply -mrtd -malign-double @gol
|
||||
-mpreferred-stack-boundary=@var{num} -mcx16 -msahf @gol
|
||||
-mmmx -msse -msse2 -msse3 -mssse3 -msse4a -m3dnow -mpopcnt -mabm @gol
|
||||
-mmmx -msse -msse2 -msse3 -mssse3 -msse4.1 @gol
|
||||
-msse4a -m3dnow -mpopcnt -mabm @gol
|
||||
-mthreads -mno-align-stringops -minline-all-stringops @gol
|
||||
-mpush-args -maccumulate-outgoing-args -m128bit-long-double @gol
|
||||
-m96bit-long-double -mregparm=@var{num} -msseregparm @gol
|
||||
@ -10260,6 +10261,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@itemx -mno-sse3
|
||||
@item -mssse3
|
||||
@itemx -mno-ssse3
|
||||
@item -msse4.1
|
||||
@itemx -mno-sse4.1
|
||||
@item -msse4a
|
||||
@item -mno-sse4a
|
||||
@item -m3dnow
|
||||
@ -10275,7 +10278,8 @@ preferred alignment to @option{-mpreferred-stack-boundary=2}.
|
||||
@opindex m3dnow
|
||||
@opindex mno-3dnow
|
||||
These switches enable or disable the use of instructions in the MMX,
|
||||
SSE, SSE2, SSE3, SSSE3, SSE4A, ABM or 3DNow! extended instruction sets.
|
||||
SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4A, ABM or 3DNow! extended
|
||||
instruction sets.
|
||||
These extensions are also available as built-in functions: see
|
||||
@ref{X86 Built-in Functions}, for details of the functions enabled and
|
||||
disabled by these switches.
|
||||
|
Loading…
x
Reference in New Issue
Block a user