mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-10 09:40:42 +08:00
i386.c (ix86_expand_builtin): If gather mask argument is known to have all high bits set...
* config/i386/i386.c (ix86_expand_builtin): If gather mask argument is known to have all high bits set, pass pc_rtx as second argument to the expander instead of op0. * config/i386/sse.md (*avx2_gathersi<mode>_2, *avx2_gatherdi<mode>_2): New patterns. * config/i386/avx2intrin.h (_mm256_i32gather_pd, _mm256_i64gather_pd, _mm256_i32gather_ps): Set mask using _mm256_cmp_pd with zero vector arguments and _CMP_EQ_OQ instead of _mm256_set1_pd. From-SVN: r181090
This commit is contained in:
parent
aec7ae7dea
commit
da80a6464e
gcc
@ -1,5 +1,15 @@
|
||||
2011-11-07 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/i386.c (ix86_expand_builtin): If gather mask
|
||||
argument is known to have all high bits set, pass pc_rtx as
|
||||
second argument to the expander instead of op0.
|
||||
* config/i386/sse.md (*avx2_gathersi<mode>_2,
|
||||
*avx2_gatherdi<mode>_2): New patterns.
|
||||
* config/i386/avx2intrin.h (_mm256_i32gather_pd,
|
||||
_mm256_i64gather_pd, _mm256_i32gather_ps): Set mask using
|
||||
_mm256_cmp_pd with zero vector arguments and _CMP_EQ_OQ instead of
|
||||
_mm256_set1_pd.
|
||||
|
||||
PR tree-optimization/50789
|
||||
* tree-vect-stmts.c (process_use): Add force argument, avoid
|
||||
exist_non_indexing_operands_for_use_p check if true.
|
||||
|
@ -1252,7 +1252,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_i32gather_pd (double const *base, __m128i index, const int scale)
|
||||
{
|
||||
__v4df src = _mm256_setzero_pd ();
|
||||
__v4df mask = _mm256_set1_pd((double)(long long int) -1);
|
||||
__v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
|
||||
|
||||
return (__m256d) __builtin_ia32_gathersiv4df (src,
|
||||
base,
|
||||
@ -1304,7 +1304,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_i64gather_pd (double const *base, __m256i index, const int scale)
|
||||
{
|
||||
__v4df src = _mm256_setzero_pd ();
|
||||
__v4df mask = _mm256_set1_pd((double)(long long int) -1);
|
||||
__v4df mask = _mm256_cmp_pd (src, src, _CMP_EQ_OQ);
|
||||
|
||||
return (__m256d) __builtin_ia32_gatherdiv4df (src,
|
||||
base,
|
||||
@ -1356,7 +1356,7 @@ __attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_i32gather_ps (float const *base, __m256i index, const int scale)
|
||||
{
|
||||
__v8sf src = _mm256_setzero_ps ();
|
||||
__v8sf mask = _mm256_set1_ps((float)(int) -1);
|
||||
__v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
|
||||
|
||||
return (__m256) __builtin_ia32_gathersiv8sf (src,
|
||||
base,
|
||||
|
@ -29087,6 +29087,71 @@ rdrand_step:
|
||||
error ("last argument must be scale 1, 2, 4, 8");
|
||||
return const0_rtx;
|
||||
}
|
||||
|
||||
/* Optimize. If mask is known to have all high bits set,
|
||||
replace op0 with pc_rtx to signal that the instruction
|
||||
overwrites the whole destination and doesn't use its
|
||||
previous contents. */
|
||||
if (optimize)
|
||||
{
|
||||
if (TREE_CODE (arg3) == VECTOR_CST)
|
||||
{
|
||||
tree elt;
|
||||
unsigned int negative = 0;
|
||||
for (elt = TREE_VECTOR_CST_ELTS (arg3);
|
||||
elt; elt = TREE_CHAIN (elt))
|
||||
{
|
||||
tree cst = TREE_VALUE (elt);
|
||||
if (TREE_CODE (cst) == INTEGER_CST
|
||||
&& tree_int_cst_sign_bit (cst))
|
||||
negative++;
|
||||
else if (TREE_CODE (cst) == REAL_CST
|
||||
&& REAL_VALUE_NEGATIVE (TREE_REAL_CST (cst)))
|
||||
negative++;
|
||||
}
|
||||
if (negative == TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg3)))
|
||||
op0 = pc_rtx;
|
||||
}
|
||||
else if (TREE_CODE (arg3) == SSA_NAME)
|
||||
{
|
||||
/* Recognize also when mask is like:
|
||||
__v2df src = _mm_setzero_pd ();
|
||||
__v2df mask = _mm_cmpeq_pd (src, src);
|
||||
or
|
||||
__v8sf src = _mm256_setzero_ps ();
|
||||
__v8sf mask = _mm256_cmp_ps (src, src, _CMP_EQ_OQ);
|
||||
as that is a cheaper way to load all ones into
|
||||
a register than having to load a constant from
|
||||
memory. */
|
||||
gimple def_stmt = SSA_NAME_DEF_STMT (arg3);
|
||||
if (is_gimple_call (def_stmt))
|
||||
{
|
||||
tree fndecl = gimple_call_fndecl (def_stmt);
|
||||
if (fndecl
|
||||
&& DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
|
||||
switch ((unsigned int) DECL_FUNCTION_CODE (fndecl))
|
||||
{
|
||||
case IX86_BUILTIN_CMPPD:
|
||||
case IX86_BUILTIN_CMPPS:
|
||||
case IX86_BUILTIN_CMPPD256:
|
||||
case IX86_BUILTIN_CMPPS256:
|
||||
if (!integer_zerop (gimple_call_arg (def_stmt, 2)))
|
||||
break;
|
||||
/* FALLTHRU */
|
||||
case IX86_BUILTIN_CMPEQPD:
|
||||
case IX86_BUILTIN_CMPEQPS:
|
||||
if (initializer_zerop (gimple_call_arg (def_stmt, 0))
|
||||
&& initializer_zerop (gimple_call_arg (def_stmt,
|
||||
1)))
|
||||
op0 = pc_rtx;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pat = GEN_FCN (icode) (subtarget, op0, op1, op2, op3, op4);
|
||||
if (! pat)
|
||||
return const0_rtx;
|
||||
|
@ -12567,6 +12567,26 @@
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
(define_insn "*avx2_gathersi<mode>_2"
|
||||
[(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
|
||||
(unspec:VEC_GATHER_MODE
|
||||
[(pc)
|
||||
(match_operator:<ssescalarmode> 6 "vsib_mem_operator"
|
||||
[(unspec:P
|
||||
[(match_operand:P 2 "vsib_address_operand" "p")
|
||||
(match_operand:<VEC_GATHER_IDXSI> 3 "register_operand" "x")
|
||||
(match_operand:SI 5 "const1248_operand" "n")]
|
||||
UNSPEC_VSIBADDR)])
|
||||
(mem:BLK (scratch))
|
||||
(match_operand:VEC_GATHER_MODE 4 "register_operand" "1")]
|
||||
UNSPEC_GATHER))
|
||||
(clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
|
||||
"TARGET_AVX2"
|
||||
"v<sseintprefix>gatherd<ssemodesuffix>\t{%1, %6, %0|%0, %6, %1}"
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
(define_expand "avx2_gatherdi<mode>"
|
||||
[(parallel [(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "")
|
||||
(unspec:VEC_GATHER_MODE
|
||||
@ -12608,3 +12628,27 @@
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
(define_insn "*avx2_gatherdi<mode>_2"
|
||||
[(set (match_operand:VEC_GATHER_MODE 0 "register_operand" "=&x")
|
||||
(unspec:VEC_GATHER_MODE
|
||||
[(pc)
|
||||
(match_operator:<ssescalarmode> 6 "vsib_mem_operator"
|
||||
[(unspec:P
|
||||
[(match_operand:P 2 "vsib_address_operand" "p")
|
||||
(match_operand:<VEC_GATHER_IDXDI> 3 "register_operand" "x")
|
||||
(match_operand:SI 5 "const1248_operand" "n")]
|
||||
UNSPEC_VSIBADDR)])
|
||||
(mem:BLK (scratch))
|
||||
(match_operand:<VEC_GATHER_SRCDI> 4 "register_operand" "1")]
|
||||
UNSPEC_GATHER))
|
||||
(clobber (match_scratch:VEC_GATHER_MODE 1 "=&x"))]
|
||||
"TARGET_AVX2"
|
||||
{
|
||||
if (<MODE>mode != <VEC_GATHER_SRCDI>mode)
|
||||
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %x0|%x0, %6, %4}";
|
||||
return "v<sseintprefix>gatherq<ssemodesuffix>\t{%4, %6, %0|%0, %6, %4}";
|
||||
}
|
||||
[(set_attr "type" "ssemov")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
Loading…
x
Reference in New Issue
Block a user