mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-08 12:10:39 +08:00
tree-vect-patterns.c (vect_recog_sad_pattern): New function for SAD pattern recognition.
2014-06-24 Cong Hou <congh@google.com> * tree-vect-patterns.c (vect_recog_sad_pattern): New function for SAD pattern recognition. (type_conversion_p): PROMOTION is true if it's a type promotion conversion, and false otherwise. Return true if the given expression is a type conversion one. * tree-vectorizer.h: Adjust the number of patterns. * tree.def: Add SAD_EXPR. * optabs.def: Add sad_optab. * cfgexpand.c (expand_debug_expr): Add SAD_EXPR case. * expr.c (expand_expr_real_2): Likewise. * gimple-pretty-print.c (dump_ternary_rhs): Likewise. * gimple.c (get_gimple_rhs_num_ops): Likewise. * optabs.c (optab_for_tree_code): Likewise. * tree-cfg.c (estimate_operator_cost): Likewise. * tree-ssa-operands.c (get_expr_operands): Likewise. * tree-vect-loop.c (get_initial_def_for_reduction): Likewise. * config/i386/sse.md: Add SSE2 and AVX2 expand for SAD. * doc/generic.texi: Add document for SAD_EXPR. * doc/md.texi: Add document for ssad and usad. 2014-06-24 Cong Hou <congh@google.com> * gcc.dg/vect/vect-reduc-sad.c: New. * lib/target-supports.exp (check_effective_target_vect_usad_char): New. From-SVN: r211966
This commit is contained in:
parent
22543f17af
commit
79d652a570
@ -1,3 +1,25 @@
|
||||
2014-06-24 Cong Hou <congh@google.com>
|
||||
|
||||
* tree-vect-patterns.c (vect_recog_sad_pattern): New function for SAD
|
||||
pattern recognition.
|
||||
(type_conversion_p): PROMOTION is true if it's a type promotion
|
||||
conversion, and false otherwise. Return true if the given expression
|
||||
is a type conversion one.
|
||||
* tree-vectorizer.h: Adjust the number of patterns.
|
||||
* tree.def: Add SAD_EXPR.
|
||||
* optabs.def: Add sad_optab.
|
||||
* cfgexpand.c (expand_debug_expr): Add SAD_EXPR case.
|
||||
* expr.c (expand_expr_real_2): Likewise.
|
||||
* gimple-pretty-print.c (dump_ternary_rhs): Likewise.
|
||||
* gimple.c (get_gimple_rhs_num_ops): Likewise.
|
||||
* optabs.c (optab_for_tree_code): Likewise.
|
||||
* tree-cfg.c (estimate_operator_cost): Likewise.
|
||||
* tree-ssa-operands.c (get_expr_operands): Likewise.
|
||||
* tree-vect-loop.c (get_initial_def_for_reduction): Likewise.
|
||||
* config/i386/sse.md: Add SSE2 and AVX2 expand for SAD.
|
||||
* doc/generic.texi: Add document for SAD_EXPR.
|
||||
* doc/md.texi: Add document for ssad and usad.
|
||||
|
||||
2014-06-24 Trevor Saunders <tsaunders@mozilla.com>
|
||||
|
||||
* config/i386/winnt.c (i386_pe_section_type_flags): Fix const
|
||||
|
@ -3713,6 +3713,7 @@ expand_debug_expr (tree exp)
|
||||
{
|
||||
case COND_EXPR:
|
||||
case DOT_PROD_EXPR:
|
||||
case SAD_EXPR:
|
||||
case WIDEN_MULT_PLUS_EXPR:
|
||||
case WIDEN_MULT_MINUS_EXPR:
|
||||
case FMA_EXPR:
|
||||
|
@ -8260,6 +8260,36 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "usadv16qi"
|
||||
[(match_operand:V4SI 0 "register_operand")
|
||||
(match_operand:V16QI 1 "register_operand")
|
||||
(match_operand:V16QI 2 "nonimmediate_operand")
|
||||
(match_operand:V4SI 3 "nonimmediate_operand")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx t1 = gen_reg_rtx (V2DImode);
|
||||
rtx t2 = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_sse2_psadbw (t1, operands[1], operands[2]));
|
||||
convert_move (t2, t1, 0);
|
||||
emit_insn (gen_addv4si3 (operands[0], t2, operands[3]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "usadv32qi"
|
||||
[(match_operand:V8SI 0 "register_operand")
|
||||
(match_operand:V32QI 1 "register_operand")
|
||||
(match_operand:V32QI 2 "nonimmediate_operand")
|
||||
(match_operand:V8SI 3 "nonimmediate_operand")]
|
||||
"TARGET_AVX2"
|
||||
{
|
||||
rtx t1 = gen_reg_rtx (V4DImode);
|
||||
rtx t2 = gen_reg_rtx (V8SImode);
|
||||
emit_insn (gen_avx2_psadbw (t1, operands[1], operands[2]));
|
||||
convert_move (t2, t1, 0);
|
||||
emit_insn (gen_addv8si3 (operands[0], t2, operands[3]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "ashr<mode>3"
|
||||
[(set (match_operand:VI24_AVX2 0 "register_operand" "=x,x")
|
||||
(ashiftrt:VI24_AVX2
|
||||
|
@ -1702,6 +1702,7 @@ a value from @code{enum annot_expr_kind}.
|
||||
@tindex VEC_PACK_TRUNC_EXPR
|
||||
@tindex VEC_PACK_SAT_EXPR
|
||||
@tindex VEC_PACK_FIX_TRUNC_EXPR
|
||||
@tindex SAD_EXPR
|
||||
|
||||
@table @code
|
||||
@item VEC_LSHIFT_EXPR
|
||||
@ -1782,6 +1783,15 @@ value, it is taken from the second operand. It should never evaluate to
|
||||
any other value currently, but optimizations should not rely on that
|
||||
property. In contrast with a @code{COND_EXPR}, all operands are always
|
||||
evaluated.
|
||||
|
||||
@item SAD_EXPR
|
||||
This node represents the Sum of Absolute Differences operation. The three
|
||||
operands must be vectors of integral types. The first and second operand
|
||||
must have the same type. The size of the vector element of the third
|
||||
operand must be at lease twice of the size of the vector element of the
|
||||
first and second one. The SAD is calculated between the first and second
|
||||
operands, added to the third operand, and returned.
|
||||
|
||||
@end table
|
||||
|
||||
|
||||
|
@ -4842,6 +4842,16 @@ wider mode, is computed and added to operand 3. Operand 3 is of a mode equal or
|
||||
wider than the mode of the product. The result is placed in operand 0, which
|
||||
is of the same mode as operand 3.
|
||||
|
||||
@cindex @code{ssad@var{m}} instruction pattern
|
||||
@item @samp{ssad@var{m}}
|
||||
@cindex @code{usad@var{m}} instruction pattern
|
||||
@item @samp{usad@var{m}}
|
||||
Compute the sum of absolute differences of two signed/unsigned elements.
|
||||
Operand 1 and operand 2 are of the same mode. Their absolute difference, which
|
||||
is of a wider mode, is computed and added to operand 3. Operand 3 is of a mode
|
||||
equal or wider than the mode of the absolute difference. The result is placed
|
||||
in operand 0, which is of the same mode as operand 3.
|
||||
|
||||
@cindex @code{ssum_widen@var{m3}} instruction pattern
|
||||
@item @samp{ssum_widen@var{m3}}
|
||||
@cindex @code{usum_widen@var{m3}} instruction pattern
|
||||
|
14
gcc/expr.c
14
gcc/expr.c
@ -9110,6 +9110,20 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
|
||||
return target;
|
||||
}
|
||||
|
||||
case SAD_EXPR:
|
||||
{
|
||||
tree oprnd0 = treeop0;
|
||||
tree oprnd1 = treeop1;
|
||||
tree oprnd2 = treeop2;
|
||||
rtx op2;
|
||||
|
||||
expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
|
||||
op2 = expand_normal (oprnd2);
|
||||
target = expand_widen_pattern_expr (ops, op0, op1, op2,
|
||||
target, unsignedp);
|
||||
return target;
|
||||
}
|
||||
|
||||
case REALIGN_LOAD_EXPR:
|
||||
{
|
||||
tree oprnd0 = treeop0;
|
||||
|
@ -437,6 +437,16 @@ dump_ternary_rhs (pretty_printer *buffer, gimple gs, int spc, int flags)
|
||||
dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
|
||||
pp_greater (buffer);
|
||||
break;
|
||||
|
||||
case SAD_EXPR:
|
||||
pp_string (buffer, "SAD_EXPR <");
|
||||
dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
|
||||
pp_greater (buffer);
|
||||
break;
|
||||
|
||||
case VEC_PERM_EXPR:
|
||||
pp_string (buffer, "VEC_PERM_EXPR <");
|
||||
|
@ -1934,6 +1934,7 @@ get_gimple_rhs_num_ops (enum tree_code code)
|
||||
|| (SYM) == WIDEN_MULT_PLUS_EXPR \
|
||||
|| (SYM) == WIDEN_MULT_MINUS_EXPR \
|
||||
|| (SYM) == DOT_PROD_EXPR \
|
||||
|| (SYM) == SAD_EXPR \
|
||||
|| (SYM) == REALIGN_LOAD_EXPR \
|
||||
|| (SYM) == VEC_COND_EXPR \
|
||||
|| (SYM) == VEC_PERM_EXPR \
|
||||
|
@ -484,6 +484,9 @@ optab_for_tree_code (enum tree_code code, const_tree type,
|
||||
case DOT_PROD_EXPR:
|
||||
return TYPE_UNSIGNED (type) ? udot_prod_optab : sdot_prod_optab;
|
||||
|
||||
case SAD_EXPR:
|
||||
return TYPE_UNSIGNED (type) ? usad_optab : ssad_optab;
|
||||
|
||||
case WIDEN_MULT_PLUS_EXPR:
|
||||
return (TYPE_UNSIGNED (type)
|
||||
? (TYPE_SATURATING (type)
|
||||
|
@ -253,6 +253,8 @@ OPTAB_D (sdot_prod_optab, "sdot_prod$I$a")
|
||||
OPTAB_D (ssum_widen_optab, "widen_ssum$I$a3")
|
||||
OPTAB_D (udot_prod_optab, "udot_prod$I$a")
|
||||
OPTAB_D (usum_widen_optab, "widen_usum$I$a3")
|
||||
OPTAB_D (usad_optab, "usad$I$a")
|
||||
OPTAB_D (ssad_optab, "ssad$I$a")
|
||||
OPTAB_D (maskload_optab, "maskload$a")
|
||||
OPTAB_D (maskstore_optab, "maskstore$a")
|
||||
OPTAB_D (vec_extract_optab, "vec_extract$a")
|
||||
|
@ -1,3 +1,8 @@
|
||||
2014-06-24 Cong Hou <congh@google.com>
|
||||
|
||||
* gcc.dg/vect/vect-reduc-sad.c: New.
|
||||
* lib/target-supports.exp (check_effective_target_vect_usad_char): New.
|
||||
|
||||
2014-06-24 Marc Glisse <marc.glisse@inria.fr>
|
||||
|
||||
PR tree-optimization/57742
|
||||
|
54
gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
Normal file
54
gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c
Normal file
@ -0,0 +1,54 @@
|
||||
/* { dg-require-effective-target vect_usad_char } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
#define SAD N*N/2
|
||||
|
||||
unsigned char X[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
unsigned char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
|
||||
|
||||
/* Sum of absolute differences between arrays of unsigned char types.
|
||||
Detected as a sad pattern.
|
||||
Vectorized on targets that support sad for unsigned chars. */
|
||||
|
||||
__attribute__ ((noinline)) int
|
||||
foo (int len)
|
||||
{
|
||||
int i;
|
||||
int result = 0;
|
||||
|
||||
for (i = 0; i < len; i++)
|
||||
result += abs (X[i] - Y[i]);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
int i;
|
||||
int sad;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
X[i] = i;
|
||||
Y[i] = N - i;
|
||||
__asm__ volatile ("");
|
||||
}
|
||||
|
||||
sad = foo (N);
|
||||
if (sad != SAD)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_sad_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -3919,6 +3919,26 @@ proc check_effective_target_vect_udot_hi { } {
|
||||
return $et_vect_udot_hi_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# sad operation of unsigned chars, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_usad_char { } {
|
||||
global et_vect_usad_char
|
||||
|
||||
if [info exists et_vect_usad_char_saved] {
|
||||
verbose "check_effective_target_vect_usad_char: using cached result" 2
|
||||
} else {
|
||||
set et_vect_usad_char_saved 0
|
||||
if { ([istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]) } {
|
||||
set et_vect_usad_char_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_usad_char: returning $et_vect_usad_char_saved" 2
|
||||
return $et_vect_usad_char_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# demotion (packing) of shorts (to chars) and ints (to shorts)
|
||||
|
@ -3972,6 +3972,36 @@ verify_gimple_assign_ternary (gimple stmt)
|
||||
|
||||
return false;
|
||||
|
||||
case SAD_EXPR:
|
||||
if (!useless_type_conversion_p (rhs1_type, rhs2_type)
|
||||
|| !useless_type_conversion_p (lhs_type, rhs3_type)
|
||||
|| 2 * GET_MODE_BITSIZE (GET_MODE_INNER
|
||||
(TYPE_MODE (TREE_TYPE (rhs1_type))))
|
||||
> GET_MODE_BITSIZE (GET_MODE_INNER
|
||||
(TYPE_MODE (TREE_TYPE (lhs_type)))))
|
||||
{
|
||||
error ("type mismatch in sad expression");
|
||||
debug_generic_expr (lhs_type);
|
||||
debug_generic_expr (rhs1_type);
|
||||
debug_generic_expr (rhs2_type);
|
||||
debug_generic_expr (rhs3_type);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (TREE_CODE (rhs1_type) != VECTOR_TYPE
|
||||
|| TREE_CODE (rhs2_type) != VECTOR_TYPE
|
||||
|| TREE_CODE (rhs3_type) != VECTOR_TYPE)
|
||||
{
|
||||
error ("vector types expected in sad expression");
|
||||
debug_generic_expr (lhs_type);
|
||||
debug_generic_expr (rhs1_type);
|
||||
debug_generic_expr (rhs2_type);
|
||||
debug_generic_expr (rhs3_type);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
case DOT_PROD_EXPR:
|
||||
case REALIGN_LOAD_EXPR:
|
||||
/* FIXME. */
|
||||
|
@ -3696,6 +3696,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
|
||||
case WIDEN_SUM_EXPR:
|
||||
case WIDEN_MULT_EXPR:
|
||||
case DOT_PROD_EXPR:
|
||||
case SAD_EXPR:
|
||||
case WIDEN_MULT_PLUS_EXPR:
|
||||
case WIDEN_MULT_MINUS_EXPR:
|
||||
case WIDEN_LSHIFT_EXPR:
|
||||
|
@ -859,6 +859,7 @@ get_expr_operands (struct function *fn, gimple stmt, tree *expr_p, int flags)
|
||||
}
|
||||
|
||||
case DOT_PROD_EXPR:
|
||||
case SAD_EXPR:
|
||||
case REALIGN_LOAD_EXPR:
|
||||
case WIDEN_MULT_PLUS_EXPR:
|
||||
case WIDEN_MULT_MINUS_EXPR:
|
||||
|
@ -3694,6 +3694,7 @@ get_initial_def_for_reduction (gimple stmt, tree init_val,
|
||||
{
|
||||
case WIDEN_SUM_EXPR:
|
||||
case DOT_PROD_EXPR:
|
||||
case SAD_EXPR:
|
||||
case PLUS_EXPR:
|
||||
case MINUS_EXPR:
|
||||
case BIT_IOR_EXPR:
|
||||
|
@ -58,6 +58,8 @@ static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
|
||||
tree *);
|
||||
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
|
||||
tree *);
|
||||
static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
|
||||
tree *);
|
||||
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
|
||||
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
|
||||
tree *);
|
||||
@ -75,6 +77,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
|
||||
vect_recog_widen_mult_pattern,
|
||||
vect_recog_widen_sum_pattern,
|
||||
vect_recog_dot_prod_pattern,
|
||||
vect_recog_sad_pattern,
|
||||
vect_recog_pow_pattern,
|
||||
vect_recog_widen_shift_pattern,
|
||||
vect_recog_over_widening_pattern,
|
||||
@ -153,9 +156,8 @@ vect_single_imm_use (gimple def_stmt)
|
||||
}
|
||||
|
||||
/* Check whether NAME, an ssa-name used in USE_STMT,
|
||||
is a result of a type promotion or demotion, such that:
|
||||
is a result of a type promotion, such that:
|
||||
DEF_STMT: NAME = NOP (name0)
|
||||
where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
|
||||
If CHECK_SIGN is TRUE, check that either both types are signed or both are
|
||||
unsigned. */
|
||||
|
||||
@ -202,10 +204,8 @@ type_conversion_p (tree name, gimple use_stmt, bool check_sign,
|
||||
|
||||
if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
|
||||
*promotion = true;
|
||||
else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
|
||||
*promotion = false;
|
||||
else
|
||||
return false;
|
||||
*promotion = false;
|
||||
|
||||
if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
|
||||
bb_vinfo, &dummy_gimple, &dummy, &dt))
|
||||
@ -448,6 +448,240 @@ vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
|
||||
}
|
||||
|
||||
|
||||
/* Function vect_recog_sad_pattern
|
||||
|
||||
Try to find the following Sum of Absolute Difference (SAD) pattern:
|
||||
|
||||
type x_t, y_t;
|
||||
signed TYPE1 diff, abs_diff;
|
||||
TYPE2 sum = init;
|
||||
loop:
|
||||
sum_0 = phi <init, sum_1>
|
||||
S1 x_t = ...
|
||||
S2 y_t = ...
|
||||
S3 x_T = (TYPE1) x_t;
|
||||
S4 y_T = (TYPE1) y_t;
|
||||
S5 diff = x_T - y_T;
|
||||
S6 abs_diff = ABS_EXPR <diff>;
|
||||
[S7 abs_diff = (TYPE2) abs_diff; #optional]
|
||||
S8 sum_1 = abs_diff + sum_0;
|
||||
|
||||
where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
|
||||
same size of 'TYPE1' or bigger. This is a special case of a reduction
|
||||
computation.
|
||||
|
||||
Input:
|
||||
|
||||
* STMTS: Contains a stmt from which the pattern search begins. In the
|
||||
example, when this function is called with S8, the pattern
|
||||
{S3,S4,S5,S6,S7,S8} will be detected.
|
||||
|
||||
Output:
|
||||
|
||||
* TYPE_IN: The type of the input arguments to the pattern.
|
||||
|
||||
* TYPE_OUT: The type of the output of this pattern.
|
||||
|
||||
* Return value: A new stmt that will be used to replace the sequence of
|
||||
stmts that constitute the pattern. In this case it will be:
|
||||
SAD_EXPR <x_t, y_t, sum_0>
|
||||
*/
|
||||
|
||||
static gimple
|
||||
vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
|
||||
tree *type_out)
|
||||
{
|
||||
gimple last_stmt = (*stmts)[0];
|
||||
tree sad_oprnd0, sad_oprnd1;
|
||||
stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
|
||||
tree half_type;
|
||||
loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
|
||||
struct loop *loop;
|
||||
bool promotion;
|
||||
|
||||
if (!loop_info)
|
||||
return NULL;
|
||||
|
||||
loop = LOOP_VINFO_LOOP (loop_info);
|
||||
|
||||
if (!is_gimple_assign (last_stmt))
|
||||
return NULL;
|
||||
|
||||
tree sum_type = gimple_expr_type (last_stmt);
|
||||
|
||||
/* Look for the following pattern
|
||||
DX = (TYPE1) X;
|
||||
DY = (TYPE1) Y;
|
||||
DDIFF = DX - DY;
|
||||
DAD = ABS_EXPR <DDIFF>;
|
||||
DDPROD = (TYPE2) DPROD;
|
||||
sum_1 = DAD + sum_0;
|
||||
In which
|
||||
- DX is at least double the size of X
|
||||
- DY is at least double the size of Y
|
||||
- DX, DY, DDIFF, DAD all have the same type
|
||||
- sum is the same size of DAD or bigger
|
||||
- sum has been recognized as a reduction variable.
|
||||
|
||||
This is equivalent to:
|
||||
DDIFF = X w- Y; #widen sub
|
||||
DAD = ABS_EXPR <DDIFF>;
|
||||
sum_1 = DAD w+ sum_0; #widen summation
|
||||
or
|
||||
DDIFF = X w- Y; #widen sub
|
||||
DAD = ABS_EXPR <DDIFF>;
|
||||
sum_1 = DAD + sum_0; #summation
|
||||
*/
|
||||
|
||||
/* Starting from LAST_STMT, follow the defs of its uses in search
|
||||
of the above pattern. */
|
||||
|
||||
if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
|
||||
return NULL;
|
||||
|
||||
tree plus_oprnd0, plus_oprnd1;
|
||||
|
||||
if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
|
||||
{
|
||||
/* Has been detected as widening-summation? */
|
||||
|
||||
gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
|
||||
sum_type = gimple_expr_type (stmt);
|
||||
if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
|
||||
return NULL;
|
||||
plus_oprnd0 = gimple_assign_rhs1 (stmt);
|
||||
plus_oprnd1 = gimple_assign_rhs2 (stmt);
|
||||
half_type = TREE_TYPE (plus_oprnd0);
|
||||
}
|
||||
else
|
||||
{
|
||||
gimple def_stmt;
|
||||
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
|
||||
return NULL;
|
||||
plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
|
||||
plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
|
||||
if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
|
||||
|| !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
|
||||
return NULL;
|
||||
|
||||
/* The type conversion could be promotion, demotion,
|
||||
or just signed -> unsigned. */
|
||||
if (type_conversion_p (plus_oprnd0, last_stmt, false,
|
||||
&half_type, &def_stmt, &promotion))
|
||||
plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
|
||||
else
|
||||
half_type = sum_type;
|
||||
}
|
||||
|
||||
/* So far so good. Since last_stmt was detected as a (summation) reduction,
|
||||
we know that plus_oprnd1 is the reduction variable (defined by a loop-header
|
||||
phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
|
||||
Then check that plus_oprnd0 is defined by an abs_expr. */
|
||||
|
||||
if (TREE_CODE (plus_oprnd0) != SSA_NAME)
|
||||
return NULL;
|
||||
|
||||
tree abs_type = half_type;
|
||||
gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
|
||||
|
||||
/* It could not be the sad pattern if the abs_stmt is outside the loop. */
|
||||
if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
|
||||
return NULL;
|
||||
|
||||
/* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
|
||||
inside the loop (in case we are analyzing an outer-loop). */
|
||||
if (!is_gimple_assign (abs_stmt))
|
||||
return NULL;
|
||||
|
||||
stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
|
||||
gcc_assert (abs_stmt_vinfo);
|
||||
if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
|
||||
return NULL;
|
||||
if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
|
||||
return NULL;
|
||||
|
||||
tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
|
||||
if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
|
||||
return NULL;
|
||||
if (TYPE_UNSIGNED (abs_type))
|
||||
return NULL;
|
||||
|
||||
/* We then detect if the operand of abs_expr is defined by a minus_expr. */
|
||||
|
||||
if (TREE_CODE (abs_oprnd) != SSA_NAME)
|
||||
return NULL;
|
||||
|
||||
gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
|
||||
|
||||
/* It could not be the sad pattern if the diff_stmt is outside the loop. */
|
||||
if (!gimple_bb (diff_stmt)
|
||||
|| !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
|
||||
return NULL;
|
||||
|
||||
/* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
|
||||
inside the loop (in case we are analyzing an outer-loop). */
|
||||
if (!is_gimple_assign (diff_stmt))
|
||||
return NULL;
|
||||
|
||||
stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
|
||||
gcc_assert (diff_stmt_vinfo);
|
||||
if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
|
||||
return NULL;
|
||||
if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
|
||||
return NULL;
|
||||
|
||||
tree half_type0, half_type1;
|
||||
gimple def_stmt;
|
||||
|
||||
tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
|
||||
tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
|
||||
|
||||
if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
|
||||
|| !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
|
||||
return NULL;
|
||||
if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
|
||||
&half_type0, &def_stmt, &promotion)
|
||||
|| !promotion)
|
||||
return NULL;
|
||||
sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
|
||||
|
||||
if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
|
||||
&half_type1, &def_stmt, &promotion)
|
||||
|| !promotion)
|
||||
return NULL;
|
||||
sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
|
||||
|
||||
if (!types_compatible_p (half_type0, half_type1))
|
||||
return NULL;
|
||||
if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
|
||||
|| TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
|
||||
return NULL;
|
||||
|
||||
*type_in = TREE_TYPE (sad_oprnd0);
|
||||
*type_out = sum_type;
|
||||
|
||||
/* Pattern detected. Create a stmt to be used to replace the pattern: */
|
||||
tree var = vect_recog_temp_ssa_var (sum_type, NULL);
|
||||
gimple pattern_stmt = gimple_build_assign_with_ops
|
||||
(SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"vect_recog_sad_pattern: detected: ");
|
||||
dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
|
||||
dump_printf (MSG_NOTE, "\n");
|
||||
}
|
||||
|
||||
/* We don't allow changing the order of the computation in the inner-loop
|
||||
when doing outer-loop vectorization. */
|
||||
gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
|
||||
|
||||
return pattern_stmt;
|
||||
}
|
||||
|
||||
|
||||
/* Handle widening operation by a constant. At the moment we support MULT_EXPR
|
||||
and LSHIFT_EXPR.
|
||||
|
||||
|
@ -1123,7 +1123,7 @@ extern void vect_slp_transform_bb (basic_block);
|
||||
Additional pattern recognition functions can (and will) be added
|
||||
in the future. */
|
||||
typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
|
||||
#define NUM_PATTERNS 11
|
||||
#define NUM_PATTERNS 12
|
||||
void vect_pattern_recog (loop_vec_info, bb_vec_info);
|
||||
|
||||
/* In tree-vectorizer.c. */
|
||||
|
16
gcc/tree.def
16
gcc/tree.def
@ -1185,6 +1185,22 @@ DEFTREECODE (DOT_PROD_EXPR, "dot_prod_expr", tcc_expression, 3)
|
||||
with the second argument. */
|
||||
DEFTREECODE (WIDEN_SUM_EXPR, "widen_sum_expr", tcc_binary, 2)
|
||||
|
||||
/* Widening sad (sum of absolute differences).
|
||||
The first two arguments are of type t1 which should be integer.
|
||||
The third argument and the result are of type t2, such that t2 is at least
|
||||
twice the size of t1. Like DOT_PROD_EXPR, SAD_EXPR (arg1,arg2,arg3) is
|
||||
equivalent to (note we don't have WIDEN_MINUS_EXPR now, but we assume its
|
||||
behavior is similar to WIDEN_SUM_EXPR):
|
||||
tmp = WIDEN_MINUS_EXPR (arg1, arg2)
|
||||
tmp2 = ABS_EXPR (tmp)
|
||||
arg3 = PLUS_EXPR (tmp2, arg3)
|
||||
or:
|
||||
tmp = WIDEN_MINUS_EXPR (arg1, arg2)
|
||||
tmp2 = ABS_EXPR (tmp)
|
||||
arg3 = WIDEN_SUM_EXPR (tmp2, arg3)
|
||||
*/
|
||||
DEFTREECODE (SAD_EXPR, "sad_expr", tcc_expression, 3)
|
||||
|
||||
/* Widening multiplication.
|
||||
The two arguments are of type t1.
|
||||
The result is of type t2, such that t2 is at least twice
|
||||
|
Loading…
x
Reference in New Issue
Block a user