mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-18 13:01:06 +08:00
Replace REDUC_*_EXPRs with internal functions.
This patch replaces the REDUC_*_EXPR tree codes with internal functions. This is needed so that the upcoming in-order reductions can also use internal functions without too much complication. 2017-11-22 Richard Sandiford <richard.sandiford@linaro.org> gcc/ * tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Delete. * cfgexpand.c (expand_debug_expr): Remove handling for them. * expr.c (expand_expr_real_2): Likewise. * fold-const.c (const_unop): Likewise. * optabs-tree.c (optab_for_tree_code): Likewise. * tree-cfg.c (verify_gimple_assign_unary): Likewise. * tree-inline.c (estimate_operator_cost): Likewise. * tree-pretty-print.c (dump_generic_node): Likewise. (op_code_prio): Likewise. (op_symbol_code): Likewise. * internal-fn.def (DEF_INTERNAL_SIGNED_OPTAB_FN): Define. (IFN_REDUC_PLUS, IFN_REDUC_MAX, IFN_REDUC_MIN): New internal functions. * internal-fn.c (direct_internal_fn_optab): New function. (direct_internal_fn_array, direct_internal_fn_supported_p) (internal_fn_expanders): Handle DEF_INTERNAL_SIGNED_OPTAB_FN. * fold-const-call.c (fold_const_reduction): New function. (fold_const_call): Handle CFN_REDUC_PLUS, CFN_REDUC_MAX and CFN_REDUC_MIN. * tree-vect-loop.c: Include internal-fn.h. (reduction_code_for_scalar_code): Rename to... (reduction_fn_for_scalar_code): ...this and return an internal function. (vect_model_reduction_cost): Take an internal_fn rather than a tree_code. (vect_create_epilog_for_reduction): Likewise. Build calls rather than assignments. (vectorizable_reduction): Use internal functions rather than tree codes for the reduction operation. Update calls to the functions above. * config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin): Use calls to internal functions rather than REDUC tree codes. * config/aarch64/aarch64-simd.md: Update comment accordingly. From-SVN: r255073
This commit is contained in:
parent
5675291ddb
commit
16d24520af
@ -1,3 +1,38 @@
|
||||
2017-11-22 Richard Sandiford <richard.sandiford@linaro.org>
|
||||
|
||||
* tree.def (REDUC_MAX_EXPR, REDUC_MIN_EXPR, REDUC_PLUS_EXPR): Delete.
|
||||
* cfgexpand.c (expand_debug_expr): Remove handling for them.
|
||||
* expr.c (expand_expr_real_2): Likewise.
|
||||
* fold-const.c (const_unop): Likewise.
|
||||
* optabs-tree.c (optab_for_tree_code): Likewise.
|
||||
* tree-cfg.c (verify_gimple_assign_unary): Likewise.
|
||||
* tree-inline.c (estimate_operator_cost): Likewise.
|
||||
* tree-pretty-print.c (dump_generic_node): Likewise.
|
||||
(op_code_prio): Likewise.
|
||||
(op_symbol_code): Likewise.
|
||||
* internal-fn.def (DEF_INTERNAL_SIGNED_OPTAB_FN): Define.
|
||||
(IFN_REDUC_PLUS, IFN_REDUC_MAX, IFN_REDUC_MIN): New internal functions.
|
||||
* internal-fn.c (direct_internal_fn_optab): New function.
|
||||
(direct_internal_fn_array, direct_internal_fn_supported_p)
|
||||
(internal_fn_expanders): Handle DEF_INTERNAL_SIGNED_OPTAB_FN.
|
||||
* fold-const-call.c (fold_const_reduction): New function.
|
||||
(fold_const_call): Handle CFN_REDUC_PLUS, CFN_REDUC_MAX and
|
||||
CFN_REDUC_MIN.
|
||||
* tree-vect-loop.c: Include internal-fn.h.
|
||||
(reduction_code_for_scalar_code): Rename to...
|
||||
(reduction_fn_for_scalar_code): ...this and return an internal
|
||||
function.
|
||||
(vect_model_reduction_cost): Take an internal_fn rather than
|
||||
a tree_code.
|
||||
(vect_create_epilog_for_reduction): Likewise. Build calls rather
|
||||
than assignments.
|
||||
(vectorizable_reduction): Use internal functions rather than tree
|
||||
codes for the reduction operation. Update calls to the functions
|
||||
above.
|
||||
* config/aarch64/aarch64-builtins.c (aarch64_gimple_fold_builtin):
|
||||
Use calls to internal functions rather than REDUC tree codes.
|
||||
* config/aarch64/aarch64-simd.md: Update comment accordingly.
|
||||
|
||||
2017-11-22 Olivier Hainque <hainque@adacore.com>
|
||||
|
||||
* config/vxworks.c (vxworks_override_options): Pick default
|
||||
|
@ -5051,9 +5051,6 @@ expand_debug_expr (tree exp)
|
||||
|
||||
/* Vector stuff. For most of the codes we don't have rtl codes. */
|
||||
case REALIGN_LOAD_EXPR:
|
||||
case REDUC_MAX_EXPR:
|
||||
case REDUC_MIN_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
case VEC_COND_EXPR:
|
||||
case VEC_PACK_FIX_TRUNC_EXPR:
|
||||
case VEC_PACK_SAT_EXPR:
|
||||
|
@ -1597,24 +1597,27 @@ aarch64_gimple_fold_builtin (gimple_stmt_iterator *gsi)
|
||||
? gimple_call_arg_ptr (stmt, 0)
|
||||
: &error_mark_node);
|
||||
|
||||
/* We use gimple's REDUC_(PLUS|MIN|MAX)_EXPRs for float, signed int
|
||||
/* We use gimple's IFN_REDUC_(PLUS|MIN|MAX)s for float, signed int
|
||||
and unsigned int; it will distinguish according to the types of
|
||||
the arguments to the __builtin. */
|
||||
switch (fcode)
|
||||
{
|
||||
BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
|
||||
new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
|
||||
REDUC_PLUS_EXPR, args[0]);
|
||||
new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
|
||||
1, args[0]);
|
||||
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
|
||||
break;
|
||||
BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
|
||||
BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
|
||||
new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
|
||||
REDUC_MAX_EXPR, args[0]);
|
||||
new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
|
||||
1, args[0]);
|
||||
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
|
||||
break;
|
||||
BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
|
||||
BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
|
||||
new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
|
||||
REDUC_MIN_EXPR, args[0]);
|
||||
new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
|
||||
1, args[0]);
|
||||
gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
|
||||
break;
|
||||
BUILTIN_GPF (BINOP, fmulx, 0)
|
||||
{
|
||||
|
@ -2338,7 +2338,7 @@
|
||||
;; 'across lanes' max and min ops.
|
||||
|
||||
;; Template for outputting a scalar, so we can create __builtins which can be
|
||||
;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
|
||||
;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
|
||||
(define_expand "reduc_<maxmin_uns>_scal_<mode>"
|
||||
[(match_operand:<VEL> 0 "register_operand")
|
||||
(unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
|
||||
|
20
gcc/expr.c
20
gcc/expr.c
@ -9368,26 +9368,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
|
||||
return target;
|
||||
}
|
||||
|
||||
case REDUC_MAX_EXPR:
|
||||
case REDUC_MIN_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
{
|
||||
op0 = expand_normal (treeop0);
|
||||
this_optab = optab_for_tree_code (code, type, optab_default);
|
||||
machine_mode vec_mode = TYPE_MODE (TREE_TYPE (treeop0));
|
||||
|
||||
struct expand_operand ops[2];
|
||||
enum insn_code icode = optab_handler (this_optab, vec_mode);
|
||||
|
||||
create_output_operand (&ops[0], target, mode);
|
||||
create_input_operand (&ops[1], op0, vec_mode);
|
||||
expand_insn (icode, 2, ops);
|
||||
target = ops[0].value;
|
||||
if (GET_MODE (target) != mode)
|
||||
return gen_lowpart (tmode, target);
|
||||
return target;
|
||||
}
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
{
|
||||
|
@ -583,6 +583,25 @@ fold_const_builtin_nan (tree type, tree arg, bool quiet)
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Fold a call to IFN_REDUC_<CODE> (ARG), returning a value of type TYPE.
   CODE is the binary tree code (PLUS_EXPR, MAX_EXPR or MIN_EXPR in the
   callers visible in this patch) that is applied across the elements
   of ARG in index order, starting from element 0.

   Return NULL_TREE if ARG is not a VECTOR_CST, or if any intermediate
   fold fails or does not produce a constant.  */
|
||||
|
||||
static tree
|
||||
fold_const_reduction (tree type, tree arg, tree_code code)
|
||||
{
|
||||
if (TREE_CODE (arg) != VECTOR_CST)
|
||||
return NULL_TREE;
|
||||
|
||||
/* Seed the accumulator with element 0; the loop folds in the rest.  */
tree res = VECTOR_CST_ELT (arg, 0);
|
||||
unsigned int nelts = VECTOR_CST_NELTS (arg);
|
||||
for (unsigned int i = 1; i < nelts; i++)
|
||||
{
|
||||
res = const_binop (code, type, res, VECTOR_CST_ELT (arg, i));
|
||||
/* Give up as soon as one step cannot be folded to a constant.  */
if (res == NULL_TREE || !CONSTANT_CLASS_P (res))
|
||||
return NULL_TREE;
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
/* Try to evaluate:
|
||||
|
||||
*RESULT = FN (*ARG)
|
||||
@ -1148,6 +1167,15 @@ fold_const_call (combined_fn fn, tree type, tree arg)
|
||||
CASE_FLT_FN_FLOATN_NX (CFN_BUILT_IN_NANS):
|
||||
return fold_const_builtin_nan (type, arg, false);
|
||||
|
||||
case CFN_REDUC_PLUS:
|
||||
return fold_const_reduction (type, arg, PLUS_EXPR);
|
||||
|
||||
case CFN_REDUC_MAX:
|
||||
return fold_const_reduction (type, arg, MAX_EXPR);
|
||||
|
||||
case CFN_REDUC_MIN:
|
||||
return fold_const_reduction (type, arg, MIN_EXPR);
|
||||
|
||||
default:
|
||||
return fold_const_call_1 (fn, type, arg);
|
||||
}
|
||||
|
@ -1717,36 +1717,6 @@ const_unop (enum tree_code code, tree type, tree arg0)
|
||||
return build_vector (type, elts);
|
||||
}
|
||||
|
||||
case REDUC_MIN_EXPR:
|
||||
case REDUC_MAX_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
{
|
||||
unsigned int nelts, i;
|
||||
enum tree_code subcode;
|
||||
|
||||
if (TREE_CODE (arg0) != VECTOR_CST)
|
||||
return NULL_TREE;
|
||||
nelts = VECTOR_CST_NELTS (arg0);
|
||||
|
||||
switch (code)
|
||||
{
|
||||
case REDUC_MIN_EXPR: subcode = MIN_EXPR; break;
|
||||
case REDUC_MAX_EXPR: subcode = MAX_EXPR; break;
|
||||
case REDUC_PLUS_EXPR: subcode = PLUS_EXPR; break;
|
||||
default: gcc_unreachable ();
|
||||
}
|
||||
|
||||
tree res = VECTOR_CST_ELT (arg0, 0);
|
||||
for (i = 1; i < nelts; i++)
|
||||
{
|
||||
res = const_binop (subcode, res, VECTOR_CST_ELT (arg0, i));
|
||||
if (res == NULL_TREE || !CONSTANT_CLASS_P (res))
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -90,6 +90,8 @@ init_internal_fns ()
|
||||
const direct_internal_fn_info direct_internal_fn_array[IFN_LAST + 1] = {
|
||||
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) not_direct,
|
||||
#define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) TYPE##_direct,
|
||||
#define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
|
||||
UNSIGNED_OPTAB, TYPE) TYPE##_direct,
|
||||
#include "internal-fn.def"
|
||||
not_direct
|
||||
};
|
||||
@ -2818,6 +2820,30 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
|
||||
#define direct_mask_store_optab_supported_p direct_optab_supported_p
|
||||
#define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
|
||||
|
||||
/* Return the optab used by internal function FN.

   For functions defined with DEF_INTERNAL_OPTAB_FN this is simply the
   optab named in internal-fn.def.  For functions defined with
   DEF_INTERNAL_SIGNED_OPTAB_FN, the signedness of the SELECTOR member
   of TYPES chooses between the unsigned and the signed optab.

   Functions defined with plain DEF_INTERNAL_FN, and IFN_LAST, have no
   optab here: they break out of the switch and reach gcc_unreachable,
   so callers must only pass functions that map to an optab.  */
|
||||
|
||||
static optab
|
||||
direct_internal_fn_optab (internal_fn fn, tree_pair types)
|
||||
{
|
||||
switch (fn)
|
||||
{
|
||||
#define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC) \
|
||||
case IFN_##CODE: break;
|
||||
#define DEF_INTERNAL_OPTAB_FN(CODE, FLAGS, OPTAB, TYPE) \
|
||||
case IFN_##CODE: return OPTAB##_optab;
|
||||
#define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
|
||||
UNSIGNED_OPTAB, TYPE) \
|
||||
case IFN_##CODE: return (TYPE_UNSIGNED (types.SELECTOR) \
|
||||
? UNSIGNED_OPTAB ## _optab \
|
||||
: SIGNED_OPTAB ## _optab);
|
||||
#include "internal-fn.def"
|
||||
|
||||
case IFN_LAST:
|
||||
break;
|
||||
}
|
||||
/* Only reached for IFN_LAST or a function with no direct optab.  */
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* Return true if FN is supported for the types in TYPES when the
|
||||
optimization type is OPT_TYPE. The types are those associated with
|
||||
the "type0" and "type1" fields of FN's direct_internal_fn_info
|
||||
@ -2835,6 +2861,16 @@ direct_internal_fn_supported_p (internal_fn fn, tree_pair types,
|
||||
case IFN_##CODE: \
|
||||
return direct_##TYPE##_optab_supported_p (OPTAB##_optab, types, \
|
||||
opt_type);
|
||||
#define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
|
||||
UNSIGNED_OPTAB, TYPE) \
|
||||
case IFN_##CODE: \
|
||||
{ \
|
||||
optab which_optab = (TYPE_UNSIGNED (types.SELECTOR) \
|
||||
? UNSIGNED_OPTAB ## _optab \
|
||||
: SIGNED_OPTAB ## _optab); \
|
||||
return direct_##TYPE##_optab_supported_p (which_optab, types, \
|
||||
opt_type); \
|
||||
}
|
||||
#include "internal-fn.def"
|
||||
|
||||
case IFN_LAST:
|
||||
@ -2874,6 +2910,15 @@ set_edom_supported_p (void)
|
||||
{ \
|
||||
expand_##TYPE##_optab_fn (fn, stmt, OPTAB##_optab); \
|
||||
}
|
||||
#define DEF_INTERNAL_SIGNED_OPTAB_FN(CODE, FLAGS, SELECTOR, SIGNED_OPTAB, \
|
||||
UNSIGNED_OPTAB, TYPE) \
|
||||
static void \
|
||||
expand_##CODE (internal_fn fn, gcall *stmt) \
|
||||
{ \
|
||||
tree_pair types = direct_internal_fn_types (fn, stmt); \
|
||||
optab which_optab = direct_internal_fn_optab (fn, types); \
|
||||
expand_##TYPE##_optab_fn (fn, stmt, which_optab); \
|
||||
}
|
||||
#include "internal-fn.def"
|
||||
|
||||
/* Routines to expand each internal function, indexed by function number.
|
||||
|
@ -30,6 +30,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
|
||||
DEF_INTERNAL_FN (NAME, FLAGS, FNSPEC)
|
||||
DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
|
||||
DEF_INTERNAL_SIGNED_OPTAB_FN (NAME, FLAGS, SELECTOR, SIGNED_OPTAB,
|
||||
UNSIGNED_OPTAB, TYPE)
|
||||
DEF_INTERNAL_FLT_FN (NAME, FLAGS, OPTAB, TYPE)
|
||||
DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
|
||||
|
||||
@ -49,6 +51,12 @@ along with GCC; see the file COPYING3. If not see
|
||||
- mask_store: currently just maskstore
|
||||
- store_lanes: currently just vec_store_lanes
|
||||
|
||||
DEF_INTERNAL_SIGNED_OPTAB_FN defines an internal function that
|
||||
maps to one of two optabs, depending on the signedness of an input.
|
||||
SIGNED_OPTAB and UNSIGNED_OPTAB are the optabs for signed and
|
||||
unsigned inputs respectively, both without the trailing "_optab".
|
||||
SELECTOR says which type in the tree_pair determines the signedness.
|
||||
|
||||
DEF_INTERNAL_FLT_FN is like DEF_INTERNAL_OPTAB_FN, but in addition,
|
||||
the function implements the computational part of a built-in math
|
||||
function BUILT_IN_<NAME>{F,,L}. Unlike some built-in functions,
|
||||
@ -75,6 +83,12 @@ along with GCC; see the file COPYING3. If not see
|
||||
DEF_INTERNAL_FN (NAME, FLAGS | ECF_LEAF, NULL)
|
||||
#endif
|
||||
|
||||
#ifndef DEF_INTERNAL_SIGNED_OPTAB_FN
|
||||
#define DEF_INTERNAL_SIGNED_OPTAB_FN(NAME, FLAGS, SELECTOR, SIGNED_OPTAB, \
|
||||
UNSIGNED_OPTAB, TYPE) \
|
||||
DEF_INTERNAL_FN (NAME, FLAGS | ECF_LEAF, NULL)
|
||||
#endif
|
||||
|
||||
#ifndef DEF_INTERNAL_FLT_FN
|
||||
#define DEF_INTERNAL_FLT_FN(NAME, FLAGS, OPTAB, TYPE) \
|
||||
DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
|
||||
@ -98,6 +112,13 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
|
||||
|
||||
DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
|
||||
reduc_plus_scal, unary)
|
||||
DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MAX, ECF_CONST | ECF_NOTHROW, first,
|
||||
reduc_smax_scal, reduc_umax_scal, unary)
|
||||
DEF_INTERNAL_SIGNED_OPTAB_FN (REDUC_MIN, ECF_CONST | ECF_NOTHROW, first,
|
||||
reduc_smin_scal, reduc_umin_scal, unary)
|
||||
|
||||
/* Unary math functions. */
|
||||
DEF_INTERNAL_FLT_FN (ACOS, ECF_CONST, acos, unary)
|
||||
DEF_INTERNAL_FLT_FN (ASIN, ECF_CONST, asin, unary)
|
||||
@ -236,5 +257,6 @@ DEF_INTERNAL_FN (DIVMOD, ECF_CONST | ECF_LEAF, NULL)
|
||||
#undef DEF_INTERNAL_INT_FN
|
||||
#undef DEF_INTERNAL_FLT_FN
|
||||
#undef DEF_INTERNAL_FLT_FLOATN_FN
|
||||
#undef DEF_INTERNAL_SIGNED_OPTAB_FN
|
||||
#undef DEF_INTERNAL_OPTAB_FN
|
||||
#undef DEF_INTERNAL_FN
|
||||
|
@ -146,17 +146,6 @@ optab_for_tree_code (enum tree_code code, const_tree type,
|
||||
case FMA_EXPR:
|
||||
return fma_optab;
|
||||
|
||||
case REDUC_MAX_EXPR:
|
||||
return TYPE_UNSIGNED (type)
|
||||
? reduc_umax_scal_optab : reduc_smax_scal_optab;
|
||||
|
||||
case REDUC_MIN_EXPR:
|
||||
return TYPE_UNSIGNED (type)
|
||||
? reduc_umin_scal_optab : reduc_smin_scal_optab;
|
||||
|
||||
case REDUC_PLUS_EXPR:
|
||||
return reduc_plus_scal_optab;
|
||||
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
|
||||
|
@ -3792,18 +3792,6 @@ verify_gimple_assign_unary (gassign *stmt)
|
||||
|
||||
return false;
|
||||
}
|
||||
case REDUC_MAX_EXPR:
|
||||
case REDUC_MIN_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
if (!VECTOR_TYPE_P (rhs1_type)
|
||||
|| !useless_type_conversion_p (lhs_type, TREE_TYPE (rhs1_type)))
|
||||
{
|
||||
error ("reduction should convert from vector to element type");
|
||||
debug_generic_expr (lhs_type);
|
||||
debug_generic_expr (rhs1_type);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
|
@ -3877,9 +3877,6 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
|
||||
|
||||
case REALIGN_LOAD_EXPR:
|
||||
|
||||
case REDUC_MAX_EXPR:
|
||||
case REDUC_MIN_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
case WIDEN_SUM_EXPR:
|
||||
case WIDEN_MULT_EXPR:
|
||||
case DOT_PROD_EXPR:
|
||||
|
@ -3209,24 +3209,6 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags,
|
||||
is_expr = false;
|
||||
break;
|
||||
|
||||
case REDUC_MAX_EXPR:
|
||||
pp_string (pp, " REDUC_MAX_EXPR < ");
|
||||
dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (pp, " > ");
|
||||
break;
|
||||
|
||||
case REDUC_MIN_EXPR:
|
||||
pp_string (pp, " REDUC_MIN_EXPR < ");
|
||||
dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (pp, " > ");
|
||||
break;
|
||||
|
||||
case REDUC_PLUS_EXPR:
|
||||
pp_string (pp, " REDUC_PLUS_EXPR < ");
|
||||
dump_generic_node (pp, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (pp, " > ");
|
||||
break;
|
||||
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
case VEC_WIDEN_MULT_LO_EXPR:
|
||||
case VEC_WIDEN_MULT_EVEN_EXPR:
|
||||
@ -3606,9 +3588,6 @@ op_code_prio (enum tree_code code)
|
||||
case ABS_EXPR:
|
||||
case REALPART_EXPR:
|
||||
case IMAGPART_EXPR:
|
||||
case REDUC_MAX_EXPR:
|
||||
case REDUC_MIN_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
case VEC_UNPACK_FLOAT_HI_EXPR:
|
||||
@ -3727,9 +3706,6 @@ op_symbol_code (enum tree_code code)
|
||||
case PLUS_EXPR:
|
||||
return "+";
|
||||
|
||||
case REDUC_PLUS_EXPR:
|
||||
return "r+";
|
||||
|
||||
case WIDEN_SUM_EXPR:
|
||||
return "w+";
|
||||
|
||||
|
@ -50,6 +50,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "cgraph.h"
|
||||
#include "tree-cfg.h"
|
||||
#include "tree-if-conv.h"
|
||||
#include "internal-fn.h"
|
||||
|
||||
/* Loop Vectorization Pass.
|
||||
|
||||
@ -2376,35 +2377,34 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo)
|
||||
}
|
||||
|
||||
|
||||
/* Function reduction_code_for_scalar_code
|
||||
/* Function reduction_fn_for_scalar_code
|
||||
|
||||
Input:
|
||||
CODE - tree_code of a reduction operation.
|
||||
|
||||
Output:
|
||||
REDUC_CODE - the corresponding tree-code to be used to reduce the
|
||||
vector of partial results into a single scalar result, or ERROR_MARK
|
||||
REDUC_FN - the corresponding internal function to be used to reduce the
|
||||
vector of partial results into a single scalar result, or IFN_LAST
|
||||
if the operation is a supported reduction operation, but does not have
|
||||
such a tree-code.
|
||||
such an internal function.
|
||||
|
||||
Return FALSE if CODE currently cannot be vectorized as reduction. */
|
||||
|
||||
static bool
|
||||
reduction_code_for_scalar_code (enum tree_code code,
|
||||
enum tree_code *reduc_code)
|
||||
reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case MAX_EXPR:
|
||||
*reduc_code = REDUC_MAX_EXPR;
|
||||
*reduc_fn = IFN_REDUC_MAX;
|
||||
return true;
|
||||
|
||||
case MIN_EXPR:
|
||||
*reduc_code = REDUC_MIN_EXPR;
|
||||
*reduc_fn = IFN_REDUC_MIN;
|
||||
return true;
|
||||
|
||||
case PLUS_EXPR:
|
||||
*reduc_code = REDUC_PLUS_EXPR;
|
||||
*reduc_fn = IFN_REDUC_PLUS;
|
||||
return true;
|
||||
|
||||
case MULT_EXPR:
|
||||
@ -2412,7 +2412,7 @@ reduction_code_for_scalar_code (enum tree_code code,
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
case BIT_AND_EXPR:
|
||||
*reduc_code = ERROR_MARK;
|
||||
*reduc_fn = IFN_LAST;
|
||||
return true;
|
||||
|
||||
default:
|
||||
@ -3745,7 +3745,7 @@ have_whole_vector_shift (machine_mode mode)
|
||||
the loop, and the epilogue code that must be generated. */
|
||||
|
||||
static void
|
||||
vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
|
||||
vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
|
||||
int ncopies)
|
||||
{
|
||||
int prologue_cost = 0, epilogue_cost = 0;
|
||||
@ -3799,7 +3799,7 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code,
|
||||
|
||||
if (!loop || !nested_in_vect_loop_p (loop, orig_stmt))
|
||||
{
|
||||
if (reduc_code != ERROR_MARK)
|
||||
if (reduc_fn != IFN_LAST)
|
||||
{
|
||||
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
|
||||
{
|
||||
@ -4266,7 +4266,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
|
||||
we have to generate more than one vector stmt - i.e - we need to "unroll"
|
||||
the vector stmt by a factor VF/nunits. For more details see documentation
|
||||
in vectorizable_operation.
|
||||
REDUC_CODE is the tree-code for the epilog reduction.
|
||||
REDUC_FN is the internal function for the epilog reduction.
|
||||
REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
|
||||
computation.
|
||||
REDUC_INDEX is the index of the operand in the right hand side of the
|
||||
@ -4282,7 +4282,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
|
||||
The loop-latch argument is taken from VECT_DEFS - the vector of partial
|
||||
sums.
|
||||
2. "Reduces" each vector of partial results VECT_DEFS into a single result,
|
||||
by applying the operation specified by REDUC_CODE if available, or by
|
||||
by calling the function specified by REDUC_FN if available, or by
|
||||
other means (whole-vector shifts or a scalar loop).
|
||||
The function also creates a new phi node at the loop exit to preserve
|
||||
loop-closed form, as illustrated below.
|
||||
@ -4317,7 +4317,7 @@ get_initial_defs_for_reduction (slp_tree slp_node,
|
||||
static void
|
||||
vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
gimple *reduc_def_stmt,
|
||||
int ncopies, enum tree_code reduc_code,
|
||||
int ncopies, internal_fn reduc_fn,
|
||||
vec<gimple *> reduction_phis,
|
||||
bool double_reduc,
|
||||
slp_tree slp_node,
|
||||
@ -4569,7 +4569,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
step 3: adjust the scalar result (s_out3) if needed.
|
||||
|
||||
Step 1 can be accomplished using one of the following three schemes:
|
||||
(scheme 1) using reduc_code, if available.
|
||||
(scheme 1) using reduc_fn, if available.
|
||||
(scheme 2) using whole-vector shifts, if available.
|
||||
(scheme 3) using a scalar loop. In this case steps 1+2 above are
|
||||
combined.
|
||||
@ -4649,7 +4649,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
exit_gsi = gsi_after_labels (exit_bb);
|
||||
|
||||
/* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
|
||||
(i.e. when reduc_code is not available) and in the final adjustment
|
||||
(i.e. when reduc_fn is not available) and in the final adjustment
|
||||
code (if needed). Also get the original scalar reduction variable as
|
||||
defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
|
||||
represents a reduction pattern), the tree-code and scalar-def are
|
||||
@ -4755,7 +4755,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
new_phi_result = PHI_RESULT (new_phis[0]);
|
||||
|
||||
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
|
||||
&& reduc_code != ERROR_MARK)
|
||||
&& reduc_fn != IFN_LAST)
|
||||
{
|
||||
/* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
|
||||
various data values where the condition matched and another vector
|
||||
@ -4793,8 +4793,9 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
|
||||
/* Find maximum value from the vector of found indexes. */
|
||||
tree max_index = make_ssa_name (index_scalar_type);
|
||||
gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR,
|
||||
induction_index);
|
||||
gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
|
||||
1, induction_index);
|
||||
gimple_call_set_lhs (max_index_stmt, max_index);
|
||||
gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
|
||||
|
||||
/* Vector of {max_index, max_index, max_index,...}. */
|
||||
@ -4849,13 +4850,9 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
|
||||
/* Reduce down to a scalar value. */
|
||||
tree data_reduc = make_ssa_name (scalar_type_unsigned);
|
||||
optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned,
|
||||
optab_default);
|
||||
gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned))
|
||||
!= CODE_FOR_nothing);
|
||||
gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
|
||||
REDUC_MAX_EXPR,
|
||||
vec_cond_cast);
|
||||
gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
|
||||
1, vec_cond_cast);
|
||||
gimple_call_set_lhs (data_reduc_stmt, data_reduc);
|
||||
gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
|
||||
|
||||
/* Convert the reduced value back to the result type and set as the
|
||||
@ -4867,9 +4864,9 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
scalar_results.safe_push (new_temp);
|
||||
}
|
||||
else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
|
||||
&& reduc_code == ERROR_MARK)
|
||||
&& reduc_fn == IFN_LAST)
|
||||
{
|
||||
/* Condition redution without supported REDUC_MAX_EXPR. Generate
|
||||
/* Condition reduction without supported IFN_REDUC_MAX. Generate
|
||||
idx = 0;
|
||||
idx_val = induction_index[0];
|
||||
val = data_reduc[0];
|
||||
@ -4939,7 +4936,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
/* 2.3 Create the reduction code, using one of the three schemes described
|
||||
above. In SLP we simply need to extract all the elements from the
|
||||
vector (without reducing them), so we use scalar shifts. */
|
||||
else if (reduc_code != ERROR_MARK && !slp_reduc)
|
||||
else if (reduc_fn != IFN_LAST && !slp_reduc)
|
||||
{
|
||||
tree tmp;
|
||||
tree vec_elem_type;
|
||||
@ -4954,22 +4951,27 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt,
|
||||
vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
|
||||
if (!useless_type_conversion_p (scalar_type, vec_elem_type))
|
||||
{
|
||||
tree tmp_dest =
|
||||
vect_create_destination_var (scalar_dest, vec_elem_type);
|
||||
tmp = build1 (reduc_code, vec_elem_type, new_phi_result);
|
||||
epilog_stmt = gimple_build_assign (tmp_dest, tmp);
|
||||
tree tmp_dest
|
||||
= vect_create_destination_var (scalar_dest, vec_elem_type);
|
||||
epilog_stmt = gimple_build_call_internal (reduc_fn, 1,
|
||||
new_phi_result);
|
||||
gimple_set_lhs (epilog_stmt, tmp_dest);
|
||||
new_temp = make_ssa_name (tmp_dest, epilog_stmt);
|
||||
gimple_assign_set_lhs (epilog_stmt, new_temp);
|
||||
gimple_set_lhs (epilog_stmt, new_temp);
|
||||
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
||||
|
||||
tmp = build1 (NOP_EXPR, scalar_type, new_temp);
|
||||
epilog_stmt = gimple_build_assign (new_scalar_dest, NOP_EXPR,
|
||||
new_temp);
|
||||
}
|
||||
else
|
||||
tmp = build1 (reduc_code, scalar_type, new_phi_result);
|
||||
{
|
||||
epilog_stmt = gimple_build_call_internal (reduc_fn, 1,
|
||||
new_phi_result);
|
||||
gimple_set_lhs (epilog_stmt, new_scalar_dest);
|
||||
}
|
||||
|
||||
epilog_stmt = gimple_build_assign (new_scalar_dest, tmp);
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
gimple_assign_set_lhs (epilog_stmt, new_temp);
|
||||
gimple_set_lhs (epilog_stmt, new_temp);
|
||||
gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
|
||||
|
||||
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
|
||||
@ -5589,10 +5591,11 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
tree vectype_in = NULL_TREE;
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
enum tree_code code, orig_code, epilog_reduc_code;
|
||||
enum tree_code code, orig_code;
|
||||
internal_fn reduc_fn;
|
||||
machine_mode vec_mode;
|
||||
int op_type;
|
||||
optab optab, reduc_optab;
|
||||
optab optab;
|
||||
tree new_temp = NULL_TREE;
|
||||
gimple *def_stmt;
|
||||
enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
|
||||
@ -6135,29 +6138,21 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
double_reduc = true;
|
||||
}
|
||||
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
reduc_fn = IFN_LAST;
|
||||
|
||||
if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION)
|
||||
{
|
||||
if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code))
|
||||
if (reduction_fn_for_scalar_code (orig_code, &reduc_fn))
|
||||
{
|
||||
reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out,
|
||||
optab_default);
|
||||
if (!reduc_optab)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"no optab for reduction.\n");
|
||||
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
}
|
||||
else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
|
||||
if (reduc_fn != IFN_LAST
|
||||
&& !direct_internal_fn_supported_p (reduc_fn, vectype_out,
|
||||
OPTIMIZE_FOR_SPEED))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
|
||||
"reduc op not supported by target.\n");
|
||||
|
||||
epilog_reduc_code = ERROR_MARK;
|
||||
reduc_fn = IFN_LAST;
|
||||
}
|
||||
}
|
||||
else
|
||||
@ -6180,11 +6175,9 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
cr_index_vector_type = build_vector_type
|
||||
(cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out));
|
||||
|
||||
optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type,
|
||||
optab_default);
|
||||
if (optab_handler (optab, TYPE_MODE (cr_index_vector_type))
|
||||
!= CODE_FOR_nothing)
|
||||
epilog_reduc_code = REDUC_MAX_EXPR;
|
||||
if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type,
|
||||
OPTIMIZE_FOR_SPEED))
|
||||
reduc_fn = IFN_REDUC_MAX;
|
||||
}
|
||||
|
||||
if ((double_reduc
|
||||
@ -6307,7 +6300,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
if (!vec_stmt) /* transformation not required. */
|
||||
{
|
||||
if (first_p)
|
||||
vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies);
|
||||
vect_model_reduction_cost (stmt_info, reduc_fn, ncopies);
|
||||
STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
|
||||
return true;
|
||||
}
|
||||
@ -6461,8 +6454,7 @@ vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi,
|
||||
vect_defs[0] = gimple_assign_lhs (*vec_stmt);
|
||||
|
||||
vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt,
|
||||
epilog_copies,
|
||||
epilog_reduc_code, phis,
|
||||
epilog_copies, reduc_fn, phis,
|
||||
double_reduc, slp_node, slp_node_instance);
|
||||
|
||||
return true;
|
||||
|
12
gcc/tree.def
12
gcc/tree.def
@ -1268,18 +1268,6 @@ DEFTREECODE (OMP_CLAUSE, "omp_clause", tcc_exceptional, 0)
|
||||
Operand 0: BODY: contains body of the transaction. */
|
||||
DEFTREECODE (TRANSACTION_EXPR, "transaction_expr", tcc_expression, 1)
|
||||
|
||||
/* Reduction operations.
|
||||
Operations that take a vector of elements and "reduce" it to a scalar
|
||||
result (e.g. summing the elements of the vector, finding the minimum over
|
||||
the vector elements, etc).
|
||||
Operand 0 is a vector.
|
||||
The expression returns a scalar, with type the same as the elements of the
|
||||
vector, holding the result of the reduction of all elements of the operand.
|
||||
*/
|
||||
DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1)
|
||||
DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1)
|
||||
DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1)
|
||||
|
||||
/* Widening dot-product.
|
||||
The first two arguments are of type t1.
|
||||
The third argument and the result are of type t2, such that t2 is at least
|
||||
|
Loading…
x
Reference in New Issue
Block a user