Move vector highpart emulation to the optabs layer
	* expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
	(expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
	* optabs.c (can_mult_highpart_p): New.
	(expand_mult_highpart): New.
	* expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
	* tree-vect-generic.c (expand_vector_operations_1): Don't expand
	by pieces if can_mult_highpart_p.
	(expand_vector_divmod): Use can_mult_highpart_p and always
	generate MULT_HIGHPART_EXPR.
	* tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
	* tree-vect-stmts.c (vectorizable_operation): Likewise.

From-SVN: r189407
parent 9283726f76
commit 00f07b86e7
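For orientation, a small self-contained sketch of the operation this patch moves around: MULT_HIGHPART_EXPR yields the upper half of the widened product of its operands, lane-wise for vectors. The helper name and the 32-bit element width below are illustrative assumptions, not part of this commit; the expr.c hunk further down shows the actual in-tree use, where expand_expr_real_2 now calls the new optabs-layer expand_mult_highpart directly.

#include <stdio.h>
#include <stdint.h>

/* Illustrative only: the scalar equivalent of one unsigned
   MULT_HIGHPART_EXPR element.  A vector target performs this lane-wise,
   either with a direct [us]mul_highpart pattern or, as the new
   can_mult_highpart_p/expand_mult_highpart code synthesizes, with
   widening multiplies followed by a permute.  */
static uint32_t
umul_highpart32 (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * b) >> 32);
}

int
main (void)
{
  /* 0x80000000 * 3 = 0x180000000, whose high 32 bits are 1.  */
  printf ("%u\n", umul_highpart32 (0x80000000u, 3));
  return 0;
}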
gcc/ChangeLog

@@ -1,5 +1,17 @@
2012-07-10  Richard Henderson  <rth@redhat.com>

	* expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
	(expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
	* optabs.c (can_mult_highpart_p): New.
	(expand_mult_highpart): New.
	* expr.c (expand_expr_real_2) [MULT_HIGHPART_EXPR]: Use it.
	* tree-vect-generic.c (expand_vector_operations_1): Don't expand
	by pieces if can_mult_highpart_p.
	(expand_vector_divmod): Use can_mult_highpart_p and always
	generate MULT_HIGHPART_EXPR.
	* tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
	* tree-vect-stmts.c (vectorizable_operation): Likewise.

	* config/spu/spu-builtins.md (spu_mpy): Move to spu.md.
	(spu_mpyu, spu_mpyhhu, spu_mpyhh): Likewise.
	* config/spu/spu.md (vec_widen_smult_odd_v8hi): Rename from spu_mpy.
gcc/expmed.c (32 lines changed)
@@ -2381,8 +2381,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
const struct algorithm *, enum mult_variant);
static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
static rtx extract_high_half (enum machine_mode, rtx);
static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
int, int);

/* Compute and return the best algorithm for multiplying by T.
The algorithm must cost less than cost_limit

@@ -3477,7 +3477,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
return target;
}

/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */

static rtx
extract_high_half (enum machine_mode mode, rtx op)

@@ -3495,11 +3495,11 @@ extract_high_half (enum machine_mode mode, rtx op)
return convert_modes (mode, wider_mode, op, 0);
}

/* Like expand_mult_highpart, but only consider using a multiplication
/* Like expmed_mult_highpart, but only consider using a multiplication
optab.  OP1 is an rtx for the constant operand.  */

static rtx
expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost)
{
rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);

@@ -3610,7 +3610,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
MAX_COST is the total allowed cost for the expanded RTL.  */

static rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
rtx target, int unsignedp, int max_cost)
{
enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);

@@ -3633,7 +3633,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
mode == word_mode, however all the cost calculations in
synth_mult etc. assume single-word operations.  */
if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
return expand_mult_highpart_optab (mode, op0, op1, target,
return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);

extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];

@@ -3651,7 +3651,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
{
/* See whether the specialized multiplication optabs are
cheaper than the shift/add version.  */
tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
alg.cost.cost + extra_cost);
if (tem)
return tem;

@@ -3666,7 +3666,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,

return tem;
}
return expand_mult_highpart_optab (mode, op0, op1, target,
return expmed_mult_highpart_optab (mode, op0, op1, target,
unsignedp, max_cost);
}

@@ -3940,7 +3940,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,

In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
half of the product.  Different strategies for generating the product are
implemented in expand_mult_highpart.
implemented in expmed_mult_highpart.

If what we actually want is the remainder, we generate that by another
by-constant multiplication and a subtraction.  */

@@ -3990,7 +3990,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
mode for which we can do the operation with a library call.  */

/* We might want to refine this now that we have division-by-constant
optimization.  Since expand_mult_highpart tries so many variants, it is
optimization.  Since expmed_mult_highpart tries so many variants, it is
not straightforward to generalize this.  Maybe we should make an array
of possible modes in init_expmed?  Save this for GCC 2.7.  */

@@ -4155,7 +4155,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
= (shift_cost[speed][compute_mode][post_shift - 1]
+ shift_cost[speed][compute_mode][1]
+ 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0,
t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml),
NULL_RTX, 1,
max_cost - extra_cost);

@@ -4187,7 +4187,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost
= (shift_cost[speed][compute_mode][pre_shift]
+ shift_cost[speed][compute_mode][post_shift]);
t2 = expand_mult_highpart (compute_mode, t1,
t2 = expmed_mult_highpart (compute_mode, t1,
GEN_INT (ml),
NULL_RTX, 1,
max_cost - extra_cost);

@@ -4313,7 +4313,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0,
t1 = expmed_mult_highpart (compute_mode, op0,
GEN_INT (ml), NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0)

@@ -4348,7 +4348,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]);
t1 = expand_mult_highpart (compute_mode, op0, mlr,
t1 = expmed_mult_highpart (compute_mode, op0, mlr,
NULL_RTX, 0,
max_cost - extra_cost);
if (t1 == 0)

@@ -4436,7 +4436,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
extra_cost = (shift_cost[speed][compute_mode][post_shift]
+ shift_cost[speed][compute_mode][size - 1]
+ 2 * add_cost[speed][compute_mode]);
t3 = expand_mult_highpart (compute_mode, t2,
t3 = expmed_mult_highpart (compute_mode, t2,
GEN_INT (ml), NULL_RTX, 1,
max_cost - extra_cost);
if (t3 != 0)
gcc/expr.c

@@ -8554,9 +8554,14 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
return expand_divmod (0, code, mode, op0, op1, target, unsignedp);

case RDIV_EXPR:
case MULT_HIGHPART_EXPR:
goto binop;

case MULT_HIGHPART_EXPR:
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
temp = expand_mult_highpart (mode, op0, op1, target, unsignedp);
gcc_assert (temp);
return temp;

case TRUNC_MOD_EXPR:
case FLOOR_MOD_EXPR:
case CEIL_MOD_EXPR:
gcc/optabs.c (126 lines changed)

@@ -7162,6 +7162,132 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
return ops[0].value;
}

/* Return non-zero if a highpart multiply is supported or can be synthesized.
For the benefit of expand_mult_highpart, the return value is 1 for direct,
2 for even/odd widening, and 3 for hi/lo widening.  */

int
can_mult_highpart_p (enum machine_mode mode, bool uns_p)
{
optab op;
unsigned char *sel;
unsigned i, nunits;

op = uns_p ? umul_highpart_optab : smul_highpart_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
return 1;

/* If the mode is an integral vector, synth from widening operations.  */
if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
return 0;

nunits = GET_MODE_NUNITS (mode);
sel = XALLOCAVEC (unsigned char, nunits);

op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
for (i = 0; i < nunits; ++i)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
if (can_vec_perm_p (mode, false, sel))
return 2;
}
}

op = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
if (optab_handler (op, mode) != CODE_FOR_nothing)
{
for (i = 0; i < nunits; ++i)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (can_vec_perm_p (mode, false, sel))
return 3;
}
}

return 0;
}

/* Expand a highpart multiply.  */

rtx
expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
rtx target, bool uns_p)
{
struct expand_operand eops[3];
enum insn_code icode;
int method, i, nunits;
enum machine_mode wmode;
rtx m1, m2, perm;
optab tab1, tab2;
rtvec v;

method = can_mult_highpart_p (mode, uns_p);
switch (method)
{
case 0:
return NULL_RTX;
case 1:
tab1 = uns_p ? umul_highpart_optab : smul_highpart_optab;
return expand_binop (mode, tab1, op0, op1, target, uns_p,
OPTAB_LIB_WIDEN);
case 2:
tab1 = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
tab2 = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
break;
case 3:
tab1 = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
tab2 = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
if (BYTES_BIG_ENDIAN)
{
optab t = tab1;
tab1 = tab2;
tab2 = t;
}
break;
default:
gcc_unreachable ();
}

icode = optab_handler (tab1, mode);
nunits = GET_MODE_NUNITS (mode);
wmode = insn_data[icode].operand[0].mode;
gcc_checking_assert (2 * GET_MODE_NUNITS (wmode) == nunits);
gcc_checking_assert (GET_MODE_SIZE (wmode) == GET_MODE_SIZE (mode));

create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode);
create_input_operand (&eops[1], op0, mode);
create_input_operand (&eops[2], op1, mode);
expand_insn (icode, 3, eops);
m1 = gen_lowpart (mode, eops[0].value);

create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode);
create_input_operand (&eops[1], op0, mode);
create_input_operand (&eops[2], op1, mode);
expand_insn (optab_handler (tab2, mode), 3, eops);
m2 = gen_lowpart (mode, eops[0].value);

v = rtvec_alloc (nunits);
if (method == 2)
{
for (i = 0; i < nunits; ++i)
RTVEC_ELT (v, i) = GEN_INT (!BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits : 0));
}
else
{
for (i = 0; i < nunits; ++i)
RTVEC_ELT (v, i) = GEN_INT (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
}
perm = gen_rtx_CONST_VECTOR (mode, v);

return expand_vec_perm (mode, m1, m2, perm, target);
}

/* Return true if there is a compare_and_swap pattern. */
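As a side note, not part of the patch: the even/odd selector that can_mult_highpart_p builds above, and that expand_mult_highpart reuses for its CONST_VECTOR permutation, can be checked with a tiny standalone program. The vector width (4 lanes) and little-endian byte order below are assumptions chosen only for illustration.

#include <stdio.h>

/* Evaluates the even/odd selector formula used above,
   sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0),
   for nunits = 4 on a little-endian target.  It prints 1 5 3 7:
   indices 0..3 address the vector of even-lane products and 4..7 the
   vector of odd-lane products, each selected index being the high
   narrow half of a widened product, so the original lane order is
   restored.  */
int
main (void)
{
  const unsigned nunits = 4;       /* e.g. a 4-lane integer vector */
  const int bytes_big_endian = 0;  /* little-endian assumption */
  unsigned i;

  for (i = 0; i < nunits; ++i)
    printf ("%u ", !bytes_big_endian + (i & ~1u) + ((i & 1) ? nunits : 0));
  printf ("\n");
  return 0;
}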
gcc/optabs.h

@@ -1014,6 +1014,12 @@ extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *);
/* Generate code for VEC_PERM_EXPR.  */
extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx);

/* Return non-zero if target supports a given highpart multiplication.  */
extern int can_mult_highpart_p (enum machine_mode, bool);

/* Generate code for MULT_HIGHPART_EXPR.  */
extern rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, bool);

/* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
if the target does not have such an insn.  */
gcc/tree-vect-generic.c

@@ -454,10 +454,9 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
int dummy_int;
unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
optab op;
tree *vec;
unsigned char *sel = NULL;
tree cur_op, m1, m2, mulcst, perm_mask, wider_type, tem, decl_e, decl_o;
tree cur_op, mulcst, tem;
optab op;

if (prec > HOST_BITS_PER_WIDE_INT)
return NULL_TREE;

@@ -745,54 +744,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
return NULL_TREE;

op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default);
if (op != NULL && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
wider_type = decl_e = decl_o = NULL_TREE;
else
{
wider_type = build_nonstandard_integer_type (prec * 2, unsignedp),
wider_type = build_vector_type (wider_type, nunits / 2);
if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
|| GET_MODE_BITSIZE (TYPE_MODE (wider_type))
!= GET_MODE_BITSIZE (TYPE_MODE (type)))
return NULL_TREE;

sel = XALLOCAVEC (unsigned char, nunits);

if (targetm.vectorize.builtin_mul_widen_even
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl_e = targetm.vectorize.builtin_mul_widen_even (type))
&& (decl_o = targetm.vectorize.builtin_mul_widen_odd (type))
&& (TYPE_MODE (TREE_TYPE (TREE_TYPE (decl_e)))
== TYPE_MODE (wider_type)))
{
for (i = 0; i < nunits; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
decl_e = decl_o = NULL_TREE;
}
else
decl_e = decl_o = NULL_TREE;

if (decl_e == NULL_TREE)
{
op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;
op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
type, optab_default);
if (op == NULL
|| optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
return NULL_TREE;

for (i = 0; i < nunits; i++)
sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
return NULL_TREE;
}
}
if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
return NULL_TREE;

cur_op = op0;

@@ -830,46 +783,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
mulcst = build_vector (type, vec);
if (wider_type == NULL_TREE)
cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
else
{
for (i = 0; i < nunits; i++)
vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
perm_mask = build_vector (type, vec);

if (decl_e != NULL_TREE)
{
gimple call;

call = gimple_build_call (decl_e, 2, cur_op, mulcst);
m1 = create_tmp_reg (wider_type, NULL);
add_referenced_var (m1);
m1 = make_ssa_name (m1, call);
gimple_call_set_lhs (call, m1);
gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);

call = gimple_build_call (decl_o, 2, cur_op, mulcst);
m2 = create_tmp_reg (wider_type, NULL);
add_referenced_var (m2);
m2 = make_ssa_name (m2, call);
gimple_call_set_lhs (call, m2);
gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
}
else
{
m1 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
wider_type, cur_op, mulcst);
m2 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
wider_type, cur_op, mulcst);
}

m1 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m1);
m2 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m2);
cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, m1, m2, perm_mask);
}
cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);

switch (mode)
{

@@ -1454,13 +1369,17 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
if (compute_type == type)
{
compute_mode = TYPE_MODE (compute_type);
if (VECTOR_MODE_P (compute_mode)
&& op != NULL
&& optab_handler (op, compute_mode) != CODE_FOR_nothing)
return;
else
/* There is no operation in hardware, so fall back to scalars. */
compute_type = TREE_TYPE (type);
if (VECTOR_MODE_P (compute_mode))
{
if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
return;
if (code == MULT_HIGHPART_EXPR
&& can_mult_highpart_p (compute_mode,
TYPE_UNSIGNED (compute_type)))
return;
}
/* There is no operation in hardware, so fall back to scalars. */
compute_type = TREE_TYPE (type);
}

gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
gcc/tree-vect-patterns.c

@@ -1642,10 +1642,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
optab optab;
tree dummy, q;
enum tree_code dummy_code;
tree q;
int dummy_int, prec;
VEC (tree, heap) *dummy_vec;
stmt_vec_info def_stmt_vinfo;

if (!is_gimple_assign (last_stmt))

@@ -1814,23 +1812,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
|| prec > HOST_BITS_PER_WIDE_INT)
return NULL;

optab = optab_for_tree_code (MULT_HIGHPART_EXPR, vectype, optab_default);
if (optab == NULL
|| optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
{
tree witype = build_nonstandard_integer_type (prec * 2,
TYPE_UNSIGNED (itype));
tree vecwtype = get_vectype_for_scalar_type (witype);

if (vecwtype == NULL_TREE)
return NULL;
if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
vecwtype, vectype,
&dummy, &dummy, &dummy_code,
&dummy_code, &dummy_int,
&dummy_vec))
return NULL;
}
if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
return NULL;

STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
gcc/tree-vect-stmts.c

@@ -3304,18 +3304,17 @@ static bool
vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
gimple *vec_stmt, slp_tree slp_node)
{
tree vec_dest, vec_dest2 = NULL_TREE;
tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
tree vec_dest;
tree scalar_dest;
tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype, wide_vectype = NULL_TREE;
tree vectype;
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
enum tree_code code;
enum machine_mode vec_mode;
tree new_temp;
int op_type;
optab optab, optab2 = NULL;
optab optab;
int icode;
tree def;
gimple def_stmt;

@@ -3332,8 +3331,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
tree vop0, vop1, vop2;
bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
int vf;
unsigned char *sel = NULL;
tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;

if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
return false;

@@ -3455,87 +3452,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
|| code == RROTATE_EXPR)
return false;

optab = optab_for_tree_code (code, vectype, optab_default);

/* Supportable by target? */
if (!optab && code != MULT_HIGHPART_EXPR)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab.");
return false;
}

vec_mode = TYPE_MODE (vectype);
icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing;

if (icode == CODE_FOR_nothing
&& code == MULT_HIGHPART_EXPR
&& VECTOR_MODE_P (vec_mode)
&& BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
if (code == MULT_HIGHPART_EXPR)
{
/* If MULT_HIGHPART_EXPR isn't supported by the backend, see
if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR. */
unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
tree wide_type
= build_nonstandard_integer_type (prec * 2, unsignedp);
wide_vectype
= get_same_sized_vectype (wide_type, vectype);

sel = XALLOCAVEC (unsigned char, nunits_in);
if (VECTOR_MODE_P (TYPE_MODE (wide_vectype))
&& GET_MODE_SIZE (TYPE_MODE (wide_vectype))
== GET_MODE_SIZE (vec_mode))
if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
icode = 0;
else
icode = CODE_FOR_nothing;
}
else
{
optab = optab_for_tree_code (code, vectype, optab_default);
if (!optab)
{
if (targetm.vectorize.builtin_mul_widen_even
&& (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
&& targetm.vectorize.builtin_mul_widen_odd
&& (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
&& TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
+ ((i & 1) ? nunits_in : 0);
if (can_vec_perm_p (vec_mode, false, sel))
icode = 0;
}
if (icode == CODE_FOR_nothing)
{
decl1 = NULL_TREE;
decl2 = NULL_TREE;
optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
vectype, optab_default);
optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
vectype, optab_default);
if (optab != NULL
&& optab2 != NULL
&& optab_handler (optab, vec_mode) != CODE_FOR_nothing
&& optab_handler (optab2, vec_mode) != CODE_FOR_nothing
&& insn_data[optab_handler (optab, vec_mode)].operand[0].mode
== TYPE_MODE (wide_vectype)
&& insn_data[optab_handler (optab2,
vec_mode)].operand[0].mode
== TYPE_MODE (wide_vectype))
{
for (i = 0; i < nunits_in; i++)
sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
if (can_vec_perm_p (vec_mode, false, sel))
icode = optab_handler (optab, vec_mode);
}
}
}
if (icode == CODE_FOR_nothing)
{
if (optab_for_tree_code (code, vectype, optab_default) == NULL)
{
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab.");
return false;
}
wide_vectype = NULL_TREE;
optab2 = NULL;
if (vect_print_dump_info (REPORT_DETAILS))
fprintf (vect_dump, "no optab.");
return false;
}
icode = (int) optab_handler (optab, vec_mode);
}

if (icode == CODE_FOR_nothing)

@@ -3575,16 +3511,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
fprintf (vect_dump, "transform binary/unary operation.");

/* Handle def. */
if (wide_vectype)
{
vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
perm_mask = vect_gen_perm_mask (vectype, sel);
}
else
vec_dest = vect_create_destination_var (scalar_dest, vectype);
vec_dest = vect_create_destination_var (scalar_dest, vectype);

/* Allocate VECs for vector operands. In case of SLP, vector operands are
created in the previous stages of the recursion, so no allocation is

@@ -3693,66 +3620,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
vop2 = ((op_type == ternary_op)
? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
if (wide_vectype)
{
tree new_temp2, vce;

gcc_assert (code == MULT_HIGHPART_EXPR);
if (decl1 != NULL_TREE)
{
new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
new_temp = make_ssa_name (vec_dest, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);

new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
new_temp2 = make_ssa_name (vec_dest2, new_stmt);
gimple_call_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}
else
{
new_temp = make_ssa_name (vec_dest, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_HI_EXPR
: VEC_WIDEN_MULT_LO_EXPR,
new_temp, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);

new_temp2 = make_ssa_name (vec_dest2, NULL);
new_stmt
= gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
? VEC_WIDEN_MULT_LO_EXPR
: VEC_WIDEN_MULT_HI_EXPR,
new_temp2, vop0, vop1);
vect_finish_stmt_generation (stmt, new_stmt, gsi);
}

vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest3, vce,
NULL_TREE);
new_temp = make_ssa_name (vec_dest3, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp);
vect_finish_stmt_generation (stmt, new_stmt, gsi);

vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
vec_dest4, vce,
NULL_TREE);
new_temp2 = make_ssa_name (vec_dest4, new_stmt);
gimple_assign_set_lhs (new_stmt, new_temp2);
vect_finish_stmt_generation (stmt, new_stmt, gsi);

new_temp = permute_vec_elements (new_temp, new_temp2,
perm_mask, stmt, gsi);
new_stmt = SSA_NAME_DEF_STMT (new_temp);
if (slp_node)
VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
new_stmt);
continue;
}
new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
vop0, vop1, vop2);
new_temp = make_ssa_name (vec_dest, new_stmt);