mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-23 06:49:11 +08:00
genopinit.c (vec_shl_optab, [...]): Initialize new optabs.
* genopinit.c (vec_shl_optab, vec_shr_optab): Initialize new optabs. (reduc_plus_optab): Removed. Replaced with... (reduc_splus_optab, reduc_uplus_optab): Initialize new optabs. * optabs.c (optab_for_tree_code): Return reduc_splus_optab or reduc_uplus_optab instead of reduc_plus_optab. (expand_vec_shift_expr): New function. (init_optabs): Initialize new optabs. Remove initialization of reduc_plus_optab. (optab_for_tree_code): Return vec_shl_optab/vec_shr_optab for VEC_LSHIFT_EXPR/VEC_RSHIFT_EXPR. * optabs.h (OTI_reduc_plus): Removed. Replaced with... (OTI_reduc_splus, OTI_reduc_uplus): New. (reduc_plus_optab): Removed. Replaced with... (reduc_splus_optab, reduc_uplus_optab): New optabs. (vec_shl_optab, vec_shr_optab): New optabs. (expand_vec_shift_expr): New function declaration. * tree.def (VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR): New tree-codes. * tree-inline.c (estimate_num_insns_1): Handle new tree-codes. * expr.c (expand_expr_real_1): Handle new tree-codes. * tree-pretty-print.c (dump_generic_node, op_symbol, op_prio): Likewise. * tree-vect-generic.c (expand_vector_operations_1): Add assert. * tree-vect-transform.c (vect_create_epilog_for_reduction): Add two alternatives for generating reduction epilog code. (vectorizable_reduction): Don't fail if direct reduction support is not available. (vectorizable_target_reduction_pattern): Likewise. * config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf, reduc_umax_v4si, reduc_smin_v4si, reduc_smin_v4sf, reduc_umin_v4si, reduc_plus_v4si, reduc_plus_v4sf): Removed. (vec_shl_<mode>, vec_shr_<mode>, altivec_vsumsws_nomode, reduc_splus_<mode>, reduc_uplus_v16qi): New. From-SVN: r101231
This commit is contained in:
parent
a3a2067ac5
commit
a6b46ba2c8
@ -1,3 +1,40 @@
|
||||
2005-06-21 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* genopinit.c (vec_shl_optab, vec_shr_optab): Initialize new optabs.
|
||||
(reduc_plus_optab): Removed. Replaced with...
|
||||
(reduc_splus_optab, reduc_uplus_optab): Initialize new optabs.
|
||||
* optabs.c (optab_for_tree_code): Return reduc_splus_optab or
|
||||
reduc_uplus_optab instead of reduc_plus_optab.
|
||||
(expand_vec_shift_expr): New function.
|
||||
(init_optabs): Initialize new optabs. Remove initialization of
|
||||
reduc_plus_optab.
|
||||
(optab_for_tree_code): Return vec_shl_optab/vec_shr_optab
|
||||
for VEC_LSHIFT_EXPR/VEC_RSHIFT_EXPR.
|
||||
* optabs.h (OTI_reduc_plus): Removed. Replaced with...
|
||||
(OTI_reduc_splus, OTI_reduc_uplus): New.
|
||||
(reduc_plus_optab): Removed. Replaced with...
|
||||
(reduc_splus_optab, reduc_uplus_optab): New optabs.
|
||||
(vec_shl_optab, vec_shr_optab): New optabs.
|
||||
(expand_vec_shift_expr): New function declaration.
|
||||
|
||||
* tree.def (VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR): New tree-codes.
|
||||
* tree-inline.c (estimate_num_insns_1): Handle new tree-codes.
|
||||
* expr.c (expand_expr_real_1): Handle new tree-codes.
|
||||
* tree-pretty-print.c (dump_generic_node, op_symbol, op_prio): Likewise.
|
||||
* tree-vect-generic.c (expand_vector_operations_1): Add assert.
|
||||
|
||||
* tree-vect-transform.c (vect_create_epilog_for_reduction): Add two
|
||||
alternatives for generating reduction epilog code.
|
||||
(vectorizable_reduction): Don't fail if direct reduction support is
|
||||
not available.
|
||||
(vectorizable_target_reduction_pattern): Likewise.
|
||||
|
||||
* config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf,
|
||||
reduc_umax_v4si, reduc_smin_v4si, reduc_smin_v4sf, reduc_umin_v4si,
|
||||
reduc_plus_v4si, reduc_plus_v4sf): Removed.
|
||||
(vec_shl_<mode>, vec_shr_<mode>, altivec_vsumsws_nomode,
|
||||
reduc_splus_<mode>, reduc_uplus_v16qi): New.
|
||||
|
||||
2005-06-20 Daniel Berlin <dberlin@dberlin.org>
|
||||
|
||||
* c-typeck.c (build_function_call): Set fundecl = function again.
|
||||
|
@ -1825,157 +1825,100 @@
|
||||
operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
|
||||
})
|
||||
|
||||
;; Reduction
|
||||
|
||||
(define_expand "reduc_smax_v4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx vtmp1 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp2 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp3 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
|
||||
gen_rtx_CONST_INT (SImode, 8)));
|
||||
emit_insn (gen_smaxv4si3 (vtmp2, operands[1], vtmp1));
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
|
||||
gen_rtx_CONST_INT (SImode, 4)));
|
||||
emit_insn (gen_smaxv4si3 (operands[0], vtmp2, vtmp3));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "reduc_smax_v4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx vtmp1 = gen_reg_rtx (V4SFmode);
|
||||
rtx vtmp2 = gen_reg_rtx (V4SFmode);
|
||||
rtx vtmp3 = gen_reg_rtx (V4SFmode);
|
||||
|
||||
emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
|
||||
gen_rtx_CONST_INT (SImode, 8)));
|
||||
emit_insn (gen_smaxv4sf3 (vtmp2, operands[1], vtmp1));
|
||||
emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
|
||||
gen_rtx_CONST_INT (SImode, 4)));
|
||||
emit_insn (gen_smaxv4sf3 (operands[0], vtmp2, vtmp3));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "reduc_umax_v4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx vtmp1 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp2 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp3 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
|
||||
gen_rtx_CONST_INT (SImode, 8)));
|
||||
emit_insn (gen_umaxv4si3 (vtmp2, operands[1], vtmp1));
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
|
||||
gen_rtx_CONST_INT (SImode, 4)));
|
||||
emit_insn (gen_umaxv4si3 (operands[0], vtmp2, vtmp3));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "reduc_smin_v4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx vtmp1 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp2 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp3 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
|
||||
gen_rtx_CONST_INT (SImode, 8)));
|
||||
emit_insn (gen_sminv4si3 (vtmp2, operands[1], vtmp1));
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
|
||||
gen_rtx_CONST_INT (SImode, 4)));
|
||||
emit_insn (gen_sminv4si3 (operands[0], vtmp2, vtmp3));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "reduc_smin_v4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
|
||||
;; Vector shift left in bits. Currently supported only for shift
|
||||
;; amounts that can be expressed as byte shifts (divisible by 8).
|
||||
;; General shift amounts can be supported using vslo + vsl. We're
|
||||
;; not expecting to see these yet (the vectorizer currently
|
||||
;; generates only shifts divisible by byte_size).
|
||||
(define_expand "vec_shl_<mode>"
|
||||
[(set (match_operand:V 0 "register_operand" "=v")
|
||||
(unspec:V [(match_operand:V 1 "register_operand" "v")
|
||||
(match_operand:QI 2 "reg_or_short_operand" "")] 219 ))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx vtmp1 = gen_reg_rtx (V4SFmode);
|
||||
rtx vtmp2 = gen_reg_rtx (V4SFmode);
|
||||
rtx vtmp3 = gen_reg_rtx (V4SFmode);
|
||||
rtx bitshift = operands[2];
|
||||
rtx byteshift = gen_reg_rtx (QImode);
|
||||
HOST_WIDE_INT bitshift_val;
|
||||
HOST_WIDE_INT byteshift_val;
|
||||
|
||||
emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
|
||||
gen_rtx_CONST_INT (SImode, 8)));
|
||||
emit_insn (gen_sminv4sf3 (vtmp2, operands[1], vtmp1));
|
||||
emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
|
||||
gen_rtx_CONST_INT (SImode, 4)));
|
||||
emit_insn (gen_sminv4sf3 (operands[0], vtmp2, vtmp3));
|
||||
if (! CONSTANT_P (bitshift))
|
||||
FAIL;
|
||||
bitshift_val = INTVAL (bitshift);
|
||||
if (bitshift_val & 0x7)
|
||||
FAIL;
|
||||
byteshift_val = bitshift_val >> 3;
|
||||
byteshift = gen_rtx_CONST_INT (QImode, byteshift_val);
|
||||
emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
|
||||
byteshift));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "reduc_umin_v4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
|
||||
;; Vector shift right in bits. Currently supported only for shift
|
||||
;; amounts that can be expressed as byte shifts (divisible by 8).
|
||||
;; General shift amounts can be supported using vsro + vsr. We're
|
||||
;; not expecting to see these yet (the vectorizer currently
|
||||
;; generates only shifts divisible by byte_size).
|
||||
(define_expand "vec_shr_<mode>"
|
||||
[(set (match_operand:V 0 "register_operand" "=v")
|
||||
(unspec:V [(match_operand:V 1 "register_operand" "v")
|
||||
(match_operand:QI 2 "reg_or_short_operand" "")] 219 ))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx vtmp1 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp2 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp3 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
|
||||
gen_rtx_CONST_INT (SImode, 8)));
|
||||
emit_insn (gen_uminv4si3 (vtmp2, operands[1], vtmp1));
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
|
||||
gen_rtx_CONST_INT (SImode, 4)));
|
||||
emit_insn (gen_uminv4si3 (operands[0], vtmp2, vtmp3));
|
||||
rtx bitshift = operands[2];
|
||||
rtx byteshift = gen_reg_rtx (QImode);
|
||||
HOST_WIDE_INT bitshift_val;
|
||||
HOST_WIDE_INT byteshift_val;
|
||||
|
||||
if (! CONSTANT_P (bitshift))
|
||||
FAIL;
|
||||
bitshift_val = INTVAL (bitshift);
|
||||
if (bitshift_val & 0x7)
|
||||
FAIL;
|
||||
byteshift_val = 16 - (bitshift_val >> 3);
|
||||
byteshift = gen_rtx_CONST_INT (QImode, byteshift_val);
|
||||
emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
|
||||
byteshift));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "reduc_plus_v4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
|
||||
(define_insn "altivec_vsumsws_nomode"
|
||||
[(set (match_operand 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
|
||||
(match_operand:V4SI 2 "register_operand" "v")] 135))
|
||||
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
|
||||
"TARGET_ALTIVEC"
|
||||
"vsumsws %0,%1,%2"
|
||||
[(set_attr "type" "veccomplex")])
|
||||
|
||||
(define_expand "reduc_splus_<mode>"
|
||||
[(set (match_operand:VIshort 0 "register_operand" "=v")
|
||||
(unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")] 217))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx vzero = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp1 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp2 = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp3 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
|
||||
gen_rtx_CONST_INT (SImode, 8)));
|
||||
emit_insn (gen_addv4si3 (vtmp2, operands[1], vtmp1));
|
||||
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
|
||||
gen_rtx_CONST_INT (SImode, 4)));
|
||||
emit_insn (gen_addv4si3 (operands[0], vtmp2, vtmp3));
|
||||
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
|
||||
emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero));
|
||||
emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "reduc_plus_v4sf"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=v")
|
||||
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
|
||||
|
||||
(define_expand "reduc_uplus_v16qi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=v")
|
||||
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] 217))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx vtmp1 = gen_reg_rtx (V4SFmode);
|
||||
rtx vtmp2 = gen_reg_rtx (V4SFmode);
|
||||
rtx vtmp3 = gen_reg_rtx (V4SFmode);
|
||||
{
|
||||
rtx vzero = gen_reg_rtx (V4SImode);
|
||||
rtx vtmp1 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
|
||||
gen_rtx_CONST_INT (SImode, 8)));
|
||||
emit_insn (gen_addv4sf3 (vtmp2, operands[1], vtmp1));
|
||||
emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
|
||||
gen_rtx_CONST_INT (SImode, 4)));
|
||||
emit_insn (gen_addv4sf3 (operands[0], vtmp2, vtmp3));
|
||||
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
|
||||
emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero));
|
||||
emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
|
@ -8367,6 +8367,13 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
|
||||
return temp;
|
||||
}
|
||||
|
||||
case VEC_LSHIFT_EXPR:
|
||||
case VEC_RSHIFT_EXPR:
|
||||
{
|
||||
target = expand_vec_shift_expr (exp, target);
|
||||
return target;
|
||||
}
|
||||
|
||||
default:
|
||||
return lang_hooks.expand_expr (exp, original_target, tmode,
|
||||
modifier, alt_rtl);
|
||||
|
@ -196,6 +196,8 @@ static const char * const optabs[] =
|
||||
"vec_set_optab->handlers[$A].insn_code = CODE_FOR_$(vec_set$a$)",
|
||||
"vec_extract_optab->handlers[$A].insn_code = CODE_FOR_$(vec_extract$a$)",
|
||||
"vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)",
|
||||
"vec_shl_optab->handlers[$A].insn_code = CODE_FOR_$(vec_shl_$a$)",
|
||||
"vec_shr_optab->handlers[$A].insn_code = CODE_FOR_$(vec_shr_$a$)",
|
||||
"vec_realign_load_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_load_$a$)",
|
||||
"vcond_gen_code[$A] = CODE_FOR_$(vcond$a$)",
|
||||
"vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)",
|
||||
@ -203,7 +205,8 @@ static const char * const optabs[] =
|
||||
"reduc_umax_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umax_$a$)",
|
||||
"reduc_smin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_smin_$a$)",
|
||||
"reduc_umin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umin_$a$)",
|
||||
"reduc_plus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_plus_$a$)"
|
||||
"reduc_splus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_splus_$a$)" ,
|
||||
"reduc_uplus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_uplus_$a$)"
|
||||
};
|
||||
|
||||
static void gen_insn (rtx);
|
||||
|
68
gcc/optabs.c
68
gcc/optabs.c
@ -301,7 +301,13 @@ optab_for_tree_code (enum tree_code code, tree type)
|
||||
return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab;
|
||||
|
||||
case REDUC_PLUS_EXPR:
|
||||
return reduc_plus_optab;
|
||||
return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
|
||||
|
||||
case VEC_LSHIFT_EXPR:
|
||||
return vec_shl_optab;
|
||||
|
||||
case VEC_RSHIFT_EXPR:
|
||||
return vec_shr_optab;
|
||||
|
||||
default:
|
||||
break;
|
||||
@ -443,6 +449,61 @@ force_expand_binop (enum machine_mode mode, optab binoptab,
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Generate insns for VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR. */
|
||||
|
||||
rtx
|
||||
expand_vec_shift_expr (tree vec_shift_expr, rtx target)
|
||||
{
|
||||
enum insn_code icode;
|
||||
rtx rtx_op1, rtx_op2;
|
||||
enum machine_mode mode1;
|
||||
enum machine_mode mode2;
|
||||
enum machine_mode mode = TYPE_MODE (TREE_TYPE (vec_shift_expr));
|
||||
tree vec_oprnd = TREE_OPERAND (vec_shift_expr, 0);
|
||||
tree shift_oprnd = TREE_OPERAND (vec_shift_expr, 1);
|
||||
optab shift_optab;
|
||||
rtx pat;
|
||||
|
||||
switch (TREE_CODE (vec_shift_expr))
|
||||
{
|
||||
case VEC_RSHIFT_EXPR:
|
||||
shift_optab = vec_shr_optab;
|
||||
break;
|
||||
case VEC_LSHIFT_EXPR:
|
||||
shift_optab = vec_shl_optab;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
icode = (int) shift_optab->handlers[(int) mode].insn_code;
|
||||
gcc_assert (icode != CODE_FOR_nothing);
|
||||
|
||||
mode1 = insn_data[icode].operand[1].mode;
|
||||
mode2 = insn_data[icode].operand[2].mode;
|
||||
|
||||
rtx_op1 = expand_expr (vec_oprnd, NULL_RTX, VOIDmode, EXPAND_NORMAL);
|
||||
if (!(*insn_data[icode].operand[1].predicate) (rtx_op1, mode1)
|
||||
&& mode1 != VOIDmode)
|
||||
rtx_op1 = force_reg (mode1, rtx_op1);
|
||||
|
||||
rtx_op2 = expand_expr (shift_oprnd, NULL_RTX, VOIDmode, EXPAND_NORMAL);
|
||||
if (!(*insn_data[icode].operand[2].predicate) (rtx_op2, mode2)
|
||||
&& mode2 != VOIDmode)
|
||||
rtx_op2 = force_reg (mode2, rtx_op2);
|
||||
|
||||
if (!target
|
||||
|| ! (*insn_data[icode].operand[0].predicate) (target, mode))
|
||||
target = gen_reg_rtx (mode);
|
||||
|
||||
/* Emit instruction */
|
||||
pat = GEN_FCN (icode) (target, rtx_op1, rtx_op2);
|
||||
gcc_assert (pat);
|
||||
emit_insn (pat);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
/* This subroutine of expand_doubleword_shift handles the cases in which
|
||||
the effective shift value is >= BITS_PER_WORD. The arguments and return
|
||||
value are the same as for the parent routine, except that SUPERWORD_OP1
|
||||
@ -5074,11 +5135,14 @@ init_optabs (void)
|
||||
reduc_umax_optab = init_optab (UNKNOWN);
|
||||
reduc_smin_optab = init_optab (UNKNOWN);
|
||||
reduc_umin_optab = init_optab (UNKNOWN);
|
||||
reduc_plus_optab = init_optab (UNKNOWN);
|
||||
reduc_splus_optab = init_optab (UNKNOWN);
|
||||
reduc_uplus_optab = init_optab (UNKNOWN);
|
||||
|
||||
vec_extract_optab = init_optab (UNKNOWN);
|
||||
vec_set_optab = init_optab (UNKNOWN);
|
||||
vec_init_optab = init_optab (UNKNOWN);
|
||||
vec_shl_optab = init_optab (UNKNOWN);
|
||||
vec_shr_optab = init_optab (UNKNOWN);
|
||||
vec_realign_load_optab = init_optab (UNKNOWN);
|
||||
movmisalign_optab = init_optab (UNKNOWN);
|
||||
|
||||
|
14
gcc/optabs.h
14
gcc/optabs.h
@ -236,7 +236,8 @@ enum optab_index
|
||||
OTI_reduc_umax,
|
||||
OTI_reduc_smin,
|
||||
OTI_reduc_umin,
|
||||
OTI_reduc_plus,
|
||||
OTI_reduc_splus,
|
||||
OTI_reduc_uplus,
|
||||
|
||||
/* Set specified field of vector operand. */
|
||||
OTI_vec_set,
|
||||
@ -244,6 +245,9 @@ enum optab_index
|
||||
OTI_vec_extract,
|
||||
/* Initialize vector operand. */
|
||||
OTI_vec_init,
|
||||
/* Whole vector shift. The shift amount is in bits. */
|
||||
OTI_vec_shl,
|
||||
OTI_vec_shr,
|
||||
/* Extract specified elements from vectors, for vector load. */
|
||||
OTI_vec_realign_load,
|
||||
|
||||
@ -358,11 +362,14 @@ extern GTY(()) optab optab_table[OTI_MAX];
|
||||
#define reduc_umax_optab (optab_table[OTI_reduc_umax])
|
||||
#define reduc_smin_optab (optab_table[OTI_reduc_smin])
|
||||
#define reduc_umin_optab (optab_table[OTI_reduc_umin])
|
||||
#define reduc_plus_optab (optab_table[OTI_reduc_plus])
|
||||
#define reduc_splus_optab (optab_table[OTI_reduc_splus])
|
||||
#define reduc_uplus_optab (optab_table[OTI_reduc_uplus])
|
||||
|
||||
#define vec_set_optab (optab_table[OTI_vec_set])
|
||||
#define vec_extract_optab (optab_table[OTI_vec_extract])
|
||||
#define vec_init_optab (optab_table[OTI_vec_init])
|
||||
#define vec_shl_optab (optab_table[OTI_vec_shl])
|
||||
#define vec_shr_optab (optab_table[OTI_vec_shr])
|
||||
#define vec_realign_load_optab (optab_table[OTI_vec_realign_load])
|
||||
|
||||
#define powi_optab (optab_table[OTI_powi])
|
||||
@ -575,4 +582,7 @@ bool expand_vec_cond_expr_p (tree, enum machine_mode);
|
||||
/* Generate code for VEC_COND_EXPR. */
|
||||
extern rtx expand_vec_cond_expr (tree, rtx);
|
||||
|
||||
/* Generate code for VEC_LSHIFT_EXPR and VEC_RSHIFT_EXPR. */
|
||||
extern rtx expand_vec_shift_expr (tree, rtx);
|
||||
|
||||
#endif /* GCC_OPTABS_H */
|
||||
|
@ -1,3 +1,21 @@
|
||||
2005-06-21 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* lib/target-supports.exp (check_effective_target_vect_reduction):
|
||||
Remove.
|
||||
* gcc.dg/vect/vect.exp: Run tests with additional flags separately.
|
||||
* gcc.dg/vect/vect-reduc-1.c: Vectorizable on all relevant platforms -
|
||||
remove vect_reduction target keyword. Also avoid two returns in main.
|
||||
* gcc.dg/vect/vect-reduc-3.c: Likewise.
|
||||
* gcc.dg/vect/vect-reduc-2.c: Likewise. Also initialize diff to 0.
|
||||
* gcc.dg/vect/vect-reduc-1short.c: New test.
|
||||
* gcc.dg/vect/vect-reduc-1char.c: New test.
|
||||
* gcc.dg/vect/vect-reduc-2short.c: New test.
|
||||
* gcc.dg/vect/vect-reduc-2char.c: New test.
|
||||
* gcc.dg/vect/vect-reduc-6.c: New test.
|
||||
* gcc.dg/vect/trapv-vect-reduc-4.c: New test.
|
||||
* gcc.dg/vect/fast-math-vect-reduc-5.c: New test.
|
||||
* gcc.dg/vect/fast-math-vect-reduc-7.c: New test.
|
||||
|
||||
2005-06-21 Tobias Schl"uter <tobias.schlueter@physik.uni-muenchen.de>
|
||||
Paul Thomas <pault@gcc.gnu.org>
|
||||
|
||||
|
53
gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-5.c
Normal file
53
gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-5.c
Normal file
@ -0,0 +1,53 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
|
||||
/* need -funsafe-math-optimizations to vectorize the summation.
|
||||
also need -ffinite-math-only to create the min/max expr. */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 242
|
||||
|
||||
int main1 (float x, float max_result)
|
||||
{
|
||||
int i;
|
||||
float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
float diff = 2;
|
||||
float max = x;
|
||||
float min = 10;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
diff += (b[i] - c[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
max = max < c[i] ? c[i] : max;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
min = min > c[i] ? c[i] : min;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (diff != DIFF)
|
||||
abort ();
|
||||
if (max != max_result)
|
||||
abort ();
|
||||
if (min != 0)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
53
gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c
Normal file
53
gcc/testsuite/gcc.dg/vect/fast-math-vect-reduc-7.c
Normal file
@ -0,0 +1,53 @@
|
||||
/* { dg-require-effective-target vect_double } */
|
||||
|
||||
/* need -funsafe-math-optimizations to vectorize the summation.
|
||||
also need -ffinite-math-only to create the min/max expr. */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 242
|
||||
|
||||
int main1 (double x, double max_result)
|
||||
{
|
||||
int i;
|
||||
double b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
double c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
double diff = 2;
|
||||
double max = x;
|
||||
double min = 10;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
diff += (b[i] - c[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
max = max < c[i] ? c[i] : max;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
min = min > c[i] ? c[i] : min;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (diff != DIFF)
|
||||
abort ();
|
||||
if (max != max_result)
|
||||
abort ();
|
||||
if (min != 0)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
49
gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
Normal file
49
gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c
Normal file
@ -0,0 +1,49 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
/* { dg-do compile } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 242
|
||||
|
||||
int main1 (int x, int max_result)
|
||||
{
|
||||
int i;
|
||||
int b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
int c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
int diff = 2;
|
||||
int max = x;
|
||||
int min = 10;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
diff += (b[i] - c[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
max = max < c[i] ? c[i] : max;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
min = min > c[i] ? c[i] : min;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (diff != DIFF)
|
||||
abort ();
|
||||
if (max != max_result)
|
||||
abort ();
|
||||
if (min != 0)
|
||||
abort ();
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
|
@ -47,9 +47,9 @@ int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 (100, 100);
|
||||
return main1 (0, 15);
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
51
gcc/testsuite/gcc.dg/vect/vect-reduc-1char.c
Normal file
51
gcc/testsuite/gcc.dg/vect/vect-reduc-1char.c
Normal file
@ -0,0 +1,51 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 242
|
||||
|
||||
int main1 (unsigned char x, unsigned char max_result)
|
||||
{
|
||||
int i;
|
||||
unsigned char ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned char uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
unsigned char udiff = 2;
|
||||
unsigned char umax = x;
|
||||
unsigned char umin = 10;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
udiff += (unsigned char)(ub[i] - uc[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
umax = umax < uc[i] ? uc[i] : umax;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
umin = umin > uc[i] ? uc[i] : umin;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (udiff != DIFF)
|
||||
abort ();
|
||||
if (umax != max_result)
|
||||
abort ();
|
||||
if (umin != 0)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
51
gcc/testsuite/gcc.dg/vect/vect-reduc-1short.c
Normal file
51
gcc/testsuite/gcc.dg/vect/vect-reduc-1short.c
Normal file
@ -0,0 +1,51 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 242
|
||||
|
||||
int main1 (unsigned short x, unsigned short max_result)
|
||||
{
|
||||
int i;
|
||||
unsigned short ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned short uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
unsigned short udiff = 2;
|
||||
unsigned short umax = x;
|
||||
unsigned short umin = 10;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
udiff += (unsigned short)(ub[i] - uc[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
umax = umax < uc[i] ? uc[i] : umax;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
umin = umin > uc[i] ? uc[i] : umin;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (udiff != DIFF)
|
||||
abort ();
|
||||
if (umax != max_result)
|
||||
abort ();
|
||||
if (umin != 0)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
@ -1,11 +1,10 @@
|
||||
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 242
|
||||
#define DIFF 240
|
||||
|
||||
/* Test vectorization of reduction of signed-int. */
|
||||
|
||||
@ -14,7 +13,7 @@ int main1 (int x, int max_result)
|
||||
int i;
|
||||
int b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
int c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
int diff = 2;
|
||||
int diff = 0;
|
||||
int max = x;
|
||||
int min = 10;
|
||||
|
||||
@ -45,9 +44,10 @@ int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 (100, 100);
|
||||
return main1 (0, 15);
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
51
gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
Normal file
51
gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c
Normal file
@ -0,0 +1,51 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 121
|
||||
|
||||
int main1 (char x, char max_result)
|
||||
{
|
||||
int i;
|
||||
char b[N] = {0,2,3,6,8,10,12,14,16,18,20,22,24,26,28,30};
|
||||
char c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
signed char diff = 2;
|
||||
char max = x;
|
||||
char min = 10;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
diff += (b[i] - c[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
max = max < c[i] ? c[i] : max;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
min = min > c[i] ? c[i] : min;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (diff != DIFF)
|
||||
abort ();
|
||||
if (max != max_result)
|
||||
abort ();
|
||||
if (min != 0)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
return 0 ;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
51
gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
Normal file
51
gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c
Normal file
@ -0,0 +1,51 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include <stdio.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 242
|
||||
|
||||
int main1 (short x, short max_result)
|
||||
{
|
||||
int i;
|
||||
short b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
short c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
short diff = 2;
|
||||
short max = x;
|
||||
short min = 10;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
diff += (b[i] - c[i]);
|
||||
}
|
||||
for (i = 0; i < N; i++) {
|
||||
max = max < c[i] ? c[i] : max;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
min = min > c[i] ? c[i] : min;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (diff != DIFF)
|
||||
abort ();
|
||||
if (max != max_result)
|
||||
abort ();
|
||||
if (min != 0)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (100, 100);
|
||||
main1 (0, 15);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
@ -4,12 +4,11 @@
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 240
|
||||
|
||||
/* Test vectorization of reduction of unsigned-int in the presence
|
||||
of unknown-loop-bound. */
|
||||
|
||||
int main1 (int n)
|
||||
int main1 (int n, int res)
|
||||
{
|
||||
int i;
|
||||
unsigned int ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
@ -22,7 +21,7 @@ int main1 (int n)
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (udiff != DIFF)
|
||||
if (udiff != res)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
@ -32,9 +31,10 @@ int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 (N);
|
||||
return main1 (N-1);
|
||||
main1 (N, 240);
|
||||
main1 (N-1, 210);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail {! vect_reduction} } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
51
gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
Normal file
51
gcc/testsuite/gcc.dg/vect/vect-reduc-6.c
Normal file
@ -0,0 +1,51 @@
|
||||
/* { dg-require-effective-target vect_float } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define DIFF 242
|
||||
|
||||
int main1 (float x, float max_result)
|
||||
{
|
||||
int i;
|
||||
float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
float diff = 2;
|
||||
float max = x;
|
||||
float min = 10;
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
diff += (b[i] - c[i]);
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
max = max < c[i] ? c[i] : max;
|
||||
}
|
||||
|
||||
for (i = 0; i < N; i++) {
|
||||
min = min > c[i] ? c[i] : min;
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
if (diff != DIFF)
|
||||
abort ();
|
||||
if (max != max_result)
|
||||
abort ();
|
||||
if (min != 0)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (100 ,100);
|
||||
main1 (0, 15);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* need -ffast-math to vectorize these loops. */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
|
@ -76,7 +76,25 @@ if [istarget "powerpc*-*-*"] {
|
||||
dg-init
|
||||
|
||||
# Main loop.
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
|
||||
#### Tests with special options
|
||||
global SAVED_DEFAULT_VECTCFLAGS
|
||||
set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
|
||||
|
||||
# -ffast-math tests
|
||||
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
|
||||
lappend DEFAULT_VECTCFLAGS "-ffast-math"
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-vect*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
|
||||
# -ftrapv tests
|
||||
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
|
||||
lappend DEFAULT_VECTCFLAGS "-ftrapv"
|
||||
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/trapv-vect*.\[cS\]]] \
|
||||
"" $DEFAULT_VECTCFLAGS
|
||||
|
||||
# Clean up.
|
||||
|
@ -988,23 +988,6 @@ proc check_effective_target_vect_int_mult { } {
|
||||
return $et_vect_int_mult_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target supports vector reduction
|
||||
|
||||
proc check_effective_target_vect_reduction { } {
|
||||
global et_vect_reduction_saved
|
||||
|
||||
if [info exists et_vect_reduction_saved] {
|
||||
verbose "check_effective_target_vect_reduction: using cached result" 2
|
||||
} else {
|
||||
set et_vect_reduction_saved 0
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
set et_vect_reduction_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_reduction: returning $et_vect_reduction_saved" 2
|
||||
return $et_vect_reduction_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target supports atomic operations on "int" and "long".
|
||||
|
||||
proc check_effective_target_sync_int_long { } {
|
||||
|
@ -1692,6 +1692,8 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
|
||||
case RSHIFT_EXPR:
|
||||
case LROTATE_EXPR:
|
||||
case RROTATE_EXPR:
|
||||
case VEC_LSHIFT_EXPR:
|
||||
case VEC_RSHIFT_EXPR:
|
||||
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
|
@ -1043,6 +1043,8 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
|
||||
case RSHIFT_EXPR:
|
||||
case LROTATE_EXPR:
|
||||
case RROTATE_EXPR:
|
||||
case VEC_LSHIFT_EXPR:
|
||||
case VEC_RSHIFT_EXPR:
|
||||
case BIT_IOR_EXPR:
|
||||
case BIT_XOR_EXPR:
|
||||
case BIT_AND_EXPR:
|
||||
@ -1838,6 +1840,8 @@ op_prio (tree op)
|
||||
case REDUC_MAX_EXPR:
|
||||
case REDUC_MIN_EXPR:
|
||||
case REDUC_PLUS_EXPR:
|
||||
case VEC_LSHIFT_EXPR:
|
||||
case VEC_RSHIFT_EXPR:
|
||||
return 16;
|
||||
|
||||
case SAVE_EXPR:
|
||||
@ -1925,6 +1929,12 @@ op_symbol (tree op)
|
||||
case RSHIFT_EXPR:
|
||||
return ">>";
|
||||
|
||||
case VEC_LSHIFT_EXPR:
|
||||
return "v<<";
|
||||
|
||||
case VEC_RSHIFT_EXPR:
|
||||
return "v>>";
|
||||
|
||||
case PLUS_EXPR:
|
||||
return "+";
|
||||
|
||||
|
@ -448,6 +448,7 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
|
||||
compute_type = TREE_TYPE (type);
|
||||
}
|
||||
|
||||
gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
|
||||
rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
|
||||
if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
|
||||
*p_rhs = rhs;
|
||||
|
@ -834,6 +834,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
|
||||
enum machine_mode mode = TYPE_MODE (vectype);
|
||||
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
basic_block exit_bb;
|
||||
@ -843,15 +844,18 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
|
||||
block_stmt_iterator exit_bsi;
|
||||
tree vec_dest;
|
||||
tree new_temp;
|
||||
tree new_name;
|
||||
tree epilog_stmt;
|
||||
tree new_scalar_dest, exit_phi;
|
||||
tree bitsize, bitpos;
|
||||
tree bitsize, bitpos, bytesize;
|
||||
enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
|
||||
tree scalar_initial_def;
|
||||
tree vec_initial_def;
|
||||
tree orig_name;
|
||||
imm_use_iterator imm_iter;
|
||||
use_operand_p use_p;
|
||||
bool extract_scalar_result;
|
||||
bool adjust_in_epilog;
|
||||
|
||||
/*** 1. Create the reduction def-use cycle ***/
|
||||
|
||||
@ -888,63 +892,214 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
|
||||
exit_bsi = bsi_start (exit_bb);
|
||||
|
||||
|
||||
/* 2.2 Create:
|
||||
v_out2 = reduc_expr <v_out1>
|
||||
s_out3 = extract_field <v_out2, 0> */
|
||||
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
|
||||
build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
|
||||
new_temp = make_ssa_name (vec_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
{
|
||||
fprintf (vect_dump, "transform reduction: created epilog code:");
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
}
|
||||
|
||||
new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
|
||||
bitsize = TYPE_SIZE (scalar_type);
|
||||
bytesize = TYPE_SIZE_UNIT (scalar_type);
|
||||
|
||||
/* The result is in the low order bits. */
|
||||
if (BITS_BIG_ENDIAN)
|
||||
bitpos = size_binop (MULT_EXPR,
|
||||
bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
|
||||
TYPE_SIZE (scalar_type));
|
||||
/* 2.2 Create the reduction code. */
|
||||
|
||||
if (reduc_code < NUM_TREE_CODES)
|
||||
{
|
||||
/*** Case 1: Create:
|
||||
v_out2 = reduc_expr <v_out1> */
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
fprintf (vect_dump, "Reduce using direct vector reduction.");
|
||||
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
|
||||
build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
|
||||
new_temp = make_ssa_name (vec_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
|
||||
extract_scalar_result = true;
|
||||
adjust_in_epilog = true;
|
||||
}
|
||||
else
|
||||
bitpos = bitsize_zero_node;
|
||||
{
|
||||
enum tree_code shift_code;
|
||||
bool have_whole_vector_shift = true;
|
||||
enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
|
||||
int bit_offset;
|
||||
int element_bitsize = tree_low_cst (bitsize, 1);
|
||||
int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
|
||||
tree vec_temp;
|
||||
|
||||
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
|
||||
build3 (BIT_FIELD_REF, scalar_type,
|
||||
new_temp, bitsize, bitpos));
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
/* The result of the reduction is expected to be at the LSB bits
|
||||
of the vector. For big-endian targets this means at the right
|
||||
end of the vector. For little-endian targets this means at the
|
||||
left end of the vector. */
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
if (BITS_BIG_ENDIAN
|
||||
&& vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
|
||||
shift_code = VEC_RSHIFT_EXPR;
|
||||
else if (!BITS_BIG_ENDIAN
|
||||
&& vec_shl_optab->handlers[mode].insn_code != CODE_FOR_nothing)
|
||||
shift_code = VEC_LSHIFT_EXPR;
|
||||
else
|
||||
have_whole_vector_shift = false;
|
||||
|
||||
if (have_whole_vector_shift)
|
||||
{
|
||||
/*** Case 2:
|
||||
for (offset = VS/2; offset >= element_size; offset/=2)
|
||||
{
|
||||
Create: va' = vec_shift <va, offset>
|
||||
Create: va = vop <va, va'>
|
||||
} */
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
fprintf (vect_dump, "Reduce using vector shifts");
|
||||
|
||||
vec_dest = vect_create_destination_var (scalar_dest, vectype);
|
||||
new_temp = PHI_RESULT (new_phi);
|
||||
|
||||
for (bit_offset = vec_size_in_bits/2;
|
||||
bit_offset >= element_bitsize;
|
||||
bit_offset /= 2)
|
||||
{
|
||||
tree bitpos = size_int (bit_offset);
|
||||
|
||||
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
|
||||
build2 (shift_code, vectype, new_temp, bitpos));
|
||||
new_name = make_ssa_name (vec_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_name;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
|
||||
|
||||
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
|
||||
build2 (code, vectype, new_name, new_temp));
|
||||
new_temp = make_ssa_name (vec_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
}
|
||||
|
||||
extract_scalar_result = true;
|
||||
adjust_in_epilog = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
/*** Case 3:
|
||||
Create: s = init;
|
||||
for (offset=0; offset<vector_size; offset+=element_size;)
|
||||
{
|
||||
Create: s' = extract_field <v_out2, offset>
|
||||
Create: s = op <s, s'>
|
||||
} */
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
fprintf (vect_dump, "Reduce using scalar code. ");
|
||||
|
||||
vec_temp = PHI_RESULT (new_phi);
|
||||
vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
|
||||
|
||||
/* first iteration is peeled out when possible to minimize
|
||||
the number of operations we generate: */
|
||||
if (code == PLUS_EXPR
|
||||
&& (integer_zerop (scalar_initial_def)
|
||||
|| real_zerop (scalar_initial_def)))
|
||||
{
|
||||
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
|
||||
build3 (BIT_FIELD_REF, scalar_type,
|
||||
vec_temp, bitsize, bitsize_zero_node));
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
|
||||
bit_offset = element_bitsize;
|
||||
}
|
||||
else
|
||||
{
|
||||
new_temp = scalar_initial_def;
|
||||
bit_offset = 0;
|
||||
}
|
||||
|
||||
for (;
|
||||
bit_offset < vec_size_in_bits;
|
||||
bit_offset += element_bitsize)
|
||||
{
|
||||
tree bitpos = bitsize_int (bit_offset);
|
||||
|
||||
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
|
||||
build3 (BIT_FIELD_REF, scalar_type,
|
||||
vec_temp, bitsize, bitpos));
|
||||
new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_name;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
|
||||
|
||||
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
|
||||
build2 (code, scalar_type, new_name, new_temp));
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
}
|
||||
|
||||
extract_scalar_result = false;
|
||||
adjust_in_epilog = false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/* 2.3 Extract the final scalar result. Create:
|
||||
s_out3 = extract_field <v_out2, bitpos> */
|
||||
|
||||
/* 2.3 Adjust the final result by the initial value of the reduction
|
||||
variable. (when such adjustment is not needed, then
|
||||
'scalar_initial_def' is zero).
|
||||
if (extract_scalar_result)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
fprintf (vect_dump, "extract scalar result");
|
||||
|
||||
Create:
|
||||
s_out = scalar_expr <s_out, scalar_initial_def> */
|
||||
/* The result is in the low order bits. */
|
||||
if (BITS_BIG_ENDIAN)
|
||||
bitpos = size_binop (MULT_EXPR,
|
||||
bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
|
||||
TYPE_SIZE (scalar_type));
|
||||
else
|
||||
bitpos = bitsize_zero_node;
|
||||
|
||||
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
|
||||
build2 (code, scalar_type, new_temp, scalar_initial_def));
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
|
||||
build3 (BIT_FIELD_REF, scalar_type,
|
||||
new_temp, bitsize, bitpos));
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
}
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
|
||||
|
||||
/* 2.4 Replace uses of s_out0 with uses of s_out3 */
|
||||
/* 2.4 Adjust the final result by the initial value of the reduction
|
||||
variable. (when such adjustment is not needed, then
|
||||
'scalar_initial_def' is zero).
|
||||
|
||||
Create:
|
||||
s_out = scalar_expr <s_out, scalar_initial_def> */
|
||||
|
||||
if (adjust_in_epilog)
|
||||
{
|
||||
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
|
||||
build2 (code, scalar_type, new_temp, scalar_initial_def));
|
||||
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
|
||||
TREE_OPERAND (epilog_stmt, 0) = new_temp;
|
||||
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
|
||||
}
|
||||
|
||||
|
||||
/* 2.5 Replace uses of s_out0 with uses of s_out3 */
|
||||
|
||||
/* Find the loop-closed-use at the loop exit of the original
|
||||
scalar result. (The reduction result is expected to have
|
||||
@ -954,10 +1109,10 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
|
||||
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
|
||||
{
|
||||
if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
|
||||
{
|
||||
exit_phi = USE_STMT (use_p);
|
||||
break;
|
||||
}
|
||||
{
|
||||
exit_phi = USE_STMT (use_p);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
orig_name = PHI_RESULT (exit_phi);
|
||||
@ -1067,13 +1222,13 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
fprintf (vect_dump, "no optab for reduction.");
|
||||
return false;
|
||||
reduc_code = NUM_TREE_CODES;
|
||||
}
|
||||
if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
|
||||
fprintf (vect_dump, "op not supported by target.");
|
||||
return false;
|
||||
fprintf (vect_dump, "reduc op not supported by target.");
|
||||
reduc_code = NUM_TREE_CODES;
|
||||
}
|
||||
|
||||
if (!vec_stmt) /* transformation not required. */
|
||||
|
@ -957,6 +957,12 @@ DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1)
|
||||
DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1)
|
||||
DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1)
|
||||
|
||||
/* Whole vector left/right shift in bits.
|
||||
Operand 0 is a vector to be shifted.
|
||||
Operand 1 is an integer shift amount in bits. */
|
||||
DEFTREECODE (VEC_LSHIFT_EXPR, "vec_lshift_expr", tcc_binary, 2)
|
||||
DEFTREECODE (VEC_RSHIFT_EXPR, "vec_rshift_expr", tcc_binary, 2)
|
||||
|
||||
/*
|
||||
Local variables:
|
||||
mode:c
|
||||
|
Loading…
Reference in New Issue
Block a user