genopinit.c (vec_shl_optab, [...]): Initialize new optabs.

* genopinit.c (vec_shl_optab, vec_shr_optab): Initialize new optabs.
        (reduc_plus_optab): Removed.  Replaced with...
        (reduc_splus_optab, reduc_uplus_optab): Initialize new optabs.
        * optabs.c (optab_for_tree_code): Return reduc_splus_optab or
        reduc_uplus_optab instead of reduc_plus_optab.
        (expand_vec_shift_expr): New function.
        (init_optabs): Initialize new optabs. Remove initialization of
        reduc_plus_optab.
        (optab_for_tree_code): Return vec_shl_optab/vec_shr_optab
        for VEC_LSHIFT_EXPR/VEC_RSHIFT_EXPR.
        * optabs.h (OTI_reduc_plus): Removed. Replaced with...
        (OTI_reduc_splus, OTI_reduc_uplus): New.
        (reduc_plus_optab): Removed.  Replaced with...
        (reduc_splus_optab, reduc_uplus_optab): New optabs.
        (vec_shl_optab, vec_shr_optab): New optabs.
        (expand_vec_shift_expr): New function declaration.

        * tree.def (VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR): New tree-codes.
        * tree-inline.c (estimate_num_insns_1): Handle new tree-codes.
        * expr.c (expand_expr_real_1): Handle new tree-codes.
        * tree-pretty-print.c (dump_generic_node, op_symbol, op_prio): Likewise.
        * tree-vect-generic.c (expand_vector_operations_1): Add assert.

        * tree-vect-transform.c (vect_create_epilog_for_reduction): Add two
        alternatives for generating reduction epilog code.
        (vectorizable_reduction): Don't fail if direct reduction support is
        not available.
        (vectorizable_target_reduction_pattern): Likewise.

        * config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf,
        reduc_umax_v4si, reduc_smin_v4si, reduc_smin_v4sf, reduc_umin_v4si,
        reduc_plus_v4si, reduc_plus_v4sf): Removed.
        (vec_shl_<mode>, vec_shr_<mode>, altivec_vsumsws_nomode,
        reduc_splus_<mode>, reduc_uplus_v16qi): New.

From-SVN: r101231
This commit is contained in:
Dorit Nuzman 2005-06-21 09:02:00 +00:00 committed by Dorit Nuzman
parent a3a2067ac5
commit a6b46ba2c8
25 changed files with 886 additions and 219 deletions

View File

@ -1,3 +1,40 @@
2005-06-21 Dorit Nuzman <dorit@il.ibm.com>
* genopinit.c (vec_shl_optab, vec_shr_optab): Initialize new optabs.
(reduc_plus_optab): Removed. Replaced with...
(reduc_splus_optab, reduc_uplus_optab): Initialize new optabs.
* optabs.c (optab_for_tree_code): Return reduc_splus_optab or
reduc_uplus_optab instead of reduc_plus_optab.
(expand_vec_shift_expr): New function.
(init_optabs): Initialize new optabs. Remove initialization of
reduc_plus_optab.
(optab_for_tree_code): Return vec_shl_optab/vec_shr_optab
for VEC_LSHIFT_EXPR/VEC_RSHIFT_EXPR.
* optabs.h (OTI_reduc_plus): Removed. Replaced with...
(OTI_reduc_splus, OTI_reduc_uplus): New.
(reduc_plus_optab): Removed. Replaced with...
(reduc_splus_optab, reduc_uplus_optab): New optabs.
(vec_shl_optab, vec_shr_optab): New optabs.
(expand_vec_shift_expr): New function declaration.
* tree.def (VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR): New tree-codes.
* tree-inline.c (estimate_num_insns_1): Handle new tree-codes.
* expr.c (expand_expr_real_1): Handle new tree-codes.
* tree-pretty-print.c (dump_generic_node, op_symbol, op_prio): Likewise.
* tree-vect-generic.c (expand_vector_operations_1): Add assert.
* tree-vect-transform.c (vect_create_epilog_for_reduction): Add two
alternatives for generating reduction epilog code.
(vectorizable_reduction): Don't fail if direct reduction support is
not available.
(vectorizable_target_reduction_pattern): Likewise.
* config/rs6000/altivec.md (reduc_smax_v4si, reduc_smax_v4sf,
reduc_umax_v4si, reduc_smin_v4si, reduc_smin_v4sf, reduc_umin_v4si,
reduc_plus_v4si, reduc_plus_v4sf): Removed.
(vec_shl_<mode>, vec_shr_<mode>, altivec_vsumsws_nomode,
reduc_splus_<mode>, reduc_uplus_v16qi): New.
2005-06-20 Daniel Berlin <dberlin@dberlin.org>
* c-typeck.c (build_function_call): Set fundecl = function again.

View File

@ -1825,157 +1825,100 @@
operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
})
;; Reduction
(define_expand "reduc_smax_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
rtx vtmp1 = gen_reg_rtx (V4SImode);
rtx vtmp2 = gen_reg_rtx (V4SImode);
rtx vtmp3 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
gen_rtx_CONST_INT (SImode, 8)));
emit_insn (gen_smaxv4si3 (vtmp2, operands[1], vtmp1));
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
gen_rtx_CONST_INT (SImode, 4)));
emit_insn (gen_smaxv4si3 (operands[0], vtmp2, vtmp3));
DONE;
}")
(define_expand "reduc_smax_v4sf"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
rtx vtmp1 = gen_reg_rtx (V4SFmode);
rtx vtmp2 = gen_reg_rtx (V4SFmode);
rtx vtmp3 = gen_reg_rtx (V4SFmode);
emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
gen_rtx_CONST_INT (SImode, 8)));
emit_insn (gen_smaxv4sf3 (vtmp2, operands[1], vtmp1));
emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
gen_rtx_CONST_INT (SImode, 4)));
emit_insn (gen_smaxv4sf3 (operands[0], vtmp2, vtmp3));
DONE;
}")
(define_expand "reduc_umax_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
rtx vtmp1 = gen_reg_rtx (V4SImode);
rtx vtmp2 = gen_reg_rtx (V4SImode);
rtx vtmp3 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
gen_rtx_CONST_INT (SImode, 8)));
emit_insn (gen_umaxv4si3 (vtmp2, operands[1], vtmp1));
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
gen_rtx_CONST_INT (SImode, 4)));
emit_insn (gen_umaxv4si3 (operands[0], vtmp2, vtmp3));
DONE;
}")
(define_expand "reduc_smin_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
rtx vtmp1 = gen_reg_rtx (V4SImode);
rtx vtmp2 = gen_reg_rtx (V4SImode);
rtx vtmp3 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
gen_rtx_CONST_INT (SImode, 8)));
emit_insn (gen_sminv4si3 (vtmp2, operands[1], vtmp1));
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
gen_rtx_CONST_INT (SImode, 4)));
emit_insn (gen_sminv4si3 (operands[0], vtmp2, vtmp3));
DONE;
}")
(define_expand "reduc_smin_v4sf"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
;; Vector shift left in bits. Currently supported only for shift
;; amounts that can be expressed as byte shifts (divisible by 8).
;; General shift amounts can be supported using vslo + vsl. We're
;; not expecting to see these yet (the vectorizer currently
;; generates only shifts divisible by byte_size).
(define_expand "vec_shl_<mode>"
[(set (match_operand:V 0 "register_operand" "=v")
(unspec:V [(match_operand:V 1 "register_operand" "v")
(match_operand:QI 2 "reg_or_short_operand" "")] 219 ))]
"TARGET_ALTIVEC"
"
{
rtx vtmp1 = gen_reg_rtx (V4SFmode);
rtx vtmp2 = gen_reg_rtx (V4SFmode);
rtx vtmp3 = gen_reg_rtx (V4SFmode);
rtx bitshift = operands[2];
rtx byteshift = gen_reg_rtx (QImode);
HOST_WIDE_INT bitshift_val;
HOST_WIDE_INT byteshift_val;
emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
gen_rtx_CONST_INT (SImode, 8)));
emit_insn (gen_sminv4sf3 (vtmp2, operands[1], vtmp1));
emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
gen_rtx_CONST_INT (SImode, 4)));
emit_insn (gen_sminv4sf3 (operands[0], vtmp2, vtmp3));
if (! CONSTANT_P (bitshift))
FAIL;
bitshift_val = INTVAL (bitshift);
if (bitshift_val & 0x7)
FAIL;
byteshift_val = bitshift_val >> 3;
byteshift = gen_rtx_CONST_INT (QImode, byteshift_val);
emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
byteshift));
DONE;
}")
(define_expand "reduc_umin_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
;; Vector shift right in bits. Currently supported only for shift
;; amounts that can be expressed as byte shifts (divisible by 8).
;; General shift amounts can be supported using vsro + vsr. We're
;; not expecting to see these yet (the vectorizer currently
;; generates only shifts divisible by byte_size).
(define_expand "vec_shr_<mode>"
[(set (match_operand:V 0 "register_operand" "=v")
(unspec:V [(match_operand:V 1 "register_operand" "v")
(match_operand:QI 2 "reg_or_short_operand" "")] 219 ))]
"TARGET_ALTIVEC"
"
{
rtx vtmp1 = gen_reg_rtx (V4SImode);
rtx vtmp2 = gen_reg_rtx (V4SImode);
rtx vtmp3 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
gen_rtx_CONST_INT (SImode, 8)));
emit_insn (gen_uminv4si3 (vtmp2, operands[1], vtmp1));
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
gen_rtx_CONST_INT (SImode, 4)));
emit_insn (gen_uminv4si3 (operands[0], vtmp2, vtmp3));
rtx bitshift = operands[2];
rtx byteshift = gen_reg_rtx (QImode);
HOST_WIDE_INT bitshift_val;
HOST_WIDE_INT byteshift_val;
if (! CONSTANT_P (bitshift))
FAIL;
bitshift_val = INTVAL (bitshift);
if (bitshift_val & 0x7)
FAIL;
byteshift_val = 16 - (bitshift_val >> 3);
byteshift = gen_rtx_CONST_INT (QImode, byteshift_val);
emit_insn (gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
byteshift));
DONE;
}")
(define_expand "reduc_plus_v4si"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")] 217))]
(define_insn "altivec_vsumsws_nomode"
[(set (match_operand 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")] 135))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
"TARGET_ALTIVEC"
"vsumsws %0,%1,%2"
[(set_attr "type" "veccomplex")])
(define_expand "reduc_splus_<mode>"
[(set (match_operand:VIshort 0 "register_operand" "=v")
(unspec:VIshort [(match_operand:VIshort 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
rtx vzero = gen_reg_rtx (V4SImode);
rtx vtmp1 = gen_reg_rtx (V4SImode);
rtx vtmp2 = gen_reg_rtx (V4SImode);
rtx vtmp3 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vsldoi_v4si (vtmp1, operands[1], operands[1],
gen_rtx_CONST_INT (SImode, 8)));
emit_insn (gen_addv4si3 (vtmp2, operands[1], vtmp1));
emit_insn (gen_altivec_vsldoi_v4si (vtmp3, vtmp2, vtmp2,
gen_rtx_CONST_INT (SImode, 4)));
emit_insn (gen_addv4si3 (operands[0], vtmp2, vtmp3));
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
emit_insn (gen_altivec_vsum4s<VI_char>s (vtmp1, operands[1], vzero));
emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
DONE;
}")
(define_expand "reduc_plus_v4sf"
[(set (match_operand:V4SF 0 "register_operand" "=v")
(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "v")] 217))]
(define_expand "reduc_uplus_v16qi"
[(set (match_operand:V16QI 0 "register_operand" "=v")
(unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")] 217))]
"TARGET_ALTIVEC"
"
{
rtx vtmp1 = gen_reg_rtx (V4SFmode);
rtx vtmp2 = gen_reg_rtx (V4SFmode);
rtx vtmp3 = gen_reg_rtx (V4SFmode);
{
rtx vzero = gen_reg_rtx (V4SImode);
rtx vtmp1 = gen_reg_rtx (V4SImode);
emit_insn (gen_altivec_vsldoi_v4sf (vtmp1, operands[1], operands[1],
gen_rtx_CONST_INT (SImode, 8)));
emit_insn (gen_addv4sf3 (vtmp2, operands[1], vtmp1));
emit_insn (gen_altivec_vsldoi_v4sf (vtmp3, vtmp2, vtmp2,
gen_rtx_CONST_INT (SImode, 4)));
emit_insn (gen_addv4sf3 (operands[0], vtmp2, vtmp3));
emit_insn (gen_altivec_vspltisw (vzero, const0_rtx));
emit_insn (gen_altivec_vsum4ubs (vtmp1, operands[1], vzero));
emit_insn (gen_altivec_vsumsws_nomode (operands[0], vtmp1, vzero));
DONE;
}")

View File

@ -8367,6 +8367,13 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
return temp;
}
case VEC_LSHIFT_EXPR:
case VEC_RSHIFT_EXPR:
{
target = expand_vec_shift_expr (exp, target);
return target;
}
default:
return lang_hooks.expand_expr (exp, original_target, tmode,
modifier, alt_rtl);

View File

@ -196,6 +196,8 @@ static const char * const optabs[] =
"vec_set_optab->handlers[$A].insn_code = CODE_FOR_$(vec_set$a$)",
"vec_extract_optab->handlers[$A].insn_code = CODE_FOR_$(vec_extract$a$)",
"vec_init_optab->handlers[$A].insn_code = CODE_FOR_$(vec_init$a$)",
"vec_shl_optab->handlers[$A].insn_code = CODE_FOR_$(vec_shl_$a$)",
"vec_shr_optab->handlers[$A].insn_code = CODE_FOR_$(vec_shr_$a$)",
"vec_realign_load_optab->handlers[$A].insn_code = CODE_FOR_$(vec_realign_load_$a$)",
"vcond_gen_code[$A] = CODE_FOR_$(vcond$a$)",
"vcondu_gen_code[$A] = CODE_FOR_$(vcondu$a$)",
@ -203,7 +205,8 @@ static const char * const optabs[] =
"reduc_umax_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umax_$a$)",
"reduc_smin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_smin_$a$)",
"reduc_umin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umin_$a$)",
"reduc_plus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_plus_$a$)"
"reduc_splus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_splus_$a$)" ,
"reduc_uplus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_uplus_$a$)"
};
static void gen_insn (rtx);

View File

@ -301,7 +301,13 @@ optab_for_tree_code (enum tree_code code, tree type)
return TYPE_UNSIGNED (type) ? reduc_umin_optab : reduc_smin_optab;
case REDUC_PLUS_EXPR:
return reduc_plus_optab;
return TYPE_UNSIGNED (type) ? reduc_uplus_optab : reduc_splus_optab;
case VEC_LSHIFT_EXPR:
return vec_shl_optab;
case VEC_RSHIFT_EXPR:
return vec_shr_optab;
default:
break;
@ -443,6 +449,61 @@ force_expand_binop (enum machine_mode mode, optab binoptab,
return true;
}
/* Expand VEC_SHIFT_EXPR, which must be a VEC_LSHIFT_EXPR or a
   VEC_RSHIFT_EXPR, into RTL.  The insn is looked up in vec_shl_optab or
   vec_shr_optab for the machine mode of the expression's type; a handler
   for that mode is asserted to exist.  TARGET is reused for the result
   when it is non-null and accepted by the insn's operand 0 predicate,
   otherwise a new pseudo register is created.  Returns the rtx that
   holds the result.  */
rtx
expand_vec_shift_expr (tree vec_shift_expr, rtx target)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (vec_shift_expr));
  tree vec_oprnd = TREE_OPERAND (vec_shift_expr, 0);
  tree shift_oprnd = TREE_OPERAND (vec_shift_expr, 1);
  enum tree_code code = TREE_CODE (vec_shift_expr);
  enum machine_mode mode1, mode2;
  enum insn_code icode;
  optab shift_optab;
  rtx rtx_op1, rtx_op2, pat;

  /* Select the optab that matches the shift direction.  */
  if (code == VEC_RSHIFT_EXPR)
    shift_optab = vec_shr_optab;
  else if (code == VEC_LSHIFT_EXPR)
    shift_optab = vec_shl_optab;
  else
    gcc_unreachable ();

  icode = (int) shift_optab->handlers[(int) mode].insn_code;
  gcc_assert (icode != CODE_FOR_nothing);

  /* Modes the insn expects for the vector and the shift amount.  */
  mode1 = insn_data[icode].operand[1].mode;
  mode2 = insn_data[icode].operand[2].mode;

  /* Operand 1: the vector being shifted.  Copy it into a register when
     the insn's predicate rejects the expanded form.  */
  rtx_op1 = expand_expr (vec_oprnd, NULL_RTX, VOIDmode, EXPAND_NORMAL);
  if (!(*insn_data[icode].operand[1].predicate) (rtx_op1, mode1))
    {
      if (mode1 != VOIDmode)
	rtx_op1 = force_reg (mode1, rtx_op1);
    }

  /* Operand 2: the shift amount, handled the same way.  */
  rtx_op2 = expand_expr (shift_oprnd, NULL_RTX, VOIDmode, EXPAND_NORMAL);
  if (!(*insn_data[icode].operand[2].predicate) (rtx_op2, mode2))
    {
      if (mode2 != VOIDmode)
	rtx_op2 = force_reg (mode2, rtx_op2);
    }

  /* Fall back to a fresh pseudo unless the caller's TARGET is usable.  */
  if (!(target
	&& (*insn_data[icode].operand[0].predicate) (target, mode)))
    target = gen_reg_rtx (mode);

  /* Generate and emit the shift instruction.  */
  pat = GEN_FCN (icode) (target, rtx_op1, rtx_op2);
  gcc_assert (pat);
  emit_insn (pat);

  return target;
}
/* This subroutine of expand_doubleword_shift handles the cases in which
the effective shift value is >= BITS_PER_WORD. The arguments and return
value are the same as for the parent routine, except that SUPERWORD_OP1
@ -5074,11 +5135,14 @@ init_optabs (void)
reduc_umax_optab = init_optab (UNKNOWN);
reduc_smin_optab = init_optab (UNKNOWN);
reduc_umin_optab = init_optab (UNKNOWN);
reduc_plus_optab = init_optab (UNKNOWN);
reduc_splus_optab = init_optab (UNKNOWN);
reduc_uplus_optab = init_optab (UNKNOWN);
vec_extract_optab = init_optab (UNKNOWN);
vec_set_optab = init_optab (UNKNOWN);
vec_init_optab = init_optab (UNKNOWN);
vec_shl_optab = init_optab (UNKNOWN);
vec_shr_optab = init_optab (UNKNOWN);
vec_realign_load_optab = init_optab (UNKNOWN);
movmisalign_optab = init_optab (UNKNOWN);

View File

@ -236,7 +236,8 @@ enum optab_index
OTI_reduc_umax,
OTI_reduc_smin,
OTI_reduc_umin,
OTI_reduc_plus,
OTI_reduc_splus,
OTI_reduc_uplus,
/* Set specified field of vector operand. */
OTI_vec_set,
@ -244,6 +245,9 @@ enum optab_index
OTI_vec_extract,
/* Initialize vector operand. */
OTI_vec_init,
/* Whole vector shift. The shift amount is in bits. */
OTI_vec_shl,
OTI_vec_shr,
/* Extract specified elements from vectors, for vector load. */
OTI_vec_realign_load,
@ -358,11 +362,14 @@ extern GTY(()) optab optab_table[OTI_MAX];
#define reduc_umax_optab (optab_table[OTI_reduc_umax])
#define reduc_smin_optab (optab_table[OTI_reduc_smin])
#define reduc_umin_optab (optab_table[OTI_reduc_umin])
#define reduc_plus_optab (optab_table[OTI_reduc_plus])
#define reduc_splus_optab (optab_table[OTI_reduc_splus])
#define reduc_uplus_optab (optab_table[OTI_reduc_uplus])
#define vec_set_optab (optab_table[OTI_vec_set])
#define vec_extract_optab (optab_table[OTI_vec_extract])
#define vec_init_optab (optab_table[OTI_vec_init])
#define vec_shl_optab (optab_table[OTI_vec_shl])
#define vec_shr_optab (optab_table[OTI_vec_shr])
#define vec_realign_load_optab (optab_table[OTI_vec_realign_load])
#define powi_optab (optab_table[OTI_powi])
@ -575,4 +582,7 @@ bool expand_vec_cond_expr_p (tree, enum machine_mode);
/* Generate code for VEC_COND_EXPR. */
extern rtx expand_vec_cond_expr (tree, rtx);
/* Generate code for VEC_LSHIFT_EXPR and VEC_RSHIFT_EXPR. */
extern rtx expand_vec_shift_expr (tree, rtx);
#endif /* GCC_OPTABS_H */

View File

@ -1,3 +1,21 @@
2005-06-21 Dorit Nuzman <dorit@il.ibm.com>
* lib/target-supports.exp (check_effective_target_vect_reduction):
Remove.
* gcc.dg/vect/vect.exp: Run tests with additional flags separately.
* gcc.dg/vect/vect-reduc-1.c: Vectorizable on all relevant platforms -
remove vect_reduction target keyword. Also avoid two returns in main.
* gcc.dg/vect/vect-reduc-3.c: Likewise.
* gcc.dg/vect/vect-reduc-2.c: Likewise. Also initialize diff to 0.
* gcc.dg/vect/vect-reduc-1short.c: New test.
* gcc.dg/vect/vect-reduc-1char.c: New test.
* gcc.dg/vect/vect-reduc-2short.c: New test.
* gcc.dg/vect/vect-reduc-2char.c: New test.
* gcc.dg/vect/vect-reduc-6.c: New test.
* gcc.dg/vect/trapv-vect-reduc-4.c: New test.
* gcc.dg/vect/fast-math-vect-reduc-5.c: New test.
* gcc.dg/vect/fast-math-vect-reduc-7.c: New test.
2005-06-21 Tobias Schl"uter <tobias.schlueter@physik.uni-muenchen.de>
Paul Thomas <pault@gcc.gnu.org>

View File

@ -0,0 +1,53 @@
/* { dg-require-effective-target vect_float } */
/* need -funsafe-math-optimizations to vectorize the summation.
also need -ffinite-math-only to create the min/max expr. */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
#define DIFF 242
/* Run three float reductions over fixed N-element arrays: a sum of
   b[i] - c[i], a maximum of c[] and a minimum of c[].  X seeds the
   running maximum and MAX_RESULT is the maximum the caller expects.
   Calls abort () on any mismatch; returns 0 otherwise.  */
int main1 (float x, float max_result)
{
int i;
float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
float diff = 2;
float max = x;
float min = 10;
/* Sum reduction: starts at 2 and adds 2*i per element, i.e. 2 + 240.  */
for (i = 0; i < N; i++) {
diff += (b[i] - c[i]);
}
/* Max reduction over c[], seeded with x.  */
for (i = 0; i < N; i++) {
max = max < c[i] ? c[i] : max;
}
/* Min reduction over c[], seeded with 10; c[0] == 0 is the smallest.  */
for (i = 0; i < N; i++) {
min = min > c[i] ? c[i] : min;
}
/* check results: */
if (diff != DIFF)
abort ();
if (max != max_result)
abort ();
if (min != 0)
abort ();
return 0;
}
/* Test driver.  Calls check_vect () (declared in tree-vect.h; presumably
   verifies vector support at run time -- confirm there), then runs main1
   once with a seed above every element of c[] (100 stays the maximum)
   and once with a seed of 0 (the array maximum 15 wins).  */
int main (void)
{
check_vect ();
main1 (100, 100);
main1 (0, 15);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */

View File

@ -0,0 +1,53 @@
/* { dg-require-effective-target vect_double } */
/* need -funsafe-math-optimizations to vectorize the summation.
also need -ffinite-math-only to create the min/max expr. */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
#define DIFF 242
/* Run three double reductions over fixed N-element arrays: a sum of
   b[i] - c[i], a maximum of c[] and a minimum of c[].  X seeds the
   running maximum and MAX_RESULT is the maximum the caller expects.
   Calls abort () on any mismatch; returns 0 otherwise.  */
int main1 (double x, double max_result)
{
int i;
double b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
double c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
double diff = 2;
double max = x;
double min = 10;
/* Sum reduction: starts at 2 and adds 2*i per element, i.e. 2 + 240.  */
for (i = 0; i < N; i++) {
diff += (b[i] - c[i]);
}
/* Max reduction over c[], seeded with x.  */
for (i = 0; i < N; i++) {
max = max < c[i] ? c[i] : max;
}
/* Min reduction over c[], seeded with 10; c[0] == 0 is the smallest.  */
for (i = 0; i < N; i++) {
min = min > c[i] ? c[i] : min;
}
/* check results: */
if (diff != DIFF)
abort ();
if (max != max_result)
abort ();
if (min != 0)
abort ();
return 0;
}
/* Test driver.  Calls check_vect () (declared in tree-vect.h; presumably
   verifies vector support at run time -- confirm there), then runs main1
   once with a seed above every element of c[] (100 stays the maximum)
   and once with a seed of 0 (the array maximum 15 wins).  */
int main (void)
{
check_vect ();
main1 (100, 100);
main1 (0, 15);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */

View File

@ -0,0 +1,49 @@
/* { dg-require-effective-target vect_int } */
/* { dg-do compile } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
#define DIFF 242
/* Run three signed-int reductions over fixed N-element arrays: a sum of
   b[i] - c[i], a maximum of c[] and a minimum of c[].  X seeds the
   running maximum and MAX_RESULT is the maximum the caller expects.
   Calls abort () on any mismatch; returns 0 otherwise.  */
int main1 (int x, int max_result)
{
int i;
int b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
int c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
int diff = 2;
int max = x;
int min = 10;
/* Sum reduction: starts at 2 and adds 2*i per element, i.e. 2 + 240.  */
for (i = 0; i < N; i++) {
diff += (b[i] - c[i]);
}
/* Max reduction over c[], seeded with x.  */
for (i = 0; i < N; i++) {
max = max < c[i] ? c[i] : max;
}
/* Min reduction over c[], seeded with 10; c[0] == 0 is the smallest.  */
for (i = 0; i < N; i++) {
min = min > c[i] ? c[i] : min;
}
/* check results: */
if (diff != DIFF)
abort ();
if (max != max_result)
abort ();
if (min != 0)
abort ();
/* The function is declared int; return a defined value instead of
   falling off the end, as the sibling reduction tests do.  */
return 0;
}
/* Test driver.  Calls check_vect () (declared in tree-vect.h; presumably
   verifies vector support at run time -- confirm there), then runs main1
   once with a seed above every element of c[] (100 stays the maximum)
   and once with a seed of 0 (the array maximum 15 wins).  */
int main (void)
{
check_vect ();
main1 (100, 100);
main1 (0, 15);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */

View File

@ -47,9 +47,9 @@ int main (void)
{
check_vect ();
return main1 (100, 100);
return main1 (0, 15);
main1 (100, 100);
main1 (0, 15);
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,51 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
#define DIFF 242
/* Run three unsigned-char reductions over fixed N-element arrays: a sum
   of ub[i] - uc[i], a maximum of uc[] and a minimum of uc[].  X seeds
   the running maximum and MAX_RESULT is the maximum the caller expects.
   Calls abort () on any mismatch; returns 0 otherwise.  */
int main1 (unsigned char x, unsigned char max_result)
{
int i;
unsigned char ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
unsigned char uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
unsigned char udiff = 2;
unsigned char umax = x;
unsigned char umin = 10;
/* Sum reduction: starts at 2 and adds 2*i per element, i.e. 2 + 240 = 242,
   which still fits in an 8-bit unsigned char.  */
for (i = 0; i < N; i++) {
udiff += (unsigned char)(ub[i] - uc[i]);
}
/* Max reduction over uc[], seeded with x.  */
for (i = 0; i < N; i++) {
umax = umax < uc[i] ? uc[i] : umax;
}
/* Min reduction over uc[], seeded with 10; uc[0] == 0 is the smallest.  */
for (i = 0; i < N; i++) {
umin = umin > uc[i] ? uc[i] : umin;
}
/* check results: */
if (udiff != DIFF)
abort ();
if (umax != max_result)
abort ();
if (umin != 0)
abort ();
return 0;
}
/* Test driver.  Calls check_vect () (declared in tree-vect.h; presumably
   verifies vector support at run time -- confirm there), then runs main1
   once with a seed above every element of uc[] (100 stays the maximum)
   and once with a seed of 0 (the array maximum 15 wins).  */
int main (void)
{
check_vect ();
main1 (100, 100);
main1 (0, 15);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,51 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
#define DIFF 242
/* Run three unsigned-short reductions over fixed N-element arrays: a sum
   of ub[i] - uc[i], a maximum of uc[] and a minimum of uc[].  X seeds
   the running maximum and MAX_RESULT is the maximum the caller expects.
   Calls abort () on any mismatch; returns 0 otherwise.  */
int main1 (unsigned short x, unsigned short max_result)
{
int i;
unsigned short ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
unsigned short uc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
unsigned short udiff = 2;
unsigned short umax = x;
unsigned short umin = 10;
/* Sum reduction: starts at 2 and adds 2*i per element, i.e. 2 + 240.  */
for (i = 0; i < N; i++) {
udiff += (unsigned short)(ub[i] - uc[i]);
}
/* Max reduction over uc[], seeded with x.  */
for (i = 0; i < N; i++) {
umax = umax < uc[i] ? uc[i] : umax;
}
/* Min reduction over uc[], seeded with 10; uc[0] == 0 is the smallest.  */
for (i = 0; i < N; i++) {
umin = umin > uc[i] ? uc[i] : umin;
}
/* check results: */
if (udiff != DIFF)
abort ();
if (umax != max_result)
abort ();
if (umin != 0)
abort ();
return 0;
}
/* Test driver.  Calls check_vect () (declared in tree-vect.h; presumably
   verifies vector support at run time -- confirm there), then runs main1
   once with a seed above every element of uc[] (100 stays the maximum)
   and once with a seed of 0 (the array maximum 15 wins).  */
int main (void)
{
check_vect ();
main1 (100, 100);
main1 (0, 15);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -1,11 +1,10 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
#define DIFF 242
#define DIFF 240
/* Test vectorization of reduction of signed-int. */
@ -14,7 +13,7 @@ int main1 (int x, int max_result)
int i;
int b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
int c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
int diff = 2;
int diff = 0;
int max = x;
int min = 10;
@ -45,9 +44,10 @@ int main (void)
{
check_vect ();
return main1 (100, 100);
return main1 (0, 15);
main1 (100, 100);
main1 (0, 15);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail {! vect_reduction} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,51 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
#define DIFF 121
/* Run three char reductions over fixed N-element arrays: a sum of
   b[i] - c[i] accumulated in a signed char, a maximum of c[] and a
   minimum of c[].  X seeds the running maximum and MAX_RESULT is the
   maximum the caller expects.  Calls abort () on any mismatch; returns 0
   otherwise.  */
int main1 (char x, char max_result)
{
int i;
char b[N] = {0,2,3,6,8,10,12,14,16,18,20,22,24,26,28,30};
char c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
signed char diff = 2;
char max = x;
char min = 10;
/* Sum reduction: the elements of b[] total 239 and those of c[] total
   120, so the result is 2 + 119 = 121 (DIFF).  */
for (i = 0; i < N; i++) {
diff += (b[i] - c[i]);
}
/* Max reduction over c[], seeded with x.  */
for (i = 0; i < N; i++) {
max = max < c[i] ? c[i] : max;
}
/* Min reduction over c[], seeded with 10; c[0] == 0 is the smallest.  */
for (i = 0; i < N; i++) {
min = min > c[i] ? c[i] : min;
}
/* check results: */
if (diff != DIFF)
abort ();
if (max != max_result)
abort ();
if (min != 0)
abort ();
return 0;
}
/* Test driver.  Calls check_vect () (declared in tree-vect.h; presumably
   verifies vector support at run time -- confirm there), then runs main1
   once with a seed above every element of c[] (100 stays the maximum)
   and once with a seed of 0 (the array maximum 15 wins).  */
int main (void)
{
check_vect ();
main1 (100, 100);
main1 (0, 15);
return 0 ;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail i?86-*-* x86_64-*-* } } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,51 @@
/* { dg-require-effective-target vect_int } */
#include <stdarg.h>
#include <stdio.h>
#include "tree-vect.h"
#define N 16
#define DIFF 242
/* Run three short reductions over fixed N-element arrays: a sum of
   b[i] - c[i], a maximum of c[] and a minimum of c[].  X seeds the
   running maximum and MAX_RESULT is the maximum the caller expects.
   Calls abort () on any mismatch; returns 0 otherwise.  */
int main1 (short x, short max_result)
{
int i;
short b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
short c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
short diff = 2;
short max = x;
short min = 10;
/* Sum reduction: starts at 2 and adds 2*i per element, i.e. 2 + 240.  */
for (i = 0; i < N; i++) {
diff += (b[i] - c[i]);
}
/* Max reduction over c[], seeded with x.  */
for (i = 0; i < N; i++) {
max = max < c[i] ? c[i] : max;
}
/* Min reduction over c[], seeded with 10; c[0] == 0 is the smallest.  */
for (i = 0; i < N; i++) {
min = min > c[i] ? c[i] : min;
}
/* check results: */
if (diff != DIFF)
abort ();
if (max != max_result)
abort ();
if (min != 0)
abort ();
return 0;
}
/* Test driver.  Calls check_vect () (declared in tree-vect.h; presumably
   verifies vector support at run time -- confirm there), then runs main1
   once with a seed above every element of c[] (100 stays the maximum)
   and once with a seed of 0 (the array maximum 15 wins).  */
int main (void)
{
check_vect ();
main1 (100, 100);
main1 (0, 15);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -4,12 +4,11 @@
#include "tree-vect.h"
#define N 16
#define DIFF 240
/* Test vectorization of reduction of unsigned-int in the presence
of unknown-loop-bound. */
int main1 (int n)
int main1 (int n, int res)
{
int i;
unsigned int ub[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
@ -22,7 +21,7 @@ int main1 (int n)
}
/* check results: */
if (udiff != DIFF)
if (udiff != res)
abort ();
return 0;
@ -32,9 +31,10 @@ int main (void)
{
check_vect ();
return main1 (N);
return main1 (N-1);
main1 (N, 240);
main1 (N-1, 210);
return 0;
}
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail {! vect_reduction} } } } */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
/* { dg-final { cleanup-tree-dump "vect" } } */

View File

@ -0,0 +1,51 @@
/* { dg-require-effective-target vect_float } */
#include <stdarg.h>
#include "tree-vect.h"
#define N 16
#define DIFF 242
/* Run three float reductions over fixed N-element arrays: a sum of
   b[i] - c[i], a maximum of c[] and a minimum of c[].  X seeds the
   running maximum and MAX_RESULT is the maximum the caller expects.
   Calls abort () on any mismatch; returns 0 otherwise.  */
int main1 (float x, float max_result)
{
int i;
float b[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
float c[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
float diff = 2;
float max = x;
float min = 10;
/* Sum reduction: starts at 2 and adds 2*i per element, i.e. 2 + 240.  */
for (i = 0; i < N; i++) {
diff += (b[i] - c[i]);
}
/* Max reduction over c[], seeded with x.  */
for (i = 0; i < N; i++) {
max = max < c[i] ? c[i] : max;
}
/* Min reduction over c[], seeded with 10; c[0] == 0 is the smallest.  */
for (i = 0; i < N; i++) {
min = min > c[i] ? c[i] : min;
}
/* check results: */
if (diff != DIFF)
abort ();
if (max != max_result)
abort ();
if (min != 0)
abort ();
return 0;
}
/* Test driver.  Calls check_vect () (declared in tree-vect.h; presumably
   verifies vector support at run time -- confirm there), then runs main1
   once with a seed above every element of c[] (100 stays the maximum)
   and once with a seed of 0 (the array maximum 15 wins).  */
int main (void)
{
check_vect ();
main1 (100 ,100);
main1 (0, 15);
return 0;
}
/* need -ffast-math to vectorize these loops. */
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */

View File

@ -76,7 +76,25 @@ if [istarget "powerpc*-*-*"] {
dg-init
# Main loop.
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.\[cS\]]] \
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pr*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/vect-*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
#### Tests with special options
global SAVED_DEFAULT_VECTCFLAGS
set SAVED_DEFAULT_VECTCFLAGS $DEFAULT_VECTCFLAGS
# -ffast-math tests
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-ffast-math"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/fast-math-vect*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# -ftrapv tests
set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
lappend DEFAULT_VECTCFLAGS "-ftrapv"
dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/trapv-vect*.\[cS\]]] \
"" $DEFAULT_VECTCFLAGS
# Clean up.

View File

@ -988,23 +988,6 @@ proc check_effective_target_vect_int_mult { } {
return $et_vect_int_mult_saved
}
# Return 1 if the target supports vector reduction
proc check_effective_target_vect_reduction { } {
global et_vect_reduction_saved
if [info exists et_vect_reduction_saved] {
verbose "check_effective_target_vect_reduction: using cached result" 2
} else {
set et_vect_reduction_saved 0
if { [istarget powerpc*-*-*] } {
set et_vect_reduction_saved 1
}
}
verbose "check_effective_target_vect_reduction: returning $et_vect_reduction_saved" 2
return $et_vect_reduction_saved
}
# Return 1 if the target supports atomic operations on "int" and "long".
proc check_effective_target_sync_int_long { } {

View File

@ -1692,6 +1692,8 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
case RSHIFT_EXPR:
case LROTATE_EXPR:
case RROTATE_EXPR:
case VEC_LSHIFT_EXPR:
case VEC_RSHIFT_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:

View File

@ -1043,6 +1043,8 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
case RSHIFT_EXPR:
case LROTATE_EXPR:
case RROTATE_EXPR:
case VEC_LSHIFT_EXPR:
case VEC_RSHIFT_EXPR:
case BIT_IOR_EXPR:
case BIT_XOR_EXPR:
case BIT_AND_EXPR:
@ -1838,6 +1840,8 @@ op_prio (tree op)
case REDUC_MAX_EXPR:
case REDUC_MIN_EXPR:
case REDUC_PLUS_EXPR:
case VEC_LSHIFT_EXPR:
case VEC_RSHIFT_EXPR:
return 16;
case SAVE_EXPR:
@ -1925,6 +1929,12 @@ op_symbol (tree op)
case RSHIFT_EXPR:
return ">>";
case VEC_LSHIFT_EXPR:
return "v<<";
case VEC_RSHIFT_EXPR:
return "v>>";
case PLUS_EXPR:
return "+";

View File

@ -448,6 +448,7 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
compute_type = TREE_TYPE (type);
}
gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
rhs = expand_vector_operation (bsi, type, compute_type, rhs, code);
if (lang_hooks.types_compatible_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
*p_rhs = rhs;

View File

@ -834,6 +834,7 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
{
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
tree vectype = STMT_VINFO_VECTYPE (stmt_info);
enum machine_mode mode = TYPE_MODE (vectype);
loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
basic_block exit_bb;
@ -843,15 +844,18 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
block_stmt_iterator exit_bsi;
tree vec_dest;
tree new_temp;
tree new_name;
tree epilog_stmt;
tree new_scalar_dest, exit_phi;
tree bitsize, bitpos;
tree bitsize, bitpos, bytesize;
enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1));
tree scalar_initial_def;
tree vec_initial_def;
tree orig_name;
imm_use_iterator imm_iter;
use_operand_p use_p;
bool extract_scalar_result;
bool adjust_in_epilog;
/*** 1. Create the reduction def-use cycle ***/
@ -888,63 +892,214 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
exit_bsi = bsi_start (exit_bb);
/* 2.2 Create:
v_out2 = reduc_expr <v_out1>
s_out3 = extract_field <v_out2, 0> */
vec_dest = vect_create_destination_var (scalar_dest, vectype);
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
new_temp = make_ssa_name (vec_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
{
fprintf (vect_dump, "transform reduction: created epilog code:");
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
bitsize = TYPE_SIZE (scalar_type);
bytesize = TYPE_SIZE_UNIT (scalar_type);
/* The result is in the low order bits. */
if (BITS_BIG_ENDIAN)
bitpos = size_binop (MULT_EXPR,
bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
TYPE_SIZE (scalar_type));
/* 2.2 Create the reduction code. */
if (reduc_code < NUM_TREE_CODES)
{
/*** Case 1: Create:
v_out2 = reduc_expr <v_out1> */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "Reduce using direct vector reduction.");
vec_dest = vect_create_destination_var (scalar_dest, vectype);
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
build1 (reduc_code, vectype, PHI_RESULT (new_phi)));
new_temp = make_ssa_name (vec_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
extract_scalar_result = true;
adjust_in_epilog = true;
}
else
bitpos = bitsize_zero_node;
{
enum tree_code shift_code;
bool have_whole_vector_shift = true;
enum tree_code code = TREE_CODE (TREE_OPERAND (stmt, 1)); /* CHECKME */
int bit_offset;
int element_bitsize = tree_low_cst (bitsize, 1);
int vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
tree vec_temp;
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
build3 (BIT_FIELD_REF, scalar_type,
new_temp, bitsize, bitpos));
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
/* The result of the reduction is expected to be at the LSB bits
of the vector. For big-endian targets this means at the right
end of the vector. For little-endian targets this means at the
left end of the vector. */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
if (BITS_BIG_ENDIAN
&& vec_shr_optab->handlers[mode].insn_code != CODE_FOR_nothing)
shift_code = VEC_RSHIFT_EXPR;
else if (!BITS_BIG_ENDIAN
&& vec_shl_optab->handlers[mode].insn_code != CODE_FOR_nothing)
shift_code = VEC_LSHIFT_EXPR;
else
have_whole_vector_shift = false;
if (have_whole_vector_shift)
{
/*** Case 2:
for (offset = VS/2; offset >= element_size; offset/=2)
{
Create: va' = vec_shift <va, offset>
Create: va = vop <va, va'>
} */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "Reduce using vector shifts");
vec_dest = vect_create_destination_var (scalar_dest, vectype);
new_temp = PHI_RESULT (new_phi);
for (bit_offset = vec_size_in_bits/2;
bit_offset >= element_bitsize;
bit_offset /= 2)
{
tree bitpos = size_int (bit_offset);
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
build2 (shift_code, vectype, new_temp, bitpos));
new_name = make_ssa_name (vec_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_name;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
epilog_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
build2 (code, vectype, new_name, new_temp));
new_temp = make_ssa_name (vec_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
extract_scalar_result = true;
adjust_in_epilog = true;
}
else
{
/*** Case 3:
Create: s = init;
for (offset=0; offset<vector_size; offset+=element_size;)
{
Create: s' = extract_field <v_out2, offset>
Create: s = op <s, s'>
} */
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "Reduce using scalar code. ");
vec_temp = PHI_RESULT (new_phi);
vec_size_in_bits = tree_low_cst (TYPE_SIZE (vectype), 1);
/* first iteration is peeled out when possible to minimize
the number of operations we generate: */
if (code == PLUS_EXPR
&& (integer_zerop (scalar_initial_def)
|| real_zerop (scalar_initial_def)))
{
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
build3 (BIT_FIELD_REF, scalar_type,
vec_temp, bitsize, bitsize_zero_node));
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
bit_offset = element_bitsize;
}
else
{
new_temp = scalar_initial_def;
bit_offset = 0;
}
for (;
bit_offset < vec_size_in_bits;
bit_offset += element_bitsize)
{
tree bitpos = bitsize_int (bit_offset);
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
build3 (BIT_FIELD_REF, scalar_type,
vec_temp, bitsize, bitpos));
new_name = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_name;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
build2 (code, scalar_type, new_name, new_temp));
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
extract_scalar_result = false;
adjust_in_epilog = false;
}
}
/* 2.3 Extract the final scalar result. Create:
s_out3 = extract_field <v_out2, bitpos> */
/* 2.3 Adjust the final result by the initial value of the reduction
variable. (when such adjustment is not needed, then
'scalar_initial_def' is zero).
if (extract_scalar_result)
{
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "extract scalar result");
Create:
s_out = scalar_expr <s_out, scalar_initial_def> */
/* The result is in the low order bits. */
if (BITS_BIG_ENDIAN)
bitpos = size_binop (MULT_EXPR,
bitsize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1),
TYPE_SIZE (scalar_type));
else
bitpos = bitsize_zero_node;
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
build2 (code, scalar_type, new_temp, scalar_initial_def));
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
build3 (BIT_FIELD_REF, scalar_type,
new_temp, bitsize, bitpos));
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
/* 2.4 Replace uses of s_out0 with uses of s_out3 */
/* 2.4 Adjust the final result by the initial value of the reduction
variable. (when such adjustment is not needed, then
'scalar_initial_def' is zero).
Create:
s_out = scalar_expr <s_out, scalar_initial_def> */
if (adjust_in_epilog)
{
epilog_stmt = build2 (MODIFY_EXPR, scalar_type, new_scalar_dest,
build2 (code, scalar_type, new_temp, scalar_initial_def));
new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
TREE_OPERAND (epilog_stmt, 0) = new_temp;
bsi_insert_after (&exit_bsi, epilog_stmt, BSI_NEW_STMT);
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
print_generic_expr (vect_dump, epilog_stmt, TDF_SLIM);
}
/* 2.5 Replace uses of s_out0 with uses of s_out3 */
/* Find the loop-closed-use at the loop exit of the original
scalar result. (The reduction result is expected to have
@ -954,10 +1109,10 @@ vect_create_epilog_for_reduction (tree vect_def, tree stmt, tree reduction_op,
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest)
{
if (!flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
{
exit_phi = USE_STMT (use_p);
break;
}
{
exit_phi = USE_STMT (use_p);
break;
}
}
orig_name = PHI_RESULT (exit_phi);
@ -1067,13 +1222,13 @@ vectorizable_reduction (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "no optab for reduction.");
return false;
reduc_code = NUM_TREE_CODES;
}
if (reduc_optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
{
if (vect_print_dump_info (REPORT_DETAILS, UNKNOWN_LOC))
fprintf (vect_dump, "op not supported by target.");
return false;
fprintf (vect_dump, "reduc op not supported by target.");
reduc_code = NUM_TREE_CODES;
}
if (!vec_stmt) /* transformation not required. */

View File

@ -957,6 +957,12 @@ DEFTREECODE (REDUC_MAX_EXPR, "reduc_max_expr", tcc_unary, 1)
DEFTREECODE (REDUC_MIN_EXPR, "reduc_min_expr", tcc_unary, 1)
DEFTREECODE (REDUC_PLUS_EXPR, "reduc_plus_expr", tcc_unary, 1)
/* Whole vector left/right shift in bits.
Operand 0 is a vector to be shifted.
Operand 1 is an integer shift amount in bits. */
DEFTREECODE (VEC_LSHIFT_EXPR, "vec_lshift_expr", tcc_binary, 2)
DEFTREECODE (VEC_RSHIFT_EXPR, "vec_rshift_expr", tcc_binary, 2)
/*
Local variables:
mode:c