ia64.c (rtx_needs_barrier): Handle CONST_VECTOR and VEC_SELECT.

* config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR
        and VEC_SELECT.
        * config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl].
        (mulv4hi3): Set itanium_class mmmul.
        (fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove.
        (fpack): Rename from fpack_sfsf.

From-SVN: r92987
This commit is contained in:
Richard Henderson 2005-01-05 22:18:15 -08:00 committed by Richard Henderson
parent 8f98556f01
commit 051d824588
4 changed files with 81 additions and 72 deletions

View File

@ -1,3 +1,12 @@
2005-01-05 Richard Henderson <rth@redhat.com>
* config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR
and VEC_SELECT.
* config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl].
(mulv4hi3): Set itanium_class mmmul.
(fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove.
(fpack): Rename from fpack_sfsf.
2005-01-05 Richard Henderson <rth@redhat.com>
PR rtl-opt/10692

View File

@ -5198,17 +5198,27 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
{
rtx pat = XVECEXP (x, 0, i);
if (GET_CODE (pat) == SET)
switch (GET_CODE (pat))
{
case SET:
update_set_flags (pat, &new_flags, &pred, &cond);
need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
need_barrier |= set_src_needs_barrier (pat, new_flags,
pred, cond);
break;
case USE:
case CALL:
case ASM_OPERANDS:
need_barrier |= rtx_needs_barrier (pat, flags, pred);
break;
case CLOBBER:
case RETURN:
break;
default:
gcc_unreachable ();
}
else if (GET_CODE (pat) == USE
|| GET_CODE (pat) == CALL
|| GET_CODE (pat) == ASM_OPERANDS)
need_barrier |= rtx_needs_barrier (pat, flags, pred);
else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
abort ();
}
for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
{
@ -5246,7 +5256,7 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
break;
case CONST_INT: case CONST_DOUBLE:
case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
case SYMBOL_REF: case LABEL_REF: case CONST:
break;
@ -5290,6 +5300,14 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
break;
case VEC_SELECT:
/* VEC_SELECT's second argument is a PARALLEL with integers that
describe the elements selected. On ia64, those integers are
always constants. Avoid walking the PARALLEL so that we don't
get confused with "normal" parallels and abort. */
need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
break;
case UNSPEC:
switch (XINT (x, 1))
{

View File

@ -172,43 +172,35 @@
(match_operand:V8QI 2 "gr_register_operand" "r")))]
""
{
rtx l1, h1, l2, h2, lm, hm, lz, hz;
rtx r1, l1, r2, l2, rm, lm;
r1 = gen_reg_rtx (V4HImode);
l1 = gen_reg_rtx (V4HImode);
h1 = gen_reg_rtx (V4HImode);
r2 = gen_reg_rtx (V4HImode);
l2 = gen_reg_rtx (V4HImode);
h2 = gen_reg_rtx (V4HImode);
/* Zero-extend the QImode elements into two words of HImode elements. */
emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1),
operands[1], CONST0_RTX (V8QImode)));
emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2),
operands[2], CONST0_RTX (V8QImode)));
emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1),
operands[1], CONST0_RTX (V8QImode)));
emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2),
operands[2], CONST0_RTX (V8QImode)));
/* Zero-extend the QImode elements into two words of HImode elements
by interleaving them with zero bytes. */
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1),
operands[1], CONST0_RTX (V8QImode)));
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2),
operands[2], CONST0_RTX (V8QImode)));
emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1),
operands[1], CONST0_RTX (V8QImode)));
emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2),
operands[2], CONST0_RTX (V8QImode)));
/* Multiply. */
rm = gen_reg_rtx (V4HImode);
lm = gen_reg_rtx (V4HImode);
hm = gen_reg_rtx (V4HImode);
emit_insn (gen_mulv4hi3 (rm, r1, r2));
emit_insn (gen_mulv4hi3 (lm, l1, l2));
emit_insn (gen_mulv4hi3 (hm, h1, h2));
/* Zap the high order bytes of the HImode elements. There are several
ways that this could be done. On Itanium2, there's 1 cycle latency
moving between the ALU units and the PALU units, so using AND would
be 3 cycles latency into the eventual pack insn, whereas using MIX
is only 2 cycles. */
lz = gen_reg_rtx (V4HImode);
hz = gen_reg_rtx (V4HImode);
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
/* Repack the HImode elements as QImode elements. */
emit_insn (gen_pack2_sss (operands[0], lz, hz));
/* Zap the high order bytes of the HImode elements by overwriting those
in one part with the low order bytes of the other. */
emit_insn (gen_mix1_r (operands[0],
gen_lowpart (V8QImode, rm),
gen_lowpart (V8QImode, lm)));
DONE;
})
@ -218,7 +210,7 @@
(match_operand:V4HI 2 "gr_register_operand" "r")))]
""
"pmpyshr2 %0 = %1, %2, 0"
[(set_attr "itanium_class" "mmalua")])
[(set_attr "itanium_class" "mmmul")])
(define_expand "umax<mode>3"
[(set (match_operand:VECINT 0 "gr_register_operand" "")
@ -450,7 +442,7 @@
"mix1.r %0 = %r2, %r1"
[(set_attr "itanium_class" "mmshf")])
(define_insn "*mix1_l"
(define_insn "mix1_l"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
@ -948,7 +940,7 @@
DONE;
})
(define_insn "*fpack_sfsf"
(define_insn "*fpack"
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
(vec_concat:V2SF
(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
@ -957,38 +949,6 @@
"fpack %0 = %F2, %F1"
[(set_attr "itanium_class" "fmisc")])
(define_insn "*fpack_sfxf"
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
(vec_concat:V2SF
(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
(float_truncate:SF
(match_operand 2 "fr_register_operand" "f"))))]
"GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode"
"fpack %0 = %2, %F1"
[(set_attr "itanium_class" "fmisc")])
(define_insn "*fpack_xfsf"
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
(vec_concat:V2SF
(float_truncate:SF
(match_operand 1 "fr_register_operand" "f"))
(match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
"GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode"
"fpack %0 = %F2, %1"
[(set_attr "itanium_class" "fmisc")])
(define_insn "*fpack_xfxf"
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
(vec_concat:V2SF
(float_truncate:SF
(match_operand 1 "fr_register_operand" "f"))
(float_truncate:SF
(match_operand 2 "fr_register_operand" "f"))))]
"(GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode)
&& (GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode)"
"fpack %0 = %2, %1"
[(set_attr "itanium_class" "fmisc")])
;; Missing operations
;; fprcpa
;; fpsqrta

View File

@ -0,0 +1,22 @@
extern void abort (void);
extern int memcmp (const void *, const void *, __SIZE_TYPE__);
/* Vector of eight unsigned bytes, declared with the GCC vector_size
   extension.  */
typedef unsigned char v8qi __attribute__((vector_size(8)));

/* Return the element-wise product of X and Y.

   The function is kept out of line so that, when a caller passes constant
   vectors, the multiplication is still performed by the code generated for
   this function instead of being inlined and folded at compile time.  */
__attribute__((noinline))
v8qi foo(v8qi x, v8qi y)
{
  return x * y;
}
/* Exercise foo on two operand pairs and abort on any mismatch.

   The first pair only produces products that fit in a byte.  The second
   pair produces products wider than a byte, so the expected values are the
   products reduced modulo 256; this checks that the high-order byte of
   each intermediate product is discarded.  Returns 0 when both checks
   pass.  */
int main(void)
{
  /* Products below 256.  */
  v8qi a = { 1, 2, 3, 4, 5, 6, 7, 8 };
  v8qi b = { 3, 3, 3, 3, 3, 3, 3, 3 };
  v8qi c = { 3, 6, 9, 12, 15, 18, 21, 24 };

  /* Products of 256, 256, 256, 256, 65025, 600, 300 and 255: all but the
     last overflow a byte and must wrap.  */
  v8qi d = { 16, 32, 64, 128, 255, 200, 100, 17 };
  v8qi e = { 16, 8, 4, 2, 255, 3, 3, 15 };
  v8qi f = { 0, 0, 0, 0, 1, 88, 44, 255 };

  v8qi r;

  r = foo (a, b);
  if (memcmp (&r, &c, sizeof r) != 0)
    abort ();

  r = foo (d, e);
  if (memcmp (&r, &f, sizeof r) != 0)
    abort ();

  return 0;
}