mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-12 14:20:20 +08:00
ia64.c (rtx_needs_barrier): Handle CONST_VECTOR and VEC_SELECT.
* config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR and VEC_SELECT. * config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl]. (mulv4hi3): Set itanium_class mmmul. (fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove. From-SVN: r92987
This commit is contained in:
parent
8f98556f01
commit
051d824588
@ -1,3 +1,12 @@
|
||||
2005-01-05 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/ia64/ia64.c (rtx_needs_barrier): Handle CONST_VECTOR
|
||||
and VEC_SELECT.
|
||||
* config/ia64/vect.md (mulv8qi3): Re-implement with mix_[rl].
|
||||
(mulv4hi3): Set itanium_class mmmul.
|
||||
(fpack_sfxf, fpack_xfsf, fpack_xfxf): Remove.
|
||||
(fpack): Rename from fpack_sfsf.
|
||||
|
||||
2005-01-05 Richard Henderson <rth@redhat.com>
|
||||
|
||||
PR rtl-opt/10692
|
||||
|
@ -5198,17 +5198,27 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
|
||||
for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
|
||||
{
|
||||
rtx pat = XVECEXP (x, 0, i);
|
||||
if (GET_CODE (pat) == SET)
|
||||
switch (GET_CODE (pat))
|
||||
{
|
||||
case SET:
|
||||
update_set_flags (pat, &new_flags, &pred, &cond);
|
||||
need_barrier |= set_src_needs_barrier (pat, new_flags, pred, cond);
|
||||
need_barrier |= set_src_needs_barrier (pat, new_flags,
|
||||
pred, cond);
|
||||
break;
|
||||
|
||||
case USE:
|
||||
case CALL:
|
||||
case ASM_OPERANDS:
|
||||
need_barrier |= rtx_needs_barrier (pat, flags, pred);
|
||||
break;
|
||||
|
||||
case CLOBBER:
|
||||
case RETURN:
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
else if (GET_CODE (pat) == USE
|
||||
|| GET_CODE (pat) == CALL
|
||||
|| GET_CODE (pat) == ASM_OPERANDS)
|
||||
need_barrier |= rtx_needs_barrier (pat, flags, pred);
|
||||
else if (GET_CODE (pat) != CLOBBER && GET_CODE (pat) != RETURN)
|
||||
abort ();
|
||||
}
|
||||
for (i = XVECLEN (x, 0) - 1; i >= 0; --i)
|
||||
{
|
||||
@ -5246,7 +5256,7 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
|
||||
need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred);
|
||||
break;
|
||||
|
||||
case CONST_INT: case CONST_DOUBLE:
|
||||
case CONST_INT: case CONST_DOUBLE: case CONST_VECTOR:
|
||||
case SYMBOL_REF: case LABEL_REF: case CONST:
|
||||
break;
|
||||
|
||||
@ -5290,6 +5300,14 @@ rtx_needs_barrier (rtx x, struct reg_flags flags, int pred)
|
||||
need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
|
||||
break;
|
||||
|
||||
case VEC_SELECT:
|
||||
/* VEC_SELECT's second argument is a PARALLEL with integers that
|
||||
describe the elements selected. On ia64, those integers are
|
||||
always constants. Avoid walking the PARALLEL so that we don't
|
||||
get confused with "normal" parallels and abort. */
|
||||
need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred);
|
||||
break;
|
||||
|
||||
case UNSPEC:
|
||||
switch (XINT (x, 1))
|
||||
{
|
||||
|
@ -172,43 +172,35 @@
|
||||
(match_operand:V8QI 2 "gr_register_operand" "r")))]
|
||||
""
|
||||
{
|
||||
rtx l1, h1, l2, h2, lm, hm, lz, hz;
|
||||
rtx r1, l1, r2, l2, rm, lm;
|
||||
|
||||
r1 = gen_reg_rtx (V4HImode);
|
||||
l1 = gen_reg_rtx (V4HImode);
|
||||
h1 = gen_reg_rtx (V4HImode);
|
||||
r2 = gen_reg_rtx (V4HImode);
|
||||
l2 = gen_reg_rtx (V4HImode);
|
||||
h2 = gen_reg_rtx (V4HImode);
|
||||
|
||||
/* Zero-extend the QImode elements into two words of HImode elements. */
|
||||
emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l1),
|
||||
operands[1], CONST0_RTX (V8QImode)));
|
||||
emit_insn (gen_unpack1_l (gen_lowpart (V8QImode, l2),
|
||||
operands[2], CONST0_RTX (V8QImode)));
|
||||
emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h1),
|
||||
operands[1], CONST0_RTX (V8QImode)));
|
||||
emit_insn (gen_unpack1_h (gen_lowpart (V8QImode, h2),
|
||||
operands[2], CONST0_RTX (V8QImode)));
|
||||
/* Zero-extend the QImode elements into two words of HImode elements
|
||||
by interleaving them with zero bytes. */
|
||||
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1),
|
||||
operands[1], CONST0_RTX (V8QImode)));
|
||||
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2),
|
||||
operands[2], CONST0_RTX (V8QImode)));
|
||||
emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1),
|
||||
operands[1], CONST0_RTX (V8QImode)));
|
||||
emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2),
|
||||
operands[2], CONST0_RTX (V8QImode)));
|
||||
|
||||
/* Multiply. */
|
||||
rm = gen_reg_rtx (V4HImode);
|
||||
lm = gen_reg_rtx (V4HImode);
|
||||
hm = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_mulv4hi3 (rm, r1, r2));
|
||||
emit_insn (gen_mulv4hi3 (lm, l1, l2));
|
||||
emit_insn (gen_mulv4hi3 (hm, h1, h2));
|
||||
|
||||
/* Zap the high order bytes of the HImode elements. There are several
|
||||
ways that this could be done. On Itanium2, there's 1 cycle latency
|
||||
moving between the ALU units and the PALU units, so using AND would
|
||||
be 3 cycles latency into the eventual pack insn, whereas using MIX
|
||||
is only 2 cycles. */
|
||||
lz = gen_reg_rtx (V4HImode);
|
||||
hz = gen_reg_rtx (V4HImode);
|
||||
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
|
||||
gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
|
||||
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, lz),
|
||||
gen_lowpart (V8QImode, lm), CONST0_RTX (V8QImode)));
|
||||
|
||||
/* Repack the HImode elements as QImode elements. */
|
||||
emit_insn (gen_pack2_sss (operands[0], lz, hz));
|
||||
/* Zap the high order bytes of the HImode elements by overwriting those
|
||||
in one part with the low order bytes of the other. */
|
||||
emit_insn (gen_mix1_r (operands[0],
|
||||
gen_lowpart (V8QImode, rm),
|
||||
gen_lowpart (V8QImode, lm)));
|
||||
DONE;
|
||||
})
|
||||
|
||||
@ -218,7 +210,7 @@
|
||||
(match_operand:V4HI 2 "gr_register_operand" "r")))]
|
||||
""
|
||||
"pmpyshr2 %0 = %1, %2, 0"
|
||||
[(set_attr "itanium_class" "mmalua")])
|
||||
[(set_attr "itanium_class" "mmmul")])
|
||||
|
||||
(define_expand "umax<mode>3"
|
||||
[(set (match_operand:VECINT 0 "gr_register_operand" "")
|
||||
@ -450,7 +442,7 @@
|
||||
"mix1.r %0 = %r2, %r1"
|
||||
[(set_attr "itanium_class" "mmshf")])
|
||||
|
||||
(define_insn "*mix1_l"
|
||||
(define_insn "mix1_l"
|
||||
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
|
||||
(vec_select:V8QI
|
||||
(vec_concat:V16QI
|
||||
@ -948,7 +940,7 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "*fpack_sfsf"
|
||||
(define_insn "*fpack"
|
||||
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
||||
(vec_concat:V2SF
|
||||
(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
|
||||
@ -957,38 +949,6 @@
|
||||
"fpack %0 = %F2, %F1"
|
||||
[(set_attr "itanium_class" "fmisc")])
|
||||
|
||||
(define_insn "*fpack_sfxf"
|
||||
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
||||
(vec_concat:V2SF
|
||||
(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
|
||||
(float_truncate:SF
|
||||
(match_operand 2 "fr_register_operand" "f"))))]
|
||||
"GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode"
|
||||
"fpack %0 = %2, %F1"
|
||||
[(set_attr "itanium_class" "fmisc")])
|
||||
|
||||
(define_insn "*fpack_xfsf"
|
||||
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
||||
(vec_concat:V2SF
|
||||
(float_truncate:SF
|
||||
(match_operand 1 "fr_register_operand" "f"))
|
||||
(match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
|
||||
"GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode"
|
||||
"fpack %0 = %F2, %1"
|
||||
[(set_attr "itanium_class" "fmisc")])
|
||||
|
||||
(define_insn "*fpack_xfxf"
|
||||
[(set (match_operand:V2SF 0 "fr_register_operand" "=f")
|
||||
(vec_concat:V2SF
|
||||
(float_truncate:SF
|
||||
(match_operand 1 "fr_register_operand" "f"))
|
||||
(float_truncate:SF
|
||||
(match_operand 2 "fr_register_operand" "f"))))]
|
||||
"(GET_MODE (operands[1]) == DFmode || GET_MODE (operands[1]) == XFmode)
|
||||
&& (GET_MODE (operands[2]) == DFmode || GET_MODE (operands[2]) == XFmode)"
|
||||
"fpack %0 = %2, %1"
|
||||
[(set_attr "itanium_class" "fmisc")])
|
||||
|
||||
;; Missing operations
|
||||
;; fprcpa
|
||||
;; fpsqrta
|
||||
|
22
gcc/testsuite/gcc.c-torture/execute/simd-6.c
Normal file
22
gcc/testsuite/gcc.c-torture/execute/simd-6.c
Normal file
@ -0,0 +1,22 @@
|
||||
extern void abort (void);
|
||||
extern int memcmp (const void *, const void *, __SIZE_TYPE__);
|
||||
|
||||
typedef unsigned char v8qi __attribute__((vector_size(8)));
|
||||
|
||||
v8qi foo(v8qi x, v8qi y)
|
||||
{
|
||||
return x * y;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
v8qi a = { 1, 2, 3, 4, 5, 6, 7, 8 };
|
||||
v8qi b = { 3, 3, 3, 3, 3, 3, 3, 3 };
|
||||
v8qi c = { 3, 6, 9, 12, 15, 18, 21, 24 };
|
||||
v8qi r;
|
||||
|
||||
r = foo (a, b);
|
||||
if (memcmp (&r, &c, 8) != 0)
|
||||
abort ();
|
||||
return 0;
|
||||
}
|
Loading…
Reference in New Issue
Block a user