re PR target/46997 (new ia64 vector instructions are broken on HP-UX (big-endian))

2011-02-04  Richard Henderson  <rth@redhat.com>
	    Steve Ellcey  <sje@cup.hp.com>

	PR target/46997
	* config/ia64/predicates.md (mux1_brcst_element): New.
	* config/ia64/ia64-protos.h (ia64_unpack_assemble): New.
	* config/ia64/ia64.c (ia64_unpack_assemble): New.
	(ia64_unpack_sign): New.
	(ia64_expand_unpack): Rewrite using new routines.
	(ia64_expand_widen_sum): Ditto.
	(ia64_expand_dot_prod_v8qi): Ditto.
	* config/ia64/vect.md (mulv8qi3): Rewrite to use new
	routines, add endian check.
	(pmpy2_even): Rename from pmpy2_r, add endian check.
	(pmpy2_odd): Rename from pmpy2_l, add endian check.
	(vec_widen_smult_lo_v4hi): Rewrite using new routines.
	(vec_widen_smult_hi_v4hi): Ditto.
	(vec_widen_umult_lo_v4hi): Ditto.
	(vec_widen_umult_hi_v4hi): Ditto.
	(mulv2si3): Change endian checks.
	(sdot_prodv4hi): Rewrite with new calls.
	(udot_prodv4hi): New.
	(vec_pack_ssat_v4hi): Add endian check.
	(vec_pack_usat_v4hi): Ditto.
	(vec_pack_ssat_v2si): Ditto.
	(mix1_even): Rename from mix1_r, add endian check.
	(mix1_odd): Rename from mix1_l, add endian check.
	(*mux1_rev): Format change.
	(*mux1_mix): Ditto.
	(*mux1_shuf): Ditto.
	(*mux1_alt): Ditto.
	(*mux1_brcst_v8qi): Use new predicate.
	(vec_extract_evenv8qi): Remove endian check.
	(vec_extract_oddv8qi): Ditto.
	(vec_interleave_lowv4hi): Format change.
	(vec_interleave_highv4hi): Ditto.
	(mix2_even): Rename from mix2_r, add endian check.
	(mix2_odd): Rename from mix2_l, add endian check.
	(*mux2): Fix mask setting for TARGET_BIG_ENDIAN.
	(vec_extract_evenodd_helper): Format change.
	(vec_extract_evenv4hi): Remove endian check.
	(vec_extract_oddv4hi): Remove endian check.
	(vec_interleave_lowv2si): Format change.
	(vec_interleave_highv2si): Format change.
	(vec_initv2si): Remove endian check.
	(vecinit_v2si): Add endian check.
	(reduc_splus_v2sf): Add endian check.
	(reduc_smax_v2sf): Ditto.
	(reduc_smin_v2sf): Ditto.
	(vec_initv2sf): Remove endian check.
	(fpack): Add endian check.
	(fswap): Add endian check.
	(vec_interleave_highv2sf): Add endian check.
	(vec_interleave_lowv2sf): Add endian check.
	(fmix_lr): Add endian check.
	(vec_setv2sf): Format change.
	(*vec_extractv2sf_0_be): Use shift to extract operand.
	(*vec_extractv2sf_1_be): New.
	(vec_pack_trunc_v4hi): Add endian check.
	(vec_pack_trunc_v2si): Format change.

Co-Authored-By: Steve Ellcey <sje@cup.hp.com>

From-SVN: r169840
This commit is contained in:
Richard Henderson 2011-02-04 13:46:45 -08:00 committed by Steve Ellcey
parent 4946bd35e9
commit 55eaaa5bfb
5 changed files with 418 additions and 343 deletions

View File

@ -1,3 +1,64 @@
2011-02-04 Richard Henderson <rth@redhat.com>
Steve Ellcey <sje@cup.hp.com>
PR target/46997
* config/ia64/predicates.md (mux1_brcst_element): New.
* config/ia64/ia64-protos.h (ia64_unpack_assemble): New.
* config/ia64/ia64.c (ia64_unpack_assemble): New.
(ia64_unpack_sign): New.
(ia64_expand_unpack): Rewrite using new routines.
(ia64_expand_widen_sum): Ditto.
(ia64_expand_dot_prod_v8qi): Ditto.
* config/ia64/vect.md (mulv8qi3): Rewrite to use new
routines, add endian check.
(pmpy2_even): Rename from pmpy2_r, add endian check.
(pmpy2_odd): Rename from pmpy2_l, add endian check.
(vec_widen_smult_lo_v4hi): Rewrite using new routines.
(vec_widen_smult_hi_v4hi): Ditto.
(vec_widen_umult_lo_v4hi): Ditto.
(vec_widen_umult_hi_v4hi): Ditto.
(mulv2si3): Change endian checks.
(sdot_prodv4hi): Rewrite with new calls.
(udot_prodv4hi): New.
(vec_pack_ssat_v4hi): Add endian check.
(vec_pack_usat_v4hi): Ditto.
(vec_pack_ssat_v2si): Ditto.
(mix1_even): Rename from mix1_r, add endian check.
(mix1_odd): Rename from mix1_l, add endian check.
(*mux1_rev): Format change.
(*mux1_mix): Ditto.
(*mux1_shuf): Ditto.
(*mux1_alt): Ditto.
(*mux1_brcst_v8qi): Use new predicate.
(vec_extract_evenv8qi): Remove endian check.
(vec_extract_oddv8qi): Ditto.
(vec_interleave_lowv4hi): Format change.
(vec_interleave_highv4hi): Ditto.
(mix2_even): Rename from mix2_r, add endian check.
(mix2_odd): Rename from mix2_l, add endian check.
(*mux2): Fix mask setting for TARGET_BIG_ENDIAN.
(vec_extract_evenodd_helper): Format change.
(vec_extract_evenv4hi): Remove endian check.
(vec_extract_oddv4hi): Remove endian check.
(vec_interleave_lowv2si): Format change.
(vec_interleave_highv2si): Format change.
(vec_initv2si): Remove endian check.
(vecinit_v2si): Add endian check.
(reduc_splus_v2sf): Add endian check.
(reduc_smax_v2sf): Ditto.
(reduc_smin_v2sf): Ditto.
(vec_initv2sf): Remove endian check.
(fpack): Add endian check.
(fswap): Add endian check.
(vec_interleave_highv2sf): Add endian check.
(vec_interleave_lowv2sf): Add endian check.
(fmix_lr): Add endian check.
(vec_setv2sf): Format change.
(*vec_extractv2sf_0_be): Use shift to extract operand.
(*vec_extractv2sf_1_be): New.
(vec_pack_trunc_v4hi): Add endian check.
(vec_pack_trunc_v2si): Format change.
2011-02-04 Jakub Jelinek <jakub@redhat.com>
PR inline-asm/23200

View File

@ -39,9 +39,9 @@ extern bool ia64_expand_movxf_movrf (enum machine_mode, rtx[]);
extern void ia64_expand_compare (rtx *, rtx *, rtx *);
extern void ia64_expand_vecint_cmov (rtx[]);
extern bool ia64_expand_vecint_minmax (enum rtx_code, enum machine_mode, rtx[]);
extern void ia64_unpack_assemble (rtx, rtx, rtx, bool);
extern void ia64_expand_unpack (rtx [], bool, bool);
extern void ia64_expand_widen_sum (rtx[], bool);
extern void ia64_expand_widen_mul_v4hi (rtx [], bool, bool);
extern void ia64_expand_dot_prod_v8qi (rtx[], bool);
extern void ia64_expand_call (rtx, rtx, rtx, int);
extern void ia64_split_call (rtx, rtx, rtx, rtx, rtx, int, int);

View File

@ -1972,12 +1972,13 @@ ia64_expand_vecint_minmax (enum rtx_code code, enum machine_mode mode,
return true;
}
/* Emit an integral vector unpack operation. */
/* The vectors LO and HI each contain N halves of a double-wide vector.
Reassemble either the first N/2 or the second N/2 elements. */
void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
ia64_unpack_assemble (rtx out, rtx lo, rtx hi, bool highp)
{
enum machine_mode mode = GET_MODE (operands[1]);
enum machine_mode mode = GET_MODE (lo);
rtx (*gen) (rtx, rtx, rtx);
rtx x;
@ -1993,24 +1994,43 @@ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
gcc_unreachable ();
}
/* Fill in x with the sign extension of each element in op1. */
x = gen_lowpart (mode, out);
if (TARGET_BIG_ENDIAN)
x = gen (x, hi, lo);
else
x = gen (x, lo, hi);
emit_insn (x);
}
/* Return a vector of the sign-extension of VEC. */
static rtx
ia64_unpack_sign (rtx vec, bool unsignedp)
{
enum machine_mode mode = GET_MODE (vec);
rtx zero = CONST0_RTX (mode);
if (unsignedp)
x = CONST0_RTX (mode);
return zero;
else
{
rtx sign = gen_reg_rtx (mode);
bool neg;
x = gen_reg_rtx (mode);
neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
CONST0_RTX (mode));
neg = ia64_expand_vecint_compare (LT, mode, sign, vec, zero);
gcc_assert (!neg);
}
if (TARGET_BIG_ENDIAN)
emit_insn (gen (gen_lowpart (mode, operands[0]), x, operands[1]));
else
emit_insn (gen (gen_lowpart (mode, operands[0]), operands[1], x));
return sign;
}
}
/* Emit an integral vector unpack operation.
   OPERANDS[1] is the vector being widened and OPERANDS[0] receives the
   result.  The extension elements come from ia64_unpack_sign: a zero
   vector when UNSIGNEDP, otherwise the result of comparing each element
   of OPERANDS[1] against zero with LT.  HIGHP is forwarded unchanged to
   ia64_unpack_assemble, which picks the half of the elements to
   reassemble and handles the big-endian operand swap.  */
void
ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
{
rtx sign = ia64_unpack_sign (operands[1], unsignedp);
ia64_unpack_assemble (operands[0], operands[1], sign, highp);
}
/* Emit an integral vector widening sum operation. */
@ -2018,85 +2038,22 @@ ia64_expand_unpack (rtx operands[3], bool unsignedp, bool highp)
void
ia64_expand_widen_sum (rtx operands[3], bool unsignedp)
{
rtx l, h, x, s;
enum machine_mode wmode, mode;
rtx (*unpack_l) (rtx, rtx, rtx);
rtx (*unpack_h) (rtx, rtx, rtx);
rtx (*plus) (rtx, rtx, rtx);
enum machine_mode wmode;
rtx l, h, t, sign;
sign = ia64_unpack_sign (operands[1], unsignedp);
wmode = GET_MODE (operands[0]);
mode = GET_MODE (operands[1]);
switch (mode)
{
case V8QImode:
unpack_l = gen_vec_interleave_lowv8qi;
unpack_h = gen_vec_interleave_highv8qi;
plus = gen_addv4hi3;
break;
case V4HImode:
unpack_l = gen_vec_interleave_lowv4hi;
unpack_h = gen_vec_interleave_highv4hi;
plus = gen_addv2si3;
break;
default:
gcc_unreachable ();
}
/* Fill in x with the sign extension of each element in op1. */
if (unsignedp)
x = CONST0_RTX (mode);
else
{
bool neg;
x = gen_reg_rtx (mode);
neg = ia64_expand_vecint_compare (LT, mode, x, operands[1],
CONST0_RTX (mode));
gcc_assert (!neg);
}
l = gen_reg_rtx (wmode);
h = gen_reg_rtx (wmode);
s = gen_reg_rtx (wmode);
if (TARGET_BIG_ENDIAN)
{
emit_insn (unpack_l (gen_lowpart (mode, l), x, operands[1]));
emit_insn (unpack_h (gen_lowpart (mode, h), x, operands[1]));
}
else
{
emit_insn (unpack_l (gen_lowpart (mode, l), operands[1], x));
emit_insn (unpack_h (gen_lowpart (mode, h), operands[1], x));
}
emit_insn (plus (s, l, operands[2]));
emit_insn (plus (operands[0], h, s));
}
ia64_unpack_assemble (l, operands[1], sign, false);
ia64_unpack_assemble (h, operands[1], sign, true);
void
ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
{
rtx l = gen_reg_rtx (V4HImode);
rtx h = gen_reg_rtx (V4HImode);
rtx (*mulhigh)(rtx, rtx, rtx, rtx);
rtx (*interl)(rtx, rtx, rtx);
emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
/* For signed, pmpy2.r would appear to more closely match this operation.
However, the vectorizer is more likely to use the LO and HI patterns
in pairs. At which point, with this formulation, the first two insns
of each can be CSEd. */
mulhigh = unsignedp ? gen_pmpyshr2_u : gen_pmpyshr2;
emit_insn (mulhigh (h, operands[1], operands[2], GEN_INT (16)));
interl = highp ? gen_vec_interleave_highv4hi : gen_vec_interleave_lowv4hi;
if (TARGET_BIG_ENDIAN)
emit_insn (interl (gen_lowpart (V4HImode, operands[0]), h, l));
else
emit_insn (interl (gen_lowpart (V4HImode, operands[0]), l, h));
t = expand_binop (wmode, add_optab, l, operands[2], NULL, 0, OPTAB_DIRECT);
t = expand_binop (wmode, add_optab, h, t, operands[0], 0, OPTAB_DIRECT);
if (t != operands[0])
emit_move_insn (operands[0], t);
}
/* Emit a signed or unsigned V8QI dot product operation. */
@ -2104,62 +2061,31 @@ ia64_expand_widen_mul_v4hi (rtx operands[3], bool unsignedp, bool highp)
void
ia64_expand_dot_prod_v8qi (rtx operands[4], bool unsignedp)
{
rtx l1, l2, h1, h2, x1, x2, p1, p2, p3, p4, s1, s2, s3;
rtx op1, op2, sn1, sn2, l1, l2, h1, h2;
rtx p1, p2, p3, p4, s1, s2, s3;
/* Fill in x1 and x2 with the sign extension of each element. */
if (unsignedp)
x1 = x2 = CONST0_RTX (V8QImode);
else
{
bool neg;
x1 = gen_reg_rtx (V8QImode);
x2 = gen_reg_rtx (V8QImode);
neg = ia64_expand_vecint_compare (LT, V8QImode, x1, operands[1],
CONST0_RTX (V8QImode));
gcc_assert (!neg);
neg = ia64_expand_vecint_compare (LT, V8QImode, x2, operands[2],
CONST0_RTX (V8QImode));
gcc_assert (!neg);
}
op1 = operands[1];
op2 = operands[2];
sn1 = ia64_unpack_sign (op1, unsignedp);
sn2 = ia64_unpack_sign (op2, unsignedp);
l1 = gen_reg_rtx (V4HImode);
l2 = gen_reg_rtx (V4HImode);
h1 = gen_reg_rtx (V4HImode);
h2 = gen_reg_rtx (V4HImode);
if (TARGET_BIG_ENDIAN)
{
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l1), x1, operands[1]));
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l2), x2, operands[2]));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h1), x1, operands[1]));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h2), x2, operands[2]));
}
else
{
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l1), operands[1], x1));
emit_insn (gen_vec_interleave_lowv8qi
(gen_lowpart (V8QImode, l2), operands[2], x2));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h1), operands[1], x1));
emit_insn (gen_vec_interleave_highv8qi
(gen_lowpart (V8QImode, h2), operands[2], x2));
}
ia64_unpack_assemble (l1, op1, sn1, false);
ia64_unpack_assemble (l2, op2, sn2, false);
ia64_unpack_assemble (h1, op1, sn1, true);
ia64_unpack_assemble (h2, op2, sn2, true);
p1 = gen_reg_rtx (V2SImode);
p2 = gen_reg_rtx (V2SImode);
p3 = gen_reg_rtx (V2SImode);
p4 = gen_reg_rtx (V2SImode);
emit_insn (gen_pmpy2_r (p1, l1, l2));
emit_insn (gen_pmpy2_l (p2, l1, l2));
emit_insn (gen_pmpy2_r (p3, h1, h2));
emit_insn (gen_pmpy2_l (p4, h1, h2));
emit_insn (gen_pmpy2_even (p1, l1, l2));
emit_insn (gen_pmpy2_even (p2, h1, h2));
emit_insn (gen_pmpy2_odd (p3, l1, l2));
emit_insn (gen_pmpy2_odd (p4, h1, h2));
s1 = gen_reg_rtx (V2SImode);
s2 = gen_reg_rtx (V2SImode);

View File

@ -624,3 +624,7 @@
return REG_P (op) && REG_POINTER (op);
})
;; True if this is the right-most vector element; for mux1 @brcst.
;; Accepts only a const_int whose value is 7 when TARGET_BIG_ENDIAN
;; and 0 otherwise.
(define_predicate "mux1_brcst_element"
(and (match_code "const_int")
(match_test "INTVAL (op) == (TARGET_BIG_ENDIAN ? 7 : 0)")))

View File

@ -172,35 +172,14 @@
(match_operand:V8QI 2 "gr_register_operand" "r")))]
""
{
rtx r1, l1, r2, l2, rm, lm;
r1 = gen_reg_rtx (V4HImode);
l1 = gen_reg_rtx (V4HImode);
r2 = gen_reg_rtx (V4HImode);
l2 = gen_reg_rtx (V4HImode);
/* Zero-extend the QImode elements into two words of HImode elements
by interleaving them with zero bytes. */
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r1),
operands[1], CONST0_RTX (V8QImode)));
emit_insn (gen_mix1_r (gen_lowpart (V8QImode, r2),
operands[2], CONST0_RTX (V8QImode)));
emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l1),
operands[1], CONST0_RTX (V8QImode)));
emit_insn (gen_mix1_l (gen_lowpart (V8QImode, l2),
operands[2], CONST0_RTX (V8QImode)));
/* Multiply. */
rm = gen_reg_rtx (V4HImode);
lm = gen_reg_rtx (V4HImode);
emit_insn (gen_mulv4hi3 (rm, r1, r2));
emit_insn (gen_mulv4hi3 (lm, l1, l2));
/* Zap the high order bytes of the HImode elements by overwriting those
in one part with the low order bytes of the other. */
emit_insn (gen_mix1_r (operands[0],
gen_lowpart (V8QImode, rm),
gen_lowpart (V8QImode, lm)));
rtx l = gen_reg_rtx (V4HImode);
rtx h = gen_reg_rtx (V4HImode);
emit_insn (gen_vec_widen_umult_lo_v8qi (l, operands[1], operands[2]));
emit_insn (gen_vec_widen_umult_hi_v8qi (h, operands[1], operands[2]));
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_pack_trunc_v4hi (operands[0], h, l));
else
emit_insn (gen_vec_pack_trunc_v4hi (operands[0], l, h));
DONE;
})
@ -296,7 +275,7 @@
"pmpyshr2.u %0 = %1, %2, %3"
[(set_attr "itanium_class" "mmmul")])
(define_insn "pmpy2_r"
(define_insn "pmpy2_even"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(mult:V2SI
(vec_select:V2SI
@ -308,10 +287,16 @@
(match_operand:V4HI 2 "gr_register_operand" "r"))
(parallel [(const_int 0) (const_int 2)]))))]
""
"pmpy2.r %0 = %1, %2"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,pmpy2.l %0 = %1, %2";
else
return "%,pmpy2.r %0 = %1, %2";
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "pmpy2_l"
(define_insn "pmpy2_odd"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(mult:V2SI
(vec_select:V2SI
@ -323,7 +308,13 @@
(match_operand:V4HI 2 "gr_register_operand" "r"))
(parallel [(const_int 1) (const_int 3)]))))]
""
"pmpy2.l %0 = %1, %2"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,pmpy2.r %0 = %1, %2";
else
return "%,pmpy2.l %0 = %1, %2";
}
[(set_attr "itanium_class" "mmshf")])
(define_expand "vec_widen_smult_lo_v4hi"
@ -332,7 +323,11 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
ia64_expand_widen_mul_v4hi (operands, false, false);
rtx l = gen_reg_rtx (V4HImode);
rtx h = gen_reg_rtx (V4HImode);
emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
ia64_unpack_assemble (operands[0], l, h, false);
DONE;
})
@ -342,7 +337,11 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
ia64_expand_widen_mul_v4hi (operands, false, true);
rtx l = gen_reg_rtx (V4HImode);
rtx h = gen_reg_rtx (V4HImode);
emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
emit_insn (gen_pmpyshr2 (h, operands[1], operands[2], GEN_INT (16)));
ia64_unpack_assemble (operands[0], l, h, true);
DONE;
})
@ -352,7 +351,11 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
ia64_expand_widen_mul_v4hi (operands, true, false);
rtx l = gen_reg_rtx (V4HImode);
rtx h = gen_reg_rtx (V4HImode);
emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
ia64_unpack_assemble (operands[0], l, h, false);
DONE;
})
@ -362,7 +365,11 @@
(match_operand:V4HI 2 "gr_register_operand" "")]
""
{
ia64_expand_widen_mul_v4hi (operands, true, true);
rtx l = gen_reg_rtx (V4HImode);
rtx h = gen_reg_rtx (V4HImode);
emit_insn (gen_mulv4hi3 (l, operands[1], operands[2]));
emit_insn (gen_pmpyshr2_u (h, operands[1], operands[2], GEN_INT (16)));
ia64_unpack_assemble (operands[0], l, h, true);
DONE;
})
@ -390,12 +397,8 @@
of the full 32-bit product. */
/* T0 = CDBA. */
if (TARGET_BIG_ENDIAN)
x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, GEN_INT (3), const2_rtx,
const1_rtx, const0_rtx));
else
x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
GEN_INT (3), const2_rtx));
x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (4, const1_rtx, const0_rtx,
GEN_INT (3), const2_rtx));
x = gen_rtx_VEC_SELECT (V4HImode, op1h, x);
emit_insn (gen_rtx_SET (VOIDmode, t0, x));
@ -409,15 +412,28 @@
emit_insn (gen_mulv4hi3 (t3, t0, op2h));
/* T4 = CY.h, CY.l, AW.h, AW.l = CY, AW. */
emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t4), t1, t2));
x = gen_lowpart (V4HImode, t4);
if (TARGET_BIG_ENDIAN)
x = gen_mix2_odd (x, t2, t1);
else
x = gen_mix2_even (x, t1, t2);
emit_insn (x);
/* T5 = CZ.l, 0, AX.l, 0 = CZ << 16, AX << 16. */
emit_insn (gen_mix2_l (gen_lowpart (V4HImode, t5),
CONST0_RTX (V4HImode), t3));
x = gen_lowpart (V4HImode, t5);
if (TARGET_BIG_ENDIAN)
x = gen_mix2_even (x, t3, CONST0_RTX (V4HImode));
else
x = gen_mix2_odd (x, CONST0_RTX (V4HImode), t3);
emit_insn (x);
/* T6 = DY.l, 0, BW.l, 0 = DY << 16, BW << 16. */
emit_insn (gen_mix2_r (gen_lowpart (V4HImode, t6),
CONST0_RTX (V4HImode), t3));
x = gen_lowpart (V4HImode, t6);
if (TARGET_BIG_ENDIAN)
x = gen_mix2_odd (x, t3, CONST0_RTX (V4HImode));
else
x = gen_mix2_even (x, CONST0_RTX (V4HImode), t3);
emit_insn (x);
emit_insn (gen_addv2si3 (t7, t4, t5));
emit_insn (gen_addv2si3 (operands[0], t6, t7));
@ -612,16 +628,36 @@
(match_operand:V2SI 3 "gr_register_operand" "")]
""
{
rtx l, r, t;
rtx e, o, t;
r = gen_reg_rtx (V2SImode);
l = gen_reg_rtx (V2SImode);
e = gen_reg_rtx (V2SImode);
o = gen_reg_rtx (V2SImode);
t = gen_reg_rtx (V2SImode);
emit_insn (gen_pmpy2_r (r, operands[1], operands[2]));
emit_insn (gen_pmpy2_l (l, operands[1], operands[2]));
emit_insn (gen_addv2si3 (t, r, operands[3]));
emit_insn (gen_addv2si3 (operands[0], t, l));
emit_insn (gen_pmpy2_even (e, operands[1], operands[2]));
emit_insn (gen_pmpy2_odd (o, operands[1], operands[2]));
emit_insn (gen_addv2si3 (t, e, operands[3]));
emit_insn (gen_addv2si3 (operands[0], t, o));
DONE;
})
;; Unsigned V4HI dot product with accumulate: operand 0 (V2SI) receives
;; operand 3 (V2SI) plus both widened-product halves of operands 1 and 2.
(define_expand "udot_prodv4hi"
[(match_operand:V2SI 0 "gr_register_operand" "")
(match_operand:V4HI 1 "gr_register_operand" "")
(match_operand:V4HI 2 "gr_register_operand" "")
(match_operand:V2SI 3 "gr_register_operand" "")]
""
{
rtx l, h, t;
/* Fresh V2SI temporaries: L and H hold the two widened product
   vectors, T the partial sum.  */
l = gen_reg_rtx (V2SImode);
h = gen_reg_rtx (V2SImode);
t = gen_reg_rtx (V2SImode);
/* Unsigned widening multiply, "lo" and "hi" halves.  */
emit_insn (gen_vec_widen_umult_lo_v4hi (l, operands[1], operands[2]));
emit_insn (gen_vec_widen_umult_hi_v4hi (h, operands[1], operands[2]));
/* operands[0] = (L + operands[3]) + H.  */
emit_insn (gen_addv2si3 (t, l, operands[3]));
emit_insn (gen_addv2si3 (operands[0], t, h));
DONE;
})
@ -677,7 +713,13 @@
(ss_truncate:V4QI
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
""
"pack2.sss %0 = %r1, %r2"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,pack2.sss %0 = %r2, %r1";
else
return "%,pack2.sss %0 = %r1, %r2";
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_pack_usat_v4hi"
@ -688,7 +730,13 @@
(us_truncate:V4QI
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))))]
""
"pack2.uss %0 = %r1, %r2"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,pack2.uss %0 = %r2, %r1";
else
return "%,pack2.uss %0 = %r1, %r2";
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_pack_ssat_v2si"
@ -699,7 +747,13 @@
(ss_truncate:V2HI
(match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))))]
""
"pack4.sss %0 = %r1, %r2"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,pack4.sss %0 = %r2, %r1";
else
return "%,pack4.sss %0 = %r1, %r2";
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "vec_interleave_lowv8qi"
@ -742,54 +796,54 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "mix1_r"
(define_insn "mix1_even"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
(match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
(parallel [(const_int 0)
(const_int 8)
(const_int 2)
(const_int 10)
(const_int 4)
(const_int 12)
(const_int 6)
(const_int 14)])))]
(parallel [(const_int 0) (const_int 8)
(const_int 2) (const_int 10)
(const_int 4) (const_int 12)
(const_int 6) (const_int 14)])))]
""
"mix1.r %0 = %r2, %r1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,mix1.l %0 = %r1, %r2";
else
return "%,mix1.r %0 = %r2, %r1";
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "mix1_l"
(define_insn "mix1_odd"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(vec_concat:V16QI
(match_operand:V8QI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V8QI 2 "gr_reg_or_0_operand" "rU"))
(parallel [(const_int 1)
(const_int 9)
(const_int 3)
(const_int 11)
(const_int 5)
(const_int 13)
(const_int 7)
(const_int 15)])))]
(parallel [(const_int 1) (const_int 9)
(const_int 3) (const_int 11)
(const_int 5) (const_int 13)
(const_int 7) (const_int 15)])))]
""
"mix1.l %0 = %r2, %r1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,mix1.r %0 = %r1, %r2";
else
return "%,mix1.l %0 = %r2, %r1";
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "*mux1_rev"
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
(parallel [(const_int 7)
(const_int 6)
(const_int 5)
(const_int 4)
(const_int 3)
(const_int 2)
(const_int 1)
(const_int 0)])))]
(parallel [(const_int 7) (const_int 6)
(const_int 5) (const_int 4)
(const_int 3) (const_int 2)
(const_int 1) (const_int 0)])))]
""
"mux1 %0 = %1, @rev"
[(set_attr "itanium_class" "mmshf")])
@ -798,14 +852,10 @@
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
(parallel [(const_int 0)
(const_int 4)
(const_int 2)
(const_int 6)
(const_int 1)
(const_int 5)
(const_int 3)
(const_int 7)])))]
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)
(const_int 1) (const_int 5)
(const_int 3) (const_int 7)])))]
""
"mux1 %0 = %1, @mix"
[(set_attr "itanium_class" "mmshf")])
@ -814,14 +864,10 @@
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
(parallel [(const_int 0)
(const_int 4)
(const_int 1)
(const_int 5)
(const_int 2)
(const_int 6)
(const_int 3)
(const_int 7)])))]
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)
(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
""
"mux1 %0 = %1, @shuf"
[(set_attr "itanium_class" "mmshf")])
@ -830,14 +876,10 @@
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
(parallel [(const_int 0)
(const_int 2)
(const_int 4)
(const_int 6)
(const_int 1)
(const_int 3)
(const_int 5)
(const_int 7)])))]
(parallel [(const_int 0) (const_int 2)
(const_int 4) (const_int 6)
(const_int 1) (const_int 3)
(const_int 5) (const_int 7)])))]
""
"mux1 %0 = %1, @alt"
[(set_attr "itanium_class" "mmshf")])
@ -846,14 +888,14 @@
[(set (match_operand:V8QI 0 "gr_register_operand" "=r")
(vec_select:V8QI
(match_operand:V8QI 1 "gr_register_operand" "r")
(parallel [(const_int 0)
(const_int 0)
(const_int 0)
(const_int 0)
(const_int 0)
(const_int 0)
(const_int 0)
(const_int 0)])))]
(parallel [(match_operand 2 "mux1_brcst_element" "")
(match_dup 2)
(match_dup 2)
(match_dup 2)
(match_dup 2)
(match_dup 2)
(match_dup 2)
(match_dup 2)])))]
""
"mux1 %0 = %1, @brcst"
[(set_attr "itanium_class" "mmshf")])
@ -873,10 +915,7 @@
""
{
rtx temp = gen_reg_rtx (V8QImode);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_mix1_l (temp, operands[2], operands[1]));
else
emit_insn (gen_mix1_r (temp, operands[1], operands[2]));
emit_insn (gen_mix1_even (temp, operands[1], operands[2]));
emit_insn (gen_mux1_alt (operands[0], temp));
DONE;
})
@ -888,10 +927,7 @@
""
{
rtx temp = gen_reg_rtx (V8QImode);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_mix1_r (temp, operands[2], operands[1]));
else
emit_insn (gen_mix1_l (temp, operands[1], operands[2]));
emit_insn (gen_mix1_odd (temp, operands[1], operands[2]));
emit_insn (gen_mux1_alt (operands[0], temp));
DONE;
})
@ -902,10 +938,8 @@
(vec_concat:V8HI
(match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
(parallel [(const_int 0)
(const_int 4)
(const_int 1)
(const_int 5)])))]
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
""
{
/* Recall that vector elements are numbered in memory order. */
@ -922,10 +956,8 @@
(vec_concat:V8HI
(match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
(parallel [(const_int 2)
(const_int 6)
(const_int 3)
(const_int 7)])))]
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
""
{
/* Recall that vector elements are numbered in memory order. */
@ -936,32 +968,40 @@
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "mix2_r"
(define_insn "mix2_even"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_select:V4HI
(vec_concat:V8HI
(match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
(parallel [(const_int 0)
(const_int 4)
(const_int 2)
(const_int 6)])))]
(parallel [(const_int 0) (const_int 4)
(const_int 2) (const_int 6)])))]
""
"mix2.r %0 = %r2, %r1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,mix2.l %0 = %r1, %r2";
else
return "%,mix2.r %0 = %r2, %r1";
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "mix2_l"
(define_insn "mix2_odd"
[(set (match_operand:V4HI 0 "gr_register_operand" "=r")
(vec_select:V4HI
(vec_concat:V8HI
(match_operand:V4HI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V4HI 2 "gr_reg_or_0_operand" "rU"))
(parallel [(const_int 1)
(const_int 5)
(const_int 3)
(const_int 7)])))]
(parallel [(const_int 1) (const_int 5)
(const_int 3) (const_int 7)])))]
""
"mix2.l %0 = %r2, %r1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,mix2.r %0 = %r1, %r2";
else
return "%,mix2.l %0 = %r2, %r1";
}
[(set_attr "itanium_class" "mmshf")])
(define_insn "*mux2"
@ -974,17 +1014,17 @@
(match_operand 5 "const_int_2bit_operand" "")])))]
""
{
int mask;
int mask = 0;
if (TARGET_BIG_ENDIAN)
{
mask = INTVAL (operands[2]) << 4;
mask |= INTVAL (operands[3]) << 6;
mask |= INTVAL (operands[4]);
mask |= INTVAL (operands[5]) << 2;
mask |= (3 - INTVAL (operands[2])) << 6;
mask |= (3 - INTVAL (operands[3])) << 4;
mask |= (3 - INTVAL (operands[4])) << 2;
mask |= 3 - INTVAL (operands[5]);
}
else
{
mask = INTVAL (operands[2]);
mask |= INTVAL (operands[2]);
mask |= INTVAL (operands[3]) << 2;
mask |= INTVAL (operands[4]) << 4;
mask |= INTVAL (operands[5]) << 6;
@ -998,10 +1038,8 @@
[(set (match_operand:V4HI 0 "gr_register_operand" "")
(vec_select:V4HI
(match_operand:V4HI 1 "gr_register_operand" "")
(parallel [(const_int 0)
(const_int 2)
(const_int 1)
(const_int 3)])))]
(parallel [(const_int 0) (const_int 2)
(const_int 1) (const_int 3)])))]
"")
(define_expand "vec_extract_evenv4hi"
@ -1011,10 +1049,7 @@
""
{
rtx temp = gen_reg_rtx (V4HImode);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
else
emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
emit_insn (gen_mix2_even (temp, operands[1], operands[2]));
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
DONE;
})
@ -1026,10 +1061,7 @@
""
{
rtx temp = gen_reg_rtx (V4HImode);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_mix2_r (temp, operands[1], operands[2]));
else
emit_insn (gen_mix2_l (temp, operands[1], operands[2]));
emit_insn (gen_mix2_odd (temp, operands[1], operands[2]));
emit_insn (gen_vec_extract_evenodd_helper (operands[0], temp));
DONE;
})
@ -1042,15 +1074,13 @@
"mux2 %0 = %1, 0"
[(set_attr "itanium_class" "mmshf")])
;; Note that mix4.r performs the exact same operation.
(define_insn "vec_interleave_lowv2si"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(vec_select:V2SI
(vec_concat:V4SI
(match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
(parallel [(const_int 0)
(const_int 2)])))]
(parallel [(const_int 0) (const_int 2)])))]
""
{
/* Recall that vector elements are numbered in memory order. */
@ -1061,15 +1091,13 @@
}
[(set_attr "itanium_class" "mmshf")])
;; Note that mix4.l performs the exact same operation.
(define_insn "vec_interleave_highv2si"
[(set (match_operand:V2SI 0 "gr_register_operand" "=r")
(vec_select:V2SI
(vec_concat:V4SI
(match_operand:V2SI 1 "gr_reg_or_0_operand" "rU")
(match_operand:V2SI 2 "gr_reg_or_0_operand" "rU"))
(parallel [(const_int 1)
(const_int 3)])))]
(parallel [(const_int 1) (const_int 3)])))]
""
{
/* Recall that vector elements are numbered in memory order. */
@ -1088,7 +1116,7 @@
{
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
operands[2]));
operands[2]));
else
emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
operands[2]));
@ -1103,7 +1131,7 @@
{
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_interleave_lowv2si (operands[0], operands[1],
operands[2]));
operands[2]));
else
emit_insn (gen_vec_interleave_highv2si (operands[0], operands[1],
operands[2]));
@ -1131,10 +1159,7 @@
if (!gr_reg_or_0_operand (op2, SImode))
op2 = force_reg (SImode, op2);
if (TARGET_BIG_ENDIAN)
x = gen_rtx_VEC_CONCAT (V2SImode, op2, op1);
else
x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
x = gen_rtx_VEC_CONCAT (V2SImode, op1, op2);
emit_insn (gen_rtx_SET (VOIDmode, operands[0], x));
DONE;
})
@ -1145,7 +1170,13 @@
(match_operand:SI 1 "gr_reg_or_0_operand" "rO")
(match_operand:SI 2 "gr_reg_or_0_operand" "rO")))]
""
"unpack4.l %0 = %r2, %r1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,unpack4.l %0 = %r1, %r2";
else
return "%,unpack4.l %0 = %r2, %r1";
}
[(set_attr "itanium_class" "mmshf")])
;; Missing operations
@ -1315,7 +1346,10 @@
""
{
rtx tmp = gen_reg_rtx (V2SFmode);
emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
if (TARGET_BIG_ENDIAN)
emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
else
emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
emit_insn (gen_addv2sf3 (operands[0], operands[1], tmp));
DONE;
})
@ -1326,7 +1360,10 @@
""
{
rtx tmp = gen_reg_rtx (V2SFmode);
emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
if (TARGET_BIG_ENDIAN)
emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
else
emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
emit_insn (gen_smaxv2sf3 (operands[0], operands[1], tmp));
DONE;
})
@ -1337,7 +1374,10 @@
""
{
rtx tmp = gen_reg_rtx (V2SFmode);
emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
if (TARGET_BIG_ENDIAN)
emit_insn (gen_fswap (tmp, CONST0_RTX (V2SFmode), operands[1]));
else
emit_insn (gen_fswap (tmp, operands[1], CONST0_RTX (V2SFmode)));
emit_insn (gen_sminv2sf3 (operands[0], operands[1], tmp));
DONE;
})
@ -1403,10 +1443,7 @@
if (!fr_reg_or_fp01_operand (op2, SFmode))
op2 = force_reg (SFmode, op2);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_fpack (operands[0], op2, op1));
else
emit_insn (gen_fpack (operands[0], op1, op2));
emit_insn (gen_fpack (operands[0], op1, op2));
DONE;
})
@ -1416,7 +1453,13 @@
(match_operand:SF 1 "fr_reg_or_fp01_operand" "fG")
(match_operand:SF 2 "fr_reg_or_fp01_operand" "fG")))]
""
"fpack %0 = %F2, %F1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,fpack %0 = %F1, %F2";
else
return "%,fpack %0 = %F2, %F1";
}
[(set_attr "itanium_class" "fmisc")])
(define_insn "fswap"
@ -1427,7 +1470,13 @@
(match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
(parallel [(const_int 1) (const_int 2)])))]
""
"fswap %0 = %F1, %F2"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,fswap %0 = %F2, %F1";
else
return "%,fswap %0 = %F1, %F2";
}
[(set_attr "itanium_class" "fmisc")])
(define_insn "vec_interleave_highv2sf"
@ -1438,7 +1487,13 @@
(match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
(parallel [(const_int 1) (const_int 3)])))]
""
"fmix.l %0 = %F2, %F1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,fmix.r %0 = %F1, %F2";
else
return "%,fmix.l %0 = %F2, %F1";
}
[(set_attr "itanium_class" "fmisc")])
(define_insn "vec_interleave_lowv2sf"
@ -1449,7 +1504,13 @@
(match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
(parallel [(const_int 0) (const_int 2)])))]
""
"fmix.r %0 = %F2, %F1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,fmix.l %0 = %F1, %F2";
else
return "%,fmix.r %0 = %F2, %F1";
}
[(set_attr "itanium_class" "fmisc")])
(define_insn "fmix_lr"
@ -1460,7 +1521,13 @@
(match_operand:V2SF 2 "fr_reg_or_0_operand" "fU"))
(parallel [(const_int 0) (const_int 3)])))]
""
"fmix.lr %0 = %F2, %F1"
{
/* Recall that vector elements are numbered in memory order. */
if (TARGET_BIG_ENDIAN)
return "%,fmix.lr %0 = %F1, %F2";
else
return "%,fmix.lr %0 = %F2, %F1";
}
[(set_attr "itanium_class" "fmisc")])
(define_expand "vec_extract_evenv2sf"
@ -1485,23 +1552,24 @@
DONE;
})
(define_expand "vec_setv2sf"
[(match_operand:V2SF 0 "fr_register_operand" "")
(match_operand:SF 1 "fr_register_operand" "")
(match_operand 2 "const_int_operand" "")]
""
{
rtx op0 = operands[0];
rtx tmp = gen_reg_rtx (V2SFmode);
emit_insn (gen_fpack (tmp, operands[1], CONST0_RTX (SFmode)));
switch (INTVAL (operands[2]))
{
case 0:
emit_insn (gen_fmix_lr (operands[0], tmp, operands[0]));
emit_insn (gen_fmix_lr (op0, tmp, op0));
break;
case 1:
emit_insn (gen_vec_interleave_lowv2sf (operands[0], operands[0], tmp));
emit_insn (gen_vec_interleave_lowv2sf (op0, op0, tmp));
break;
default:
gcc_unreachable ();
@ -1528,8 +1596,8 @@
})
(define_insn_and_split "*vec_extractv2sf_0_be"
[(set (match_operand:SF 0 "register_operand" "=r,f")
(unspec:SF [(match_operand:V2SF 1 "register_operand" "rf,r")
[(set (match_operand:SF 0 "register_operand" "=rf,r")
(unspec:SF [(match_operand:V2SF 1 "nonimmediate_operand" "m,r")
(const_int 0)]
UNSPEC_VECT_EXTR))]
"TARGET_BIG_ENDIAN"
@ -1537,31 +1605,44 @@
"reload_completed"
[(set (match_dup 0) (match_dup 1))]
{
if (REG_P (operands[1]) && FR_REGNO_P (REGNO (operands[1])))
operands[0] = gen_rtx_REG (V2SFmode, REGNO (operands[0]));
if (MEM_P (operands[1]))
operands[1] = adjust_address (operands[1], SFmode, 0);
else
operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
{
emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
DONE;
}
})
;; Extract element 1 of a V2SF value held in a general register.
;; The "_le" form is conditioned on !TARGET_BIG_ENDIAN, always emits "#"
;; (so it must be split), and is split once reload has completed.
;; NOTE(review): this span appears to carry both the pre-change and the
;; post-change lines of a patch hunk (two define_insn_and_split headers,
;; two insn conditions, two split conditions, two shift emissions) --
;; confirm which lines are live before relying on it.
(define_insn_and_split "*vec_extractv2sf_1"
(define_insn_and_split "*vec_extractv2sf_1_le"
[(set (match_operand:SF 0 "register_operand" "=r")
(unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
(const_int 1)]
UNSPEC_VECT_EXTR))]
""
"!TARGET_BIG_ENDIAN"
"#"
"reload_completed"
"&& reload_completed"
[(const_int 0)]
{
/* View both operands as DImode registers with unchanged register
   numbers so that a 64-bit logical right shift by 32 can be emitted
   on them; the split body emits its own insns and ends with DONE.  */
operands[0] = gen_rtx_REG (DImode, REGNO (operands[0]));
operands[1] = gen_rtx_REG (DImode, REGNO (operands[1]));
if (TARGET_BIG_ENDIAN)
emit_move_insn (operands[0], operands[1]);
else
emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
emit_insn (gen_lshrdi3 (operands[0], operands[1], GEN_INT (32)));
DONE;
})
;; Big-endian extraction of element 1 from a V2SF value held in a
;; general register ("r"); the SF result may land in either register
;; class named by "=rf".  The insn always emits "#", so it must be
;; split; the split fires once reload has completed and turns the
;; unspec into a plain (set op0 op1) move.
(define_insn_and_split "*vec_extractv2sf_1_be"
[(set (match_operand:SF 0 "register_operand" "=rf")
(unspec:SF [(match_operand:V2SF 1 "register_operand" "r")
(const_int 1)]
UNSPEC_VECT_EXTR))]
"TARGET_BIG_ENDIAN"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 1))]
{
/* Re-view the source as an SFmode register with the same register
   number, so the replacement pattern is an ordinary SFmode move.
   NOTE(review): presumably element 1 is the part of the register an
   SFmode view reads when elements are numbered in memory order on a
   big-endian target -- confirm.  */
operands[1] = gen_rtx_REG (SFmode, REGNO (operands[1]));
})
(define_expand "vec_extractv2sf"
[(set (match_operand:SF 0 "register_operand" "")
(unspec:SF [(match_operand:V2SF 1 "register_operand" "")
@ -1610,11 +1691,14 @@
[(match_operand:V8QI 0 "gr_register_operand" "")
(match_operand:V4HI 1 "gr_register_operand" "")
(match_operand:V4HI 2 "gr_register_operand" "")]
"!TARGET_BIG_ENDIAN"
""
{
rtx op1 = gen_lowpart(V8QImode, operands[1]);
rtx op2 = gen_lowpart(V8QImode, operands[2]);
emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
rtx op1 = gen_lowpart (V8QImode, operands[1]);
rtx op2 = gen_lowpart (V8QImode, operands[2]);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_extract_oddv8qi (operands[0], op1, op2));
else
emit_insn (gen_vec_extract_evenv8qi (operands[0], op1, op2));
DONE;
})
@ -1624,8 +1708,8 @@
(match_operand:V2SI 2 "gr_register_operand" "")]
""
{
rtx op1 = gen_lowpart(V4HImode, operands[1]);
rtx op2 = gen_lowpart(V4HImode, operands[2]);
rtx op1 = gen_lowpart (V4HImode, operands[1]);
rtx op2 = gen_lowpart (V4HImode, operands[2]);
if (TARGET_BIG_ENDIAN)
emit_insn (gen_vec_extract_oddv4hi (operands[0], op1, op2));
else