mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 10:40:50 +08:00
vmull / vmovl support for Neon.
For Tejas Belagod 2010-08-25 Tejas Belagod <tejas.belagod@arm.com> * lib/target-supports.exp (check_effective_target_vect_unpack): Set vect_unpack supported flag to true for neon. * config/arm/iterators.md (VU, SE, V_widen_l): New. (V_unpack, US): New. * config/arm/neon.md (vec_unpack<US>_hi_<mode>): Expansion for vmovl. (vec_unpack<US>_lo_<mode>): Likewise. (neon_vec_unpack<US>_hi_<mode>): Instruction pattern for vmovl. (neon_vec_unpack<US>_lo_<mode>): Likewise. (vec_widen_<US>mult_lo_<mode>): Expansion for vmull. (vec_widen_<US>mult_hi_<mode>): Likewise. (neon_vec_<US>mult_lo_<mode>"): Instruction pattern for vmull. (neon_vec_<US>mult_hi_<mode>"): Likewise. (neon_unpack<US>_<mode>): Widening move intermediate step for vectorizing without -mvectorize-with-neon-quad. (neon_vec_<US>mult_<mode>): Widening multiply intermediate step for vectorizing without -mvectorize-with-neon-quad. * config/arm/predicates.md (vect_par_constant_high): Check for high-half lanes of a vector. (vect_par_constant_low): Check for low-half lanes of a vector. From-SVN: r163538
This commit is contained in:
parent
34f41f7c1a
commit
46b57af175
@ -1,3 +1,24 @@
|
||||
2010-08-25 Tejas Belagod <tejas.belagod@arm.com>
|
||||
|
||||
* config/arm/iterators.md (VU, SE, V_widen_l): New.
|
||||
(V_unpack, US): New.
|
||||
* config/arm/neon.md (vec_unpack<US>_hi_<mode>): Expansion for
|
||||
vmovl.
|
||||
(vec_unpack<US>_lo_<mode>): Likewise.
|
||||
(neon_vec_unpack<US>_hi_<mode>): Instruction pattern for vmovl.
|
||||
(neon_vec_unpack<US>_lo_<mode>): Likewise.
|
||||
(vec_widen_<US>mult_lo_<mode>): Expansion for vmull.
|
||||
(vec_widen_<US>mult_hi_<mode>): Likewise.
|
||||
(neon_vec_<US>mult_lo_<mode>"): Instruction pattern for vmull.
|
||||
(neon_vec_<US>mult_hi_<mode>"): Likewise.
|
||||
(neon_unpack<US>_<mode>): Widening move intermediate step for
|
||||
vectorizing without -mvectorize-with-neon-quad.
|
||||
(neon_vec_<US>mult_<mode>): Widening multiply intermediate step
|
||||
for vectorizing without -mvectorize-with-neon-quad.
|
||||
* config/arm/predicates.md (vect_par_constant_high): Check for
|
||||
high-half lanes of a vector.
|
||||
(vect_par_constant_low): Check for low-half lanes of a vector.
|
||||
|
||||
2010-08-24 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* tree-if-conv.c (struct ifc_dr): New.
|
||||
|
@ -136,7 +136,9 @@
|
||||
;; Modes with 32-bit elements only.
|
||||
(define_mode_iterator V32 [V2SI V2SF V4SI V4SF])
|
||||
|
||||
|
||||
;; Modes with 8-bit, 16-bit and 32-bit elements.
|
||||
(define_mode_iterator VU [V16QI V8HI V4SI])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Code iterators
|
||||
;;----------------------------------------------------------------------------
|
||||
@ -156,6 +158,8 @@
|
||||
;; without unsigned variants (for use with *SFmode pattern).
|
||||
(define_code_iterator vqhs_ops [plus smin smax])
|
||||
|
||||
;; A list of widening operators
|
||||
(define_code_iterator SE [sign_extend zero_extend])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Mode attributes
|
||||
@ -360,6 +364,11 @@
|
||||
(V2SF "2") (V4SF "4")
|
||||
(DI "1") (V2DI "2")])
|
||||
|
||||
;; Same as V_widen, but lower-case.
|
||||
(define_mode_attr V_widen_l [(V8QI "v8hi") (V4HI "v4si") ( V2SI "v2di")])
|
||||
|
||||
;; Widen. Result is half the number of elements, but widened to double-width.
|
||||
(define_mode_attr V_unpack [(V16QI "V8HI") (V8HI "V4SI") (V4SI "V2DI")])
|
||||
|
||||
;;----------------------------------------------------------------------------
|
||||
;; Code attributes
|
||||
@ -375,3 +384,6 @@
|
||||
|
||||
(define_code_attr cnb [(ltu "CC_C") (geu "CC")])
|
||||
(define_code_attr optab [(ltu "ltu") (geu "geu")])
|
||||
|
||||
;; Assembler mnemonics for signedness of widening operations.
|
||||
(define_code_attr US [(sign_extend "s") (zero_extend "u")])
|
||||
|
@ -4977,3 +4977,205 @@
|
||||
emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "neon_vec_unpack<US>_lo_<mode>"
|
||||
[(set (match_operand:<V_unpack> 0 "register_operand" "=w")
|
||||
(SE:<V_unpack> (vec_select:<V_HALF>
|
||||
(match_operand:VU 1 "register_operand" "w")
|
||||
(match_operand:VU 2 "vect_par_constant_low" ""))))]
|
||||
"TARGET_NEON"
|
||||
"vmovl.<US><V_sz_elem> %q0, %e1"
|
||||
[(set_attr "neon_type" "neon_shift_1")]
|
||||
)
|
||||
|
||||
(define_insn "neon_vec_unpack<US>_hi_<mode>"
|
||||
[(set (match_operand:<V_unpack> 0 "register_operand" "=w")
|
||||
(SE:<V_unpack> (vec_select:<V_HALF>
|
||||
(match_operand:VU 1 "register_operand" "w")
|
||||
(match_operand:VU 2 "vect_par_constant_high" ""))))]
|
||||
"TARGET_NEON"
|
||||
"vmovl.<US><V_sz_elem> %q0, %f1"
|
||||
[(set_attr "neon_type" "neon_shift_1")]
|
||||
)
|
||||
|
||||
(define_expand "vec_unpack<US>_hi_<mode>"
|
||||
[(match_operand:<V_unpack> 0 "register_operand" "")
|
||||
(SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
|
||||
rtx t1;
|
||||
int i;
|
||||
for (i = 0; i < (<V_mode_nunits>/2); i++)
|
||||
RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
|
||||
|
||||
t1 = gen_rtx_PARALLEL (<MODE>mode, v);
|
||||
emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
|
||||
operands[1],
|
||||
t1));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_expand "vec_unpack<US>_lo_<mode>"
|
||||
[(match_operand:<V_unpack> 0 "register_operand" "")
|
||||
(SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
|
||||
rtx t1;
|
||||
int i;
|
||||
for (i = 0; i < (<V_mode_nunits>/2) ; i++)
|
||||
RTVEC_ELT (v, i) = GEN_INT (i);
|
||||
t1 = gen_rtx_PARALLEL (<MODE>mode, v);
|
||||
emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
|
||||
operands[1],
|
||||
t1));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_insn "neon_vec_<US>mult_lo_<mode>"
|
||||
[(set (match_operand:<V_unpack> 0 "register_operand" "=w")
|
||||
(mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
|
||||
(match_operand:VU 1 "register_operand" "w")
|
||||
(match_operand:VU 2 "vect_par_constant_low" "")))
|
||||
(SE:<V_unpack> (vec_select:<V_HALF>
|
||||
(match_operand:VU 3 "register_operand" "w")
|
||||
(match_dup 2)))))]
|
||||
"TARGET_NEON"
|
||||
"vmull.<US><V_sz_elem> %q0, %e1, %e3"
|
||||
[(set_attr "neon_type" "neon_shift_1")]
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<US>mult_lo_<mode>"
|
||||
[(match_operand:<V_unpack> 0 "register_operand" "")
|
||||
(SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
|
||||
(SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
|
||||
rtx t1;
|
||||
int i;
|
||||
for (i = 0; i < (<V_mode_nunits>/2) ; i++)
|
||||
RTVEC_ELT (v, i) = GEN_INT (i);
|
||||
t1 = gen_rtx_PARALLEL (<MODE>mode, v);
|
||||
|
||||
emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
|
||||
operands[1],
|
||||
t1,
|
||||
operands[2]));
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_insn "neon_vec_<US>mult_hi_<mode>"
|
||||
[(set (match_operand:<V_unpack> 0 "register_operand" "=w")
|
||||
(mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
|
||||
(match_operand:VU 1 "register_operand" "w")
|
||||
(match_operand:VU 2 "vect_par_constant_high" "")))
|
||||
(SE:<V_unpack> (vec_select:<V_HALF>
|
||||
(match_operand:VU 3 "register_operand" "w")
|
||||
(match_dup 2)))))]
|
||||
"TARGET_NEON"
|
||||
"vmull.<US><V_sz_elem> %q0, %f1, %f3"
|
||||
[(set_attr "neon_type" "neon_shift_1")]
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<US>mult_hi_<mode>"
|
||||
[(match_operand:<V_unpack> 0 "register_operand" "")
|
||||
(SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
|
||||
(SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
|
||||
rtx t1;
|
||||
int i;
|
||||
for (i = 0; i < (<V_mode_nunits>/2) ; i++)
|
||||
RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
|
||||
t1 = gen_rtx_PARALLEL (<MODE>mode, v);
|
||||
|
||||
emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
|
||||
operands[1],
|
||||
t1,
|
||||
operands[2]));
|
||||
DONE;
|
||||
|
||||
}
|
||||
)
|
||||
|
||||
;; Vectorize for non-neon-quad case
|
||||
(define_insn "neon_unpack<US>_<mode>"
|
||||
[(set (match_operand:<V_widen> 0 "register_operand" "=w")
|
||||
(SE:<V_widen> (match_operand:VDI 1 "register_operand" "")))]
|
||||
"TARGET_NEON"
|
||||
"vmovl.<US><V_sz_elem> %q0, %1"
|
||||
[(set_attr "neon_type" "neon_shift_1")]
|
||||
)
|
||||
|
||||
(define_expand "vec_unpack<US>_lo_<mode>"
|
||||
[(match_operand:<V_double_width> 0 "register_operand" "")
|
||||
(SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx tmpreg = gen_reg_rtx (<V_widen>mode);
|
||||
emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
|
||||
emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
|
||||
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_expand "vec_unpack<US>_hi_<mode>"
|
||||
[(match_operand:<V_double_width> 0 "register_operand" "")
|
||||
(SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx tmpreg = gen_reg_rtx (<V_widen>mode);
|
||||
emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
|
||||
emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
|
||||
|
||||
DONE;
|
||||
}
|
||||
)
|
||||
|
||||
(define_insn "neon_vec_<US>mult_<mode>"
|
||||
[(set (match_operand:<V_widen> 0 "register_operand" "=w")
|
||||
(mult:<V_widen> (SE:<V_widen>
|
||||
(match_operand:VDI 1 "register_operand" "w"))
|
||||
(SE:<V_widen>
|
||||
(match_operand:VDI 2 "register_operand" "w"))))]
|
||||
"TARGET_NEON"
|
||||
"vmull.<US><V_sz_elem> %q0, %1, %2"
|
||||
[(set_attr "neon_type" "neon_shift_1")]
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<US>mult_hi_<mode>"
|
||||
[(match_operand:<V_double_width> 0 "register_operand" "")
|
||||
(SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
|
||||
(SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx tmpreg = gen_reg_rtx (<V_widen>mode);
|
||||
emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
|
||||
emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
|
||||
|
||||
DONE;
|
||||
|
||||
}
|
||||
)
|
||||
|
||||
(define_expand "vec_widen_<US>mult_lo_<mode>"
|
||||
[(match_operand:<V_double_width> 0 "register_operand" "")
|
||||
(SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
|
||||
(SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
|
||||
"TARGET_NEON"
|
||||
{
|
||||
rtx tmpreg = gen_reg_rtx (<V_widen>mode);
|
||||
emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
|
||||
emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
|
||||
|
||||
DONE;
|
||||
|
||||
}
|
||||
)
|
||||
|
@ -619,3 +619,61 @@
|
||||
(and (match_test "TARGET_32BIT")
|
||||
(match_operand 0 "arm_di_operand"))))
|
||||
|
||||
;; Predicates for parallel expanders based on mode.
|
||||
(define_special_predicate "vect_par_constant_high"
|
||||
(match_code "parallel")
|
||||
{
|
||||
HOST_WIDE_INT count = XVECLEN (op, 0);
|
||||
int i;
|
||||
int base = GET_MODE_NUNITS (mode);
|
||||
|
||||
if ((count < 1)
|
||||
|| (count != base/2))
|
||||
return false;
|
||||
|
||||
if (!VECTOR_MODE_P (mode))
|
||||
return false;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
rtx elt = XVECEXP (op, 0, i);
|
||||
int val;
|
||||
|
||||
if (GET_CODE (elt) != CONST_INT)
|
||||
return false;
|
||||
|
||||
val = INTVAL (elt);
|
||||
if (val != (base/2) + i)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})
|
||||
|
||||
(define_special_predicate "vect_par_constant_low"
|
||||
(match_code "parallel")
|
||||
{
|
||||
HOST_WIDE_INT count = XVECLEN (op, 0);
|
||||
int i;
|
||||
int base = GET_MODE_NUNITS (mode);
|
||||
|
||||
if ((count < 1)
|
||||
|| (count != base/2))
|
||||
return false;
|
||||
|
||||
if (!VECTOR_MODE_P (mode))
|
||||
return false;
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
rtx elt = XVECEXP (op, 0, i);
|
||||
int val;
|
||||
|
||||
if (GET_CODE (elt) != CONST_INT)
|
||||
return false;
|
||||
|
||||
val = INTVAL (elt);
|
||||
if (val != i)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
})
|
||||
|
@ -1,3 +1,8 @@
|
||||
2010-08-25 Tejas Belagod <tejas.belagod@arm.com>
|
||||
|
||||
* lib/target-supports.exp (check_effective_target_vect_unpack):
|
||||
Set vect_unpack supported flag to true for neon.
|
||||
|
||||
2010-08-24 Sebastian Pop <sebastian.pop@amd.com>
|
||||
|
||||
* gcc.dg/tree-ssa/ifc-5.c: New.
|
||||
|
@ -2640,7 +2640,8 @@ proc check_effective_target_vect_unpack { } {
|
||||
if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*])
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget spu-*-*] } {
|
||||
|| [istarget spu-*-*]
|
||||
|| ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
|
||||
set et_vect_unpack_saved 1
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user