vmull / vmovl support for Neon.

For Tejas Belagod

2010-08-25  Tejas Belagod  <tejas.belagod@arm.com>

	* lib/target-supports.exp (check_effective_target_vect_unpack):
	Set vect_unpack supported flag to true for neon.

	* config/arm/iterators.md (VU, SE, V_widen_l): New.
	(V_unpack, US): New.
	* config/arm/neon.md (vec_unpack<US>_hi_<mode>): Expansion for
	vmovl.
	(vec_unpack<US>_lo_<mode>): Likewise.
	(neon_vec_unpack<US>_hi_<mode>): Instruction pattern for vmovl.
	(neon_vec_unpack<US>_lo_<mode>): Likewise.
	(vec_widen_<US>mult_lo_<mode>): Expansion for vmull.
	(vec_widen_<US>mult_hi_<mode>): Likewise.
	(neon_vec_<US>mult_lo_<mode>): Instruction pattern for vmull.
	(neon_vec_<US>mult_hi_<mode>): Likewise.
	(neon_unpack<US>_<mode>): Widening move intermediate step for
	vectorizing without -mvectorize-with-neon-quad.
	(neon_vec_<US>mult_<mode>): Widening multiply intermediate step
	for vectorizing without -mvectorize-with-neon-quad.
	* config/arm/predicates.md (vect_par_constant_high): Check for
	high-half lanes of a vector.
	(vect_par_constant_low): Check for low-half lanes of a vector.

From-SVN: r163538
This commit is contained in:
Tejas Belagod 2010-08-25 08:23:26 +01:00 committed by Ramana Radhakrishnan
parent 34f41f7c1a
commit 46b57af175
6 changed files with 301 additions and 2 deletions

View File

@ -1,3 +1,24 @@
2010-08-25 Tejas Belagod <tejas.belagod@arm.com>
* config/arm/iterators.md (VU, SE, V_widen_l): New.
(V_unpack, US): New.
* config/arm/neon.md (vec_unpack<US>_hi_<mode>): Expansion for
vmovl.
(vec_unpack<US>_lo_<mode>): Likewise.
(neon_vec_unpack<US>_hi_<mode>): Instruction pattern for vmovl.
(neon_vec_unpack<US>_lo_<mode>): Likewise.
(vec_widen_<US>mult_lo_<mode>): Expansion for vmull.
(vec_widen_<US>mult_hi_<mode>): Likewise.
(neon_vec_<US>mult_lo_<mode>): Instruction pattern for vmull.
(neon_vec_<US>mult_hi_<mode>): Likewise.
(neon_unpack<US>_<mode>): Widening move intermediate step for
vectorizing without -mvectorize-with-neon-quad.
(neon_vec_<US>mult_<mode>): Widening multiply intermediate step
for vectorizing without -mvectorize-with-neon-quad.
* config/arm/predicates.md (vect_par_constant_high): Check for
high-half lanes of a vector.
(vect_par_constant_low): Check for low-half lanes of a vector.
2010-08-24 Sebastian Pop <sebastian.pop@amd.com>
* tree-if-conv.c (struct ifc_dr): New.

View File

@ -136,7 +136,9 @@
;; Vector modes whose elements are 32 bits wide: V2SI, V2SF, V4SI and V4SF.
(define_mode_iterator V32
  [V2SI V2SF V4SI V4SF])
;; Integer vector modes with 8-bit, 16-bit and 32-bit elements
;; (V16QI, V8HI and V4SI) that the unpack/widening patterns iterate over.
(define_mode_iterator VU
  [V16QI V8HI V4SI])
;;----------------------------------------------------------------------------
;; Code iterators
;;----------------------------------------------------------------------------
@ -156,6 +158,8 @@
;; without unsigned variants (for use with *SFmode pattern).
(define_code_iterator vqhs_ops [plus smin smax])
;; The widening (extension) RTL codes: signed and unsigned.
(define_code_iterator SE
  [sign_extend zero_extend])
;;----------------------------------------------------------------------------
;; Mode attributes
@ -360,6 +364,11 @@
(V2SF "2") (V4SF "4")
(DI "1") (V2DI "2")])
;; Lower-case spelling of the V_widen mapping, for building pattern and
;; generator-function names.
(define_mode_attr V_widen_l
  [(V8QI "v8hi")
   (V4HI "v4si")
   (V2SI "v2di")])
;; Unpacked (widened) mode: half as many elements as the source mode,
;; each element twice as wide.
(define_mode_attr V_unpack
  [(V16QI "V8HI")
   (V8HI  "V4SI")
   (V4SI  "V2DI")])
;;----------------------------------------------------------------------------
;; Code attributes
@ -375,3 +384,6 @@
;; Maps ltu to "CC_C" and geu to "CC".
;; NOTE(review): presumably condition-code mode names; confirm at use sites.
(define_code_attr cnb
  [(ltu "CC_C")
   (geu "CC")])
;; Maps each comparison code to its own lower-case name as a string.
(define_code_attr optab
  [(ltu "ltu")
   (geu "geu")])
;; Signedness letter used in assembler mnemonics and pattern names of
;; widening operations: "s" for sign_extend, "u" for zero_extend.
(define_code_attr US
  [(sign_extend "s")
   (zero_extend "u")])

View File

@ -4977,3 +4977,205 @@
emit_insn (gen_orn<mode>3_neon (operands[0], operands[1], operands[2]));
DONE;
})
;; VMOVL on the low half of a VU-mode vector: the lanes selected by the
;; PARALLEL in operand 2 (which must satisfy vect_par_constant_low) are
;; sign- or zero-extended, per SE, into the <V_unpack> result.  The source
;; is printed with the %e modifier here; the _hi pattern uses %f.
(define_insn "neon_vec_unpack<US>_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_low" ""))))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %e1"
  [(set_attr "neon_type" "neon_shift_1")]
)
;; VMOVL on the high half of a VU-mode vector: the lanes selected by the
;; PARALLEL in operand 2 (which must satisfy vect_par_constant_high) are
;; sign- or zero-extended, per SE, into the <V_unpack> result.  The source
;; is printed with the %f modifier here; the _lo pattern uses %e.
(define_insn "neon_vec_unpack<US>_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (SE:<V_unpack>
          (vec_select:<V_HALF>
            (match_operand:VU 1 "register_operand" "w")
            (match_operand:VU 2 "vect_par_constant_high" ""))))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %f1"
  [(set_attr "neon_type" "neon_shift_1")]
)
;; Expander for widening the high half of VU-mode operand 1 into operand 0.
;; Builds a PARALLEL holding lane numbers N/2 .. N-1, where N is
;; <V_mode_nunits>, and passes it to neon_vec_unpack<US>_hi_<mode>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
  "TARGET_NEON"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec lane_vec = rtvec_alloc (half_lanes);
    rtx lane_sel;
    int lane;

    /* Lane selector for the upper half: half_lanes, half_lanes + 1, ...  */
    for (lane = 0; lane < half_lanes; lane++)
      RTVEC_ELT (lane_vec, lane) = GEN_INT (half_lanes + lane);

    lane_sel = gen_rtx_PARALLEL (<MODE>mode, lane_vec);
    emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], operands[1],
                                                  lane_sel));
    DONE;
  }
)
;; Expander for widening the low half of VU-mode operand 1 into operand 0.
;; Builds a PARALLEL holding lane numbers 0 .. N/2-1, where N is
;; <V_mode_nunits>, and passes it to neon_vec_unpack<US>_lo_<mode>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
  "TARGET_NEON"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec lane_vec = rtvec_alloc (half_lanes);
    rtx lane_sel;
    int lane;

    /* Lane selector for the lower half: 0, 1, ..., half_lanes - 1.  */
    for (lane = 0; lane < half_lanes; lane++)
      RTVEC_ELT (lane_vec, lane) = GEN_INT (lane);

    lane_sel = gen_rtx_PARALLEL (<MODE>mode, lane_vec);
    emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0], operands[1],
                                                  lane_sel));
    DONE;
  }
)
;; VMULL on the low halves of two VU-mode vectors.  Both inputs are narrowed
;; by the same lane selector (operand 2, reused through match_dup), extended
;; per SE, and multiplied in the <V_unpack> mode.
;; NOTE(review): neon_type is "neon_shift_1" although this is a multiply;
;; confirm that is the intended scheduling class.
(define_insn "neon_vec_<US>mult_lo_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_low" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %e1, %e3"
  [(set_attr "neon_type" "neon_shift_1")]
)
;; Expander for a widening multiply of the low halves of VU-mode operands 1
;; and 2.  Builds a PARALLEL holding lane numbers 0 .. N/2-1, where N is
;; <V_mode_nunits>, and passes it to neon_vec_<US>mult_lo_<mode>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec lane_vec = rtvec_alloc (half_lanes);
    rtx lane_sel;
    int lane;

    /* Lane selector for the lower half: 0, 1, ..., half_lanes - 1.  */
    for (lane = 0; lane < half_lanes; lane++)
      RTVEC_ELT (lane_vec, lane) = GEN_INT (lane);

    lane_sel = gen_rtx_PARALLEL (<MODE>mode, lane_vec);
    /* The insn takes the selector as its operand 2 and the second
       multiplicand as its operand 3.  */
    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0], operands[1],
                                                lane_sel, operands[2]));
    DONE;
  }
)
;; VMULL on the high halves of two VU-mode vectors.  Both inputs are narrowed
;; by the same lane selector (operand 2, reused through match_dup), extended
;; per SE, and multiplied in the <V_unpack> mode.
;; NOTE(review): neon_type is "neon_shift_1" although this is a multiply;
;; confirm that is the intended scheduling class.
(define_insn "neon_vec_<US>mult_hi_<mode>"
  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
        (mult:<V_unpack>
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 1 "register_operand" "w")
              (match_operand:VU 2 "vect_par_constant_high" "")))
          (SE:<V_unpack>
            (vec_select:<V_HALF>
              (match_operand:VU 3 "register_operand" "w")
              (match_dup 2)))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %f1, %f3"
  [(set_attr "neon_type" "neon_shift_1")]
)
;; Expander for a widening multiply of the high halves of VU-mode operands 1
;; and 2.  Builds a PARALLEL holding lane numbers N/2 .. N-1, where N is
;; <V_mode_nunits>, and passes it to neon_vec_<US>mult_hi_<mode>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_unpack> 0 "register_operand" "")
   (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
   (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    const int half_lanes = <V_mode_nunits> / 2;
    rtvec lane_vec = rtvec_alloc (half_lanes);
    rtx lane_sel;
    int lane;

    /* Lane selector for the upper half: half_lanes, half_lanes + 1, ...  */
    for (lane = 0; lane < half_lanes; lane++)
      RTVEC_ELT (lane_vec, lane) = GEN_INT (half_lanes + lane);

    lane_sel = gen_rtx_PARALLEL (<MODE>mode, lane_vec);
    /* The insn takes the selector as its operand 2 and the second
       multiplicand as its operand 3.  */
    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0], operands[1],
                                                lane_sel, operands[2]));
    DONE;
  }
)
;; Vectorize for non-neon-quad case

;; VMOVL of a whole VDI-mode vector: every lane of operand 1 is sign- or
;; zero-extended, per SE, into the <V_widen> result in operand 0.
;;
;; Operand 1 carries the "w" constraint so that the register allocator must
;; place it in a NEON register; with an empty constraint any register that
;; satisfies register_operand would be accepted.  The source is printed with
;; %P so that it is emitted as a D register name, matching the %q used for
;; the quad-word destination.
(define_insn "neon_unpack<US>_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
  "TARGET_NEON"
  "vmovl.<US><V_sz_elem> %q0, %P1"
  [(set_attr "neon_type" "neon_shift_1")]
)
;; Expander for the VDI modes: widen all of operand 1 into a <V_widen>
;; temporary with neon_unpack<US>_<mode>, then copy the low half of that
;; temporary into operand 0 with neon_vget_low<V_widen_l>.
(define_expand "vec_unpack<US>_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
  {
    rtx widened = gen_reg_rtx (<V_widen>mode);

    emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], widened));
    DONE;
  }
)
;; Expander for the VDI modes: widen all of operand 1 into a <V_widen>
;; temporary with neon_unpack<US>_<mode>, then copy the high half of that
;; temporary into operand 0 with neon_vget_high<V_widen_l>.
(define_expand "vec_unpack<US>_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))]
  "TARGET_NEON"
  {
    rtx widened = gen_reg_rtx (<V_widen>mode);

    emit_insn (gen_neon_unpack<US>_<mode> (widened, operands[1]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], widened));
    DONE;
  }
)
;; VMULL of two whole VDI-mode vectors: each input is sign- or zero-extended,
;; per SE, to <V_widen> and the results are multiplied lane by lane.
;;
;; Both sources are printed with %P so that they are emitted as D register
;; names, matching the %q used for the quad-word destination.
;; NOTE(review): neon_type is "neon_shift_1" although this is a multiply;
;; confirm that is the intended scheduling class.
(define_insn "neon_vec_<US>mult_<mode>"
  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
        (mult:<V_widen>
          (SE:<V_widen>
            (match_operand:VDI 1 "register_operand" "w"))
          (SE:<V_widen>
            (match_operand:VDI 2 "register_operand" "w"))))]
  "TARGET_NEON"
  "vmull.<US><V_sz_elem> %q0, %P1, %P2"
  [(set_attr "neon_type" "neon_shift_1")]
)
;; Expander for the VDI modes: multiply operands 1 and 2 into a <V_widen>
;; temporary with neon_vec_<US>mult_<mode>, then copy the high half of the
;; product into operand 0 with neon_vget_high<V_widen_l>.
(define_expand "vec_widen_<US>mult_hi_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    rtx product = gen_reg_rtx (<V_widen>mode);

    emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                             operands[2]));
    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], product));
    DONE;
  }
)
;; Expander for the VDI modes: multiply operands 1 and 2 into a <V_widen>
;; temporary with neon_vec_<US>mult_<mode>, then copy the low half of the
;; product into operand 0 with neon_vget_low<V_widen_l>.
(define_expand "vec_widen_<US>mult_lo_<mode>"
  [(match_operand:<V_double_width> 0 "register_operand" "")
   (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
   (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
  "TARGET_NEON"
  {
    rtx product = gen_reg_rtx (<V_widen>mode);

    emit_insn (gen_neon_vec_<US>mult_<mode> (product, operands[1],
                                             operands[2]));
    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], product));
    DONE;
  }
)

View File

@ -619,3 +619,61 @@
(and (match_test "TARGET_32BIT")
(match_operand 0 "arm_di_operand"))))
;; Predicates for parallel expanders based on mode.

;; Accept a PARALLEL whose elements are, in order, the CONST_INT lane
;; numbers N/2, N/2 + 1, ..., N - 1, where N is GET_MODE_NUNITS (mode).
;; MODE must be a vector mode and the PARALLEL must hold exactly N/2
;; elements.
(define_special_predicate "vect_par_constant_high"
  (match_code "parallel")
{
  HOST_WIDE_INT n_elts = XVECLEN (op, 0);
  int half = GET_MODE_NUNITS (mode) / 2;
  int idx;

  /* The selector must be non-empty and cover exactly half the lanes.  */
  if (n_elts < 1 || n_elts != half)
    return false;

  if (!VECTOR_MODE_P (mode))
    return false;

  for (idx = 0; idx < n_elts; idx++)
    {
      rtx lane_rtx = XVECEXP (op, 0, idx);
      int lane;

      if (GET_CODE (lane_rtx) != CONST_INT)
        return false;

      /* Element IDX must name lane HALF + IDX of the full vector.  */
      lane = INTVAL (lane_rtx);
      if (lane != half + idx)
        return false;
    }

  return true;
})
;; Accept a PARALLEL whose elements are, in order, the CONST_INT lane
;; numbers 0, 1, ..., N/2 - 1, where N is GET_MODE_NUNITS (mode).
;; MODE must be a vector mode and the PARALLEL must hold exactly N/2
;; elements.
(define_special_predicate "vect_par_constant_low"
  (match_code "parallel")
{
  HOST_WIDE_INT n_elts = XVECLEN (op, 0);
  int half = GET_MODE_NUNITS (mode) / 2;
  int idx;

  /* The selector must be non-empty and cover exactly half the lanes.  */
  if (n_elts < 1 || n_elts != half)
    return false;

  if (!VECTOR_MODE_P (mode))
    return false;

  for (idx = 0; idx < n_elts; idx++)
    {
      rtx lane_rtx = XVECEXP (op, 0, idx);
      int lane;

      if (GET_CODE (lane_rtx) != CONST_INT)
        return false;

      /* Element IDX must name lane IDX of the full vector.  */
      lane = INTVAL (lane_rtx);
      if (lane != idx)
        return false;
    }

  return true;
})

View File

@ -1,3 +1,8 @@
2010-08-25 Tejas Belagod <tejas.belagod@arm.com>
* lib/target-supports.exp (check_effective_target_vect_unpack):
Set vect_unpack supported flag to true for neon.
2010-08-24 Sebastian Pop <sebastian.pop@amd.com>
* gcc.dg/tree-ssa/ifc-5.c: New.

View File

@ -2640,7 +2640,8 @@ proc check_effective_target_vect_unpack { } {
if { ([istarget powerpc*-*-*] && ![istarget powerpc-*paired*])
|| [istarget i?86-*-*]
|| [istarget x86_64-*-*]
|| [istarget spu-*-*] } {
|| [istarget spu-*-*]
|| ([istarget arm*-*-*] && [check_effective_target_arm_neon]) } {
set et_vect_unpack_saved 1
}
}