[AARCH64] Add support for floating-point vcond.

gcc/
	* config/aarch64/aarch64-simd.md
	(aarch64_simd_bsl<mode>_internal): Add floating-point modes.
	(aarch64_simd_bsl): Likewise.
	(aarch64_vcond_internal<mode>): Likewise.
	(vcond<mode><mode>): Likewise.
	(aarch64_cm<cmp><mode>): Fix constraints, add new modes.
	* config/aarch64/iterators.md (V_cmp_result): Add V2DF.

gcc/testsuite/
	* gcc.target/aarch64/vect-fcm-eq-d.c: New.
	* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
	* gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
	* gcc.target/aarch64/vect-fcm.x: Likewise.
	* lib/target-supports.exp
	(check_effective_target_vect_condition): Enable for AArch64.

From-SVN: r195018
This commit is contained in:
James Greenhalgh 2013-01-08 14:57:33 +00:00 committed by James Greenhalgh
parent 4dcd1054bf
commit 385eb93d42
11 changed files with 250 additions and 20 deletions

View File

@ -1,3 +1,13 @@
2013-01-08 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-simd.md
(aarch64_simd_bsl<mode>_internal): Add floating-point modes.
(aarch64_simd_bsl): Likewise.
(aarch64_vcond_internal<mode>): Likewise.
(vcond<mode><mode>): Likewise.
(aarch64_cm<cmp><mode>): Fix constraints, add new modes.
* config/aarch64/iterators.md (V_cmp_result): Add V2DF.
2013-01-08 James Greenhalgh <james.greenhalgh@arm.com>
* config/aarch64/aarch64-builtins.c

View File

@ -1463,7 +1463,7 @@
(set_attr "simd_mode" "V2SI")]
)
;; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
;; vbsl_* intrinsics may compile to any of bsl/bif/bit depending on register
;; allocation. For an intrinsic of form:
;; vD = bsl_* (vS, vN, vM)
;; We can use any of:
@ -1472,11 +1472,12 @@
;; bif vD, vM, vS (if D = N, so 0-bits in vS choose bits from vM, else vN)
(define_insn "aarch64_simd_bsl<mode>_internal"
[(set (match_operand:VDQ 0 "register_operand" "=w,w,w")
(unspec:VDQ [(match_operand:VDQ 1 "register_operand" " 0,w,w")
(match_operand:VDQ 2 "register_operand" " w,w,0")
(match_operand:VDQ 3 "register_operand" " w,0,w")]
UNSPEC_BSL))]
[(set (match_operand:VALL 0 "register_operand" "=w,w,w")
(unspec:VALL
[(match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
(match_operand:VALL 2 "register_operand" " w,w,0")
(match_operand:VALL 3 "register_operand" " w,0,w")]
UNSPEC_BSL))]
"TARGET_SIMD"
"@
bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
@ -1485,15 +1486,15 @@
)
(define_expand "aarch64_simd_bsl<mode>"
[(set (match_operand:VDQ 0 "register_operand")
(unspec:VDQ [(match_operand:<V_cmp_result> 1 "register_operand")
(match_operand:VDQ 2 "register_operand")
(match_operand:VDQ 3 "register_operand")]
UNSPEC_BSL))]
[(set (match_operand:VALL 0 "register_operand")
(unspec:VALL [(match_operand:<V_cmp_result> 1 "register_operand")
(match_operand:VALL 2 "register_operand")
(match_operand:VALL 3 "register_operand")]
UNSPEC_BSL))]
"TARGET_SIMD"
{
/* We can't alias operands together if they have different modes. */
operands[1] = gen_lowpart (<MODE>mode, operands[1]);
operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
})
(define_expand "aarch64_vcond_internal<mode>"
@ -1574,14 +1575,64 @@
DONE;
})
(define_expand "vcond<mode><mode>"
[(set (match_operand:VDQ 0 "register_operand")
(if_then_else:VDQ
(define_expand "aarch64_vcond_internal<mode>"
[(set (match_operand:VDQF 0 "register_operand")
(if_then_else:VDQF
(match_operator 3 "comparison_operator"
[(match_operand:VDQ 4 "register_operand")
(match_operand:VDQ 5 "nonmemory_operand")])
(match_operand:VDQ 1 "register_operand")
(match_operand:VDQ 2 "register_operand")))]
[(match_operand:VDQF 4 "register_operand")
(match_operand:VDQF 5 "nonmemory_operand")])
(match_operand:VDQF 1 "register_operand")
(match_operand:VDQF 2 "register_operand")))]
"TARGET_SIMD"
{
/* Expand
     operands[0] = (operands[4] <operands[3]> operands[5])
		   ? operands[1] : operands[2]
   as one compare that fills MASK followed by one bit-select
   (gen_aarch64_simd_bsl) between operands 1 and 2.  INVERSE records
   that the emitted compare is the logical opposite of the requested
   one, in which case the two select arms are exchanged.  */
int inverse = 0;
rtx mask = gen_reg_rtx (<V_cmp_result>mode);
/* Operand 5 may remain the zero constant (CONST0_RTX); any other
   non-register value is forced into a register first.  */
if (!REG_P (operands[5])
&& (operands[5] != CONST0_RTX (<MODE>mode)))
operands[5] = force_reg (<MODE>mode, operands[5]);
/* Only cmge, cmgt and cmeq are emitted.  LT, LE and NE reuse the GE,
   GT and EQ compare respectively and set INVERSE.
   NOTE(review): for floating-point operands "a < b" and "!(a >= b)"
   give different answers when either operand is a NaN (likewise LE
   versus GT) -- confirm this inversion is acceptable for unordered
   inputs.  */
switch (GET_CODE (operands[3]))
{
case LT:
inverse = 1;
/* Fall through. */
case GE:
emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
break;
case LE:
inverse = 1;
/* Fall through. */
case GT:
emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
break;
case NE:
inverse = 1;
/* Fall through. */
case EQ:
emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
break;
default:
/* Any comparison code other than the six handled above ends up
   here.  */
gcc_unreachable ();
}
/* MASK is always the selector argument; INVERSE only decides which of
   operands 1 and 2 is passed first to the bit-select.  */
if (inverse)
emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[2],
operands[1]));
else
emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask, operands[1],
operands[2]));
DONE;
})
(define_expand "vcond<mode><mode>"
[(set (match_operand:VALL 0 "register_operand")
(if_then_else:VALL
(match_operator 3 "comparison_operator"
[(match_operand:VALL 4 "register_operand")
(match_operand:VALL 5 "nonmemory_operand")])
(match_operand:VALL 1 "register_operand")
(match_operand:VALL 2 "register_operand")))]
"TARGET_SIMD"
{
emit_insn (gen_aarch64_vcond_internal<mode> (operands[0], operands[1],
@ -2866,6 +2917,22 @@
(set_attr "simd_mode" "<MODE>")]
)
;; fcm(eq|ge|le|lt|gt)
;; Floating-point vector compare.  The inputs use the VDQF modes while the
;; destination uses the matching <V_cmp_result> mode, so the result is not
;; in the same mode as the values being compared.
;; Alternative 0 compares two vector registers.  Alternative 1 accepts
;; operand 2 through the "Dz" constraint and prints a literal 0 in its
;; place (presumably Dz matches only an all-zeros vector constant; confirm
;; against the constraint definitions).
;; NOTE(review): the heading above lists eq|ge|le|lt|gt; which of those the
;; VCMP_S iterator actually provides is not visible from this pattern.
(define_insn "aarch64_cm<cmp><mode>"
[(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
(unspec:<V_cmp_result>
[(match_operand:VDQF 1 "register_operand" "w,w")
(match_operand:VDQF 2 "aarch64_simd_reg_or_zero" "w,Dz")]
VCMP_S))]
"TARGET_SIMD"
"@
fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>
fcm<cmp>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
[(set_attr "simd_type" "simd_fcmp")
(set_attr "simd_mode" "<MODE>")]
)
;; addp
(define_insn "aarch64_addp<mode>"

View File

@ -1,3 +1,15 @@
2013-01-08 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vect-fcm-eq-d.c: New.
* gcc.target/aarch64/vect-fcm-eq-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-ge-f.c: Likewise.
* gcc.target/aarch64/vect-fcm-gt-d.c: Likewise.
* gcc.target/aarch64/vect-fcm-gt-f.c: Likewise.
* gcc.target/aarch64/vect-fcm.x: Likewise.
* lib/target-supports.exp
(check_effective_target_vect_condition): Enable for AArch64.
2013-01-08 James Greenhalgh <james.greenhalgh@arm.com>
* gcc.target/aarch64/vsqrt.c (test_square_root_v2sf): Use

View File

@ -0,0 +1,14 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
/* Instantiate the shared test body in vect-fcm.x for double elements
   with OP "==".  INV_OP is the logically opposite comparison, used by
   the body to build a second loop that should select the same values.  */
#define FTYPE double
#define OP ==
#define INV_OP !=
#include "vect-fcm.x"
/* The checks below expect the text "vectorized 1 loops" three times in
   the vect dump, two fcmeq instructions whose three operands are all
   .2d vector registers, and one fcmeq whose last operand is a literal 0.  */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */

View File

@ -0,0 +1,14 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
/* Instantiate the shared test body in vect-fcm.x for float elements
   with OP "==".  INV_OP is the logically opposite comparison, used by
   the body to build a second loop that should select the same values.  */
#define FTYPE float
#define OP ==
#define INV_OP !=
#include "vect-fcm.x"
/* The checks below expect the text "vectorized 1 loops" three times in
   the vect dump, two fcmeq instructions whose three operands are all
   .2s or .4s vector registers, and one fcmeq whose last operand is a
   literal 0.  */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
/* { dg-final { scan-assembler-times "fcmeq\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */

View File

@ -0,0 +1,14 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
/* Instantiate the shared test body in vect-fcm.x for double elements
   with OP ">=".  INV_OP is the logically opposite comparison, used by
   the body to build a second loop that should select the same values.  */
#define FTYPE double
#define OP >=
#define INV_OP <
#include "vect-fcm.x"
/* The checks below expect the text "vectorized 1 loops" three times in
   the vect dump, two fcmge instructions whose three operands are all
   .2d vector registers, and one fcmge whose last operand is a literal 0.  */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */

View File

@ -0,0 +1,14 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
/* Instantiate the shared test body in vect-fcm.x for float elements
   with OP ">=".  INV_OP is the logically opposite comparison, used by
   the body to build a second loop that should select the same values.  */
#define FTYPE float
#define OP >=
#define INV_OP <
#include "vect-fcm.x"
/* The checks below expect the text "vectorized 1 loops" three times in
   the vect dump, two fcmge instructions whose three operands are all
   .2s or .4s vector registers, and one fcmge whose last operand is a
   literal 0.  */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
/* { dg-final { scan-assembler-times "fcmge\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */

View File

@ -0,0 +1,14 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
/* Instantiate the shared test body in vect-fcm.x for double elements
   with OP ">".  INV_OP is the logically opposite comparison, used by
   the body to build a second loop that should select the same values.  */
#define FTYPE double
#define OP >
#define INV_OP <=
#include "vect-fcm.x"
/* The checks below expect the text "vectorized 1 loops" three times in
   the vect dump, two fcmgt instructions whose three operands are all
   .2d vector registers, and one fcmgt whose last operand is a literal 0.  */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, v\[0-9\]+\.2d" 2 } } */
/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.2d, v\[0-9\]+\.2d, 0" 1 } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */

View File

@ -0,0 +1,14 @@
/* { dg-do run } */
/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-all -fno-unroll-loops --save-temps -fno-inline" } */
/* Instantiate the shared test body in vect-fcm.x for float elements
   with OP ">".  INV_OP is the logically opposite comparison, used by
   the body to build a second loop that should select the same values.  */
#define FTYPE float
#define OP >
#define INV_OP <=
#include "vect-fcm.x"
/* The checks below expect the text "vectorized 1 loops" three times in
   the vect dump, two fcmgt instructions whose three operands are all
   .2s or .4s vector registers, and one fcmgt whose last operand is a
   literal 0.  */
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 3 "vect" } } */
/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s" 2 } } */
/* { dg-final { scan-assembler-times "fcmgt\\tv\[0-9\]+\.\[24\]s, v\[0-9\]+\.\[24\]s, 0" 1 } } */
/* { dg-final { cleanup-tree-dump "vect" } } */
/* { dg-final { cleanup-saved-temps } } */

View File

@ -0,0 +1,56 @@
#include <stdlib.h>
#define N 16
/* Test inputs.  FTYPE is supplied by the file that includes this one.
   Element for element, input2 has the same magnitude as input1 and
   differs only in sign for some entries, so the pairs cover the
   greater-than (2.0 vs -2.0), equal (4.0 vs 4.0) and less-than
   (-2.0 vs 2.0) relations.  */
FTYPE input1[N] =
{2.0, 4.0, 8.0, 16.0,
2.125, 4.25, 8.5, 17.0,
-2.0, -4.0, -8.0, -16.0,
-2.125, -4.25, -8.5, -17.0};
FTYPE input2[N] =
{-2.0, 4.0, -8.0, 16.0,
2.125, -4.25, 8.5, -17.0,
2.0, -4.0, 8.0, -16.0,
-2.125, 4.25, -8.5, 17.0};
/* For each of the N elements, store 2.0 in OUTPUT where
   IN1[k] OP IN2[k] holds and 4.0 where it does not.  OP comes from the
   including file.  */
void
foo (FTYPE *in1, FTYPE *in2, FTYPE *output)
{
  int k;

  /* Vectorizable.  */
  for (k = 0; k < N; k++)
    {
      FTYPE chosen = (in1[k] OP in2[k]) ? 2.0 : 4.0;
      output[k] = chosen;
    }
}
/* Counterpart of foo written with the opposite comparison: store 4.0 in
   OUTPUT where IN1[k] INV_OP IN2[k] holds and 2.0 where it does not.
   INV_OP comes from the including file.  */
void
bar (FTYPE *in1, FTYPE *in2, FTYPE *output)
{
  int k;

  /* Vectorizable.  */
  for (k = 0; k < N; k++)
    {
      FTYPE chosen = (in1[k] INV_OP in2[k]) ? 4.0 : 2.0;
      output[k] = chosen;
    }
}
/* Compare each element of IN1 against the constant 0.0: store 4.0 in
   OUTPUT where IN1[k] OP 0.0 holds and 2.0 where it does not.  IN2 is
   accepted but never read.  */
void
foobar (FTYPE *in1, FTYPE *in2, FTYPE *output)
{
  int k;

  /* Vectorizable.  */
  for (k = 0; k < N; k++)
    {
      FTYPE chosen = (in1[k] OP 0.0) ? 4.0 : 2.0;
      output[k] = chosen;
    }
}
int
main (int argc, char **argv)
{
FTYPE out1[N];
FTYPE out2[N];
int i = 0;
foo (input1, input2, out1);
bar (input1, input2, out2);
for (i = 0; i < N; i++)
if (out1[i] != out2[i])
abort ();
return 0;
}

View File

@ -3699,7 +3699,8 @@ proc check_effective_target_vect_condition { } {
verbose "check_effective_target_vect_cond: using cached result" 2
} else {
set et_vect_cond_saved 0
if { [istarget powerpc*-*-*]
if { [istarget aarch64*-*-*]
|| [istarget powerpc*-*-*]
|| [istarget ia64-*-*]
|| [istarget i?86-*-*]
|| [istarget spu-*-*]