mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-02-19 23:00:24 +08:00
[ARM] Turning off 64bits ops in Neon
2013-03-21  Christophe Lyon  <christophe.lyon@linaro.org>

gcc/
    * config/arm/arm-protos.h (tune_params): Add prefer_neon_for_64bits field.
    * config/arm/arm.c (prefer_neon_for_64bits): New variable.
    (arm_slowmul_tune): Default prefer_neon_for_64bits to false.
    (arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
    (arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
    (arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto.
    (arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
    (arm_option_override): Handle -mneon-for-64bits new option.
    * config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
    (prefer_neon_for_64bits): Declare new variable.
    * config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and nota8.
    (arch_enabled): Handle new arch types. Remove support for onlya8 and nota8.
    (one_cmpldi2): Use new arch names.
    * config/arm/arm.opt (mneon-for-64bits): Add option.
    * config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon) (anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead of onlya8.
    * doc/invoke.texi (-mneon-for-64bits): Document.

gcc/testsuite:
    * gcc.target/arm/neon-for-64bits-1.c: New tests.
    * gcc.target/arm/neon-for-64bits-2.c: Likewise.

From-SVN: r196876
This commit is contained in:
parent
5a2d2a7900
commit
65074f54c5
@ -1,3 +1,29 @@
|
||||
2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
* config/arm/arm-protos.h (tune_params): Add
|
||||
prefer_neon_for_64bits field.
|
||||
* config/arm/arm.c (prefer_neon_for_64bits): New variable.
|
||||
(arm_slowmul_tune): Default prefer_neon_for_64bits to false.
|
||||
(arm_fastmul_tune, arm_strongarm_tune, arm_xscale_tune): Ditto.
|
||||
(arm_9e_tune, arm_v6t2_tune, arm_cortex_tune): Ditto.
|
||||
(arm_cortex_a15_tune, arm_cortex_a5_tune): Ditto.
|
||||
(arm_cortex_a9_tune, arm_v6m_tune, arm_fa726te_tune): Ditto.
|
||||
(arm_option_override): Handle -mneon-for-64bits new option.
|
||||
* config/arm/arm.h (TARGET_PREFER_NEON_64BITS): New macro.
|
||||
(prefer_neon_for_64bits): Declare new variable.
|
||||
* config/arm/arm.md (arch): Rename neon_onlya8 and neon_nota8 to
|
||||
avoid_neon_for_64bits and neon_for_64bits. Remove onlya8 and
|
||||
nota8.
|
||||
(arch_enabled): Handle new arch types. Remove support for onlya8
|
||||
and nota8.
|
||||
(one_cmpldi2): Use new arch names.
|
||||
* config/arm/arm.opt (mneon-for-64bits): Add option.
|
||||
* config/arm/neon.md (adddi3_neon, subdi3_neon, iordi3_neon)
|
||||
(anddi3_neon, xordi3_neon, ashldi3_neon, <shift>di3_neon): Use
|
||||
neon_for_64bits instead of nota8 and avoid_neon_for_64bits instead
|
||||
of onlya8.
|
||||
* doc/invoke.texi (-mneon-for-64bits): Document.
|
||||
|
||||
2013-03-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/39326
|
||||
|
@ -269,6 +269,8 @@ struct tune_params
|
||||
bool logical_op_non_short_circuit[2];
|
||||
/* Vectorizer costs. */
|
||||
const struct cpu_vec_costs* vec_costs;
|
||||
/* Prefer Neon over core registers for scalar 64-bit operations. */
|
||||
bool prefer_neon_for_64bits;
|
||||
};
|
||||
|
||||
extern const struct tune_params *current_tune;
|
||||
|
@ -839,6 +839,10 @@ int arm_arch_thumb2;
|
||||
int arm_arch_arm_hwdiv;
|
||||
int arm_arch_thumb_hwdiv;
|
||||
|
||||
/* Nonzero if we should use Neon to handle 64-bit operations rather
|
||||
than core registers. */
|
||||
int prefer_neon_for_64bits = 0;
|
||||
|
||||
/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
|
||||
we must report the mode of the memory reference from
|
||||
TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
|
||||
@ -936,6 +940,7 @@ const struct tune_params arm_slowmul_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_fastmul_tune =
|
||||
@ -950,6 +955,7 @@ const struct tune_params arm_fastmul_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
/* StrongARM has early execution of branches, so a sequence that is worth
|
||||
@ -967,6 +973,7 @@ const struct tune_params arm_strongarm_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_xscale_tune =
|
||||
@ -981,6 +988,7 @@ const struct tune_params arm_xscale_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_9e_tune =
|
||||
@ -995,6 +1003,7 @@ const struct tune_params arm_9e_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_v6t2_tune =
|
||||
@ -1009,6 +1018,7 @@ const struct tune_params arm_v6t2_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
/* Generic Cortex tuning. Use more specific tunings if appropriate. */
|
||||
@ -1024,6 +1034,7 @@ const struct tune_params arm_cortex_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a15_tune =
|
||||
@ -1038,6 +1049,7 @@ const struct tune_params arm_cortex_a15_tune =
|
||||
true, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
/* Branches can be dual-issued on Cortex-A5, so conditional execution is
|
||||
@ -1055,6 +1067,7 @@ const struct tune_params arm_cortex_a5_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{false, false}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_cortex_a9_tune =
|
||||
@ -1069,6 +1082,7 @@ const struct tune_params arm_cortex_a9_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
|
||||
@ -1085,6 +1099,7 @@ const struct tune_params arm_v6m_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{false, false}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
const struct tune_params arm_fa726te_tune =
|
||||
@ -1099,6 +1114,7 @@ const struct tune_params arm_fa726te_tune =
|
||||
false, /* Prefer LDRD/STRD. */
|
||||
{true, true}, /* Prefer non short circuit. */
|
||||
&arm_default_vec_cost, /* Vectorizer costs. */
|
||||
false /* Prefer Neon for 64-bits bitops. */
|
||||
};
|
||||
|
||||
|
||||
@ -2129,6 +2145,12 @@ arm_option_override (void)
|
||||
global_options.x_param_values,
|
||||
global_options_set.x_param_values);
|
||||
|
||||
/* Use Neon to perform 64-bit operations rather than core
|
||||
registers. */
|
||||
prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
|
||||
if (use_neon_for_64bits == 1)
|
||||
prefer_neon_for_64bits = true;
|
||||
|
||||
/* Use the alternative scheduling-pressure algorithm by default. */
|
||||
maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
|
||||
global_options.x_param_values,
|
||||
|
@ -354,6 +354,9 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
|
||||
#define TARGET_IDIV ((TARGET_ARM && arm_arch_arm_hwdiv) \
|
||||
|| (TARGET_THUMB2 && arm_arch_thumb_hwdiv))
|
||||
|
||||
/* Nonzero if Neon should be used for scalar 64-bit operations. */
|
||||
#define TARGET_PREFER_NEON_64BITS (prefer_neon_for_64bits)
|
||||
|
||||
/* True iff the full BPABI is being used. If TARGET_BPABI is true,
|
||||
then TARGET_AAPCS_BASED must be true -- but the converse does not
|
||||
hold. TARGET_BPABI implies the use of the BPABI runtime library,
|
||||
@ -539,6 +542,10 @@ extern int arm_arch_arm_hwdiv;
|
||||
/* Nonzero if chip supports integer division instruction in Thumb mode. */
|
||||
extern int arm_arch_thumb_hwdiv;
|
||||
|
||||
/* Nonzero if we should use Neon to handle 64-bit operations rather
|
||||
than core registers. */
|
||||
extern int prefer_neon_for_64bits;
|
||||
|
||||
#ifndef TARGET_DEFAULT
|
||||
#define TARGET_DEFAULT (MASK_APCS_FRAME)
|
||||
#endif
|
||||
|
@ -94,7 +94,7 @@
|
||||
; for ARM or Thumb-2 with arm_arch6, and nov6 for ARM without
|
||||
; arm_arch6. This attribute is used to compute attribute "enabled",
|
||||
; use type "any" to enable an alternative in all cases.
|
||||
(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,onlya8,neon_onlya8,nota8,neon_nota8,iwmmxt,iwmmxt2"
|
||||
(define_attr "arch" "any,a,t,32,t1,t2,v6,nov6,neon_for_64bits,avoid_neon_for_64bits,iwmmxt,iwmmxt2"
|
||||
(const_string "any"))
|
||||
|
||||
(define_attr "arch_enabled" "no,yes"
|
||||
@ -129,22 +129,14 @@
|
||||
(match_test "TARGET_32BIT && !arm_arch6"))
|
||||
(const_string "yes")
|
||||
|
||||
(and (eq_attr "arch" "onlya8")
|
||||
(eq_attr "tune" "cortexa8"))
|
||||
(and (eq_attr "arch" "avoid_neon_for_64bits")
|
||||
(match_test "TARGET_NEON")
|
||||
(not (match_test "TARGET_PREFER_NEON_64BITS")))
|
||||
(const_string "yes")
|
||||
|
||||
(and (eq_attr "arch" "neon_onlya8")
|
||||
(eq_attr "tune" "cortexa8")
|
||||
(match_test "TARGET_NEON"))
|
||||
(const_string "yes")
|
||||
|
||||
(and (eq_attr "arch" "nota8")
|
||||
(not (eq_attr "tune" "cortexa8")))
|
||||
(const_string "yes")
|
||||
|
||||
(and (eq_attr "arch" "neon_nota8")
|
||||
(not (eq_attr "tune" "cortexa8"))
|
||||
(match_test "TARGET_NEON"))
|
||||
(and (eq_attr "arch" "neon_for_64bits")
|
||||
(match_test "TARGET_NEON")
|
||||
(match_test "TARGET_PREFER_NEON_64BITS"))
|
||||
(const_string "yes")
|
||||
|
||||
(and (eq_attr "arch" "iwmmxt2")
|
||||
@ -4330,7 +4322,7 @@
|
||||
[(set_attr "length" "*,8,8,*")
|
||||
(set_attr "predicable" "no,yes,yes,no")
|
||||
(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
|
||||
(set_attr "arch" "neon_nota8,*,*,neon_onlya8")]
|
||||
(set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
|
||||
)
|
||||
|
||||
(define_expand "one_cmplsi2"
|
||||
@ -4498,7 +4490,7 @@
|
||||
"TARGET_32BIT <qhs_zextenddi_cond>"
|
||||
"#"
|
||||
[(set_attr "length" "8,4,8,8")
|
||||
(set_attr "arch" "neon_nota8,*,*,neon_onlya8")
|
||||
(set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")
|
||||
(set_attr "ce_count" "2")
|
||||
(set_attr "predicable" "yes")]
|
||||
)
|
||||
@ -4513,7 +4505,7 @@
|
||||
(set_attr "ce_count" "2")
|
||||
(set_attr "shift" "1")
|
||||
(set_attr "predicable" "yes")
|
||||
(set_attr "arch" "neon_nota8,*,a,t,neon_onlya8")]
|
||||
(set_attr "arch" "neon_for_64bits,*,a,t,avoid_neon_for_64bits")]
|
||||
)
|
||||
|
||||
;; Splits for all extensions to DImode
|
||||
|
@ -247,3 +247,7 @@ that may trigger Cortex-M3 errata.
|
||||
munaligned-access
|
||||
Target Report Var(unaligned_access) Init(2)
|
||||
Enable unaligned word and halfword accesses to packed data.
|
||||
|
||||
mneon-for-64bits
|
||||
Target Report RejectNegative Var(use_neon_for_64bits) Init(0)
|
||||
Use Neon to perform 64-bits operations rather than core registers.
|
||||
|
@ -487,7 +487,7 @@
|
||||
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1,*,*,*")
|
||||
(set_attr "conds" "*,clob,clob,*,clob,clob,clob")
|
||||
(set_attr "length" "*,8,8,*,8,8,8")
|
||||
(set_attr "arch" "nota8,*,*,onlya8,*,*,*")]
|
||||
(set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
|
||||
)
|
||||
|
||||
(define_insn "*sub<mode>3_neon"
|
||||
@ -524,7 +524,7 @@
|
||||
[(set_attr "neon_type" "neon_int_2,*,*,*,neon_int_2")
|
||||
(set_attr "conds" "*,clob,clob,clob,*")
|
||||
(set_attr "length" "*,8,8,8,*")
|
||||
(set_attr "arch" "nota8,*,*,*,onlya8")]
|
||||
(set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
|
||||
)
|
||||
|
||||
(define_insn "*mul<mode>3_neon"
|
||||
@ -699,7 +699,7 @@
|
||||
}
|
||||
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
|
||||
(set_attr "length" "*,*,8,8,*,*")
|
||||
(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
|
||||
(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
|
||||
)
|
||||
|
||||
;; The concrete forms of the Neon immediate-logic instructions are vbic and
|
||||
@ -744,7 +744,7 @@
|
||||
}
|
||||
[(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
|
||||
(set_attr "length" "*,*,8,8,*,*")
|
||||
(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")]
|
||||
(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
|
||||
)
|
||||
|
||||
(define_insn "orn<mode>3_neon"
|
||||
@ -840,7 +840,7 @@
|
||||
veor\t%P0, %P1, %P2"
|
||||
[(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
|
||||
(set_attr "length" "*,8,8,*")
|
||||
(set_attr "arch" "nota8,*,*,onlya8")]
|
||||
(set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
|
||||
)
|
||||
|
||||
(define_insn "one_cmpl<mode>2"
|
||||
@ -1162,7 +1162,7 @@
|
||||
}
|
||||
DONE;
|
||||
}"
|
||||
[(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
|
||||
[(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
|
||||
(set_attr "opt" "*,*,speed,speed,*,*")]
|
||||
)
|
||||
|
||||
@ -1263,7 +1263,7 @@
|
||||
|
||||
DONE;
|
||||
}"
|
||||
[(set_attr "arch" "nota8,nota8,*,*,onlya8,onlya8")
|
||||
[(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
|
||||
(set_attr "opt" "*,*,speed,speed,*,*")]
|
||||
)
|
||||
|
||||
|
@ -510,7 +510,8 @@ Objective-C and Objective-C++ Dialects}.
|
||||
-mtp=@var{name} -mtls-dialect=@var{dialect} @gol
|
||||
-mword-relocations @gol
|
||||
-mfix-cortex-m3-ldrd @gol
|
||||
-munaligned-access}
|
||||
-munaligned-access @gol
|
||||
-mneon-for-64bits}
|
||||
|
||||
@emph{AVR Options}
|
||||
@gccoptlist{-mmcu=@var{mcu} -maccumulate-args -mbranch-cost=@var{cost} @gol
|
||||
@ -11530,6 +11531,11 @@ setting of this option. If unaligned access is enabled then the
|
||||
preprocessor symbol @code{__ARM_FEATURE_UNALIGNED} will also be
|
||||
defined.
|
||||
|
||||
@item -mneon-for-64bits
|
||||
@opindex mneon-for-64bits
|
||||
Enables using Neon to handle scalar 64-bit operations. This is
|
||||
disabled by default since the cost of moving data from core registers
|
||||
to Neon is high.
|
||||
@end table
|
||||
|
||||
@node AVR Options
|
||||
|
@ -1,3 +1,8 @@
|
||||
2013-03-21 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
* gcc.target/arm/neon-for-64bits-1.c: New tests.
|
||||
* gcc.target/arm/neon-for-64bits-2.c: Likewise.
|
||||
|
||||
2013-03-21 Richard Biener <rguenther@suse.de>
|
||||
|
||||
* gcc.dg/vect/vect-outer-3a-big-array.c: Adjust.
|
||||
@ -19,7 +24,6 @@
|
||||
2013-03-20 Jeff Law <law@redhat.com>
|
||||
|
||||
* g++.dg/tree-ssa/ssa-dom.C: New test.
|
||||
|
||||
|
||||
2013-03-20 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
|
54
gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
Normal file
54
gcc/testsuite/gcc.target/arm/neon-for-64bits-1.c
Normal file
@ -0,0 +1,54 @@
|
||||
/* Check that Neon is *not* used by default to handle 64-bit scalar
|
||||
operations. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_neon_ok } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
|
||||
typedef long long i64;
|
||||
typedef unsigned long long u64;
|
||||
typedef unsigned int u32;
|
||||
typedef int i32;
|
||||
|
||||
/* Unary operators */
|
||||
#define UNARY_OP(name, op) \
|
||||
void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
|
||||
|
||||
/* Binary operators */
|
||||
#define BINARY_OP(name, op) \
|
||||
void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
|
||||
|
||||
/* Unsigned shift */
|
||||
#define SHIFT_U(name, op, amount) \
|
||||
void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
|
||||
|
||||
/* Signed shift */
|
||||
#define SHIFT_S(name, op, amount) \
|
||||
void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
|
||||
|
||||
UNARY_OP(not, ~)
|
||||
|
||||
BINARY_OP(add, +)
|
||||
BINARY_OP(sub, -)
|
||||
BINARY_OP(and, &)
|
||||
BINARY_OP(or, |)
|
||||
BINARY_OP(xor, ^)
|
||||
|
||||
SHIFT_U(right1, >>, 1)
|
||||
SHIFT_U(right2, >>, 2)
|
||||
SHIFT_U(right5, >>, 5)
|
||||
SHIFT_U(rightn, >>, c)
|
||||
|
||||
SHIFT_S(right1, >>, 1)
|
||||
SHIFT_S(right2, >>, 2)
|
||||
SHIFT_S(right5, >>, 5)
|
||||
SHIFT_S(rightn, >>, c)
|
||||
|
||||
/* { dg-final {scan-assembler-times "vmvn" 0} } */
|
||||
/* { dg-final {scan-assembler-times "vadd" 0} } */
|
||||
/* { dg-final {scan-assembler-times "vsub" 0} } */
|
||||
/* { dg-final {scan-assembler-times "vand" 0} } */
|
||||
/* { dg-final {scan-assembler-times "vorr" 0} } */
|
||||
/* { dg-final {scan-assembler-times "veor" 0} } */
|
||||
/* { dg-final {scan-assembler-times "vshr" 0} } */
|
57
gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
Normal file
57
gcc/testsuite/gcc.target/arm/neon-for-64bits-2.c
Normal file
@ -0,0 +1,57 @@
|
||||
/* Check that Neon is used to handle 64-bit scalar operations. */
|
||||
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_neon_ok } */
|
||||
/* { dg-options "-O2 -mneon-for-64bits" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
|
||||
typedef long long i64;
|
||||
typedef unsigned long long u64;
|
||||
typedef unsigned int u32;
|
||||
typedef int i32;
|
||||
|
||||
/* Unary operators */
|
||||
#define UNARY_OP(name, op) \
|
||||
void unary_##name(u64 *a, u64 *b) { *a = op (*b + 0x1234567812345678ULL) ; }
|
||||
|
||||
/* Binary operators */
|
||||
#define BINARY_OP(name, op) \
|
||||
void binary_##name(u64 *a, u64 *b, u64 *c) { *a = *b op *c ; }
|
||||
|
||||
/* Unsigned shift */
|
||||
#define SHIFT_U(name, op, amount) \
|
||||
void ushift_##name(u64 *a, u64 *b, int c) { *a = *b op amount; }
|
||||
|
||||
/* Signed shift */
|
||||
#define SHIFT_S(name, op, amount) \
|
||||
void sshift_##name(i64 *a, i64 *b, int c) { *a = *b op amount; }
|
||||
|
||||
UNARY_OP(not, ~)
|
||||
|
||||
BINARY_OP(add, +)
|
||||
BINARY_OP(sub, -)
|
||||
BINARY_OP(and, &)
|
||||
BINARY_OP(or, |)
|
||||
BINARY_OP(xor, ^)
|
||||
|
||||
SHIFT_U(right1, >>, 1)
|
||||
SHIFT_U(right2, >>, 2)
|
||||
SHIFT_U(right5, >>, 5)
|
||||
SHIFT_U(rightn, >>, c)
|
||||
|
||||
SHIFT_S(right1, >>, 1)
|
||||
SHIFT_S(right2, >>, 2)
|
||||
SHIFT_S(right5, >>, 5)
|
||||
SHIFT_S(rightn, >>, c)
|
||||
|
||||
/* { dg-final {scan-assembler-times "vmvn" 1} } */
|
||||
/* Two vadd: 1 in unary_not, 1 in binary_add */
|
||||
/* { dg-final {scan-assembler-times "vadd" 2} } */
|
||||
/* { dg-final {scan-assembler-times "vsub" 1} } */
|
||||
/* { dg-final {scan-assembler-times "vand" 1} } */
|
||||
/* { dg-final {scan-assembler-times "vorr" 1} } */
|
||||
/* { dg-final {scan-assembler-times "veor" 1} } */
|
||||
/* 6 vshr for the right shifts by a constant; each variable right shift uses
|
||||
vshl with a negative shift amount held in a register. */
|
||||
/* { dg-final {scan-assembler-times "vshr" 6} } */
|
||||
/* { dg-final {scan-assembler-times "vshl" 2} } */
|
Loading…
Reference in New Issue
Block a user