mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 05:00:26 +08:00
[AArch64] Pattern-match SVE extending loads
This patch pattern-matches a partial SVE load followed by a sign or zero extension into an extending load. (The partial load is already an extending load; we just don't rely on the upper bits of the elements.) Nothing yet uses the extra LDFF1 and LDNF1 combinations, but it seemed more consistent to provide them, since I needed to update the pattern to use a predicated extension anyway. 2019-11-16 Richard Sandiford <richard.sandiford@arm.com> gcc/ * config/aarch64/aarch64-sve.md (@aarch64_load_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>): (@aarch64_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>) (@aarch64_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>): Combine into... (@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>): ...this new pattern, handling extension to partial modes as well as full modes. Describe the extension as a predicated rather than unpredicated extension. (@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>) (@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>) (@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>): Combine into... (@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>): ...this new pattern, handling extension to partial modes as well as full modes. Describe the extension as a predicated rather than unpredicated extension. * config/aarch64/aarch64-sve-builtins.cc (function_expander::use_contiguous_load_insn): Add an extra predicate for extending loads. * config/aarch64/aarch64.c (aarch64_extending_load_p): New function. (aarch64_sve_adjust_stmt_cost): Likewise. (aarch64_add_stmt_cost): Use aarch64_sve_adjust_stmt_cost to adjust the cost of SVE vector stmts. gcc/testsuite/ * gcc.target/aarch64/sve/load_extend_1.c: New test. * gcc.target/aarch64/sve/load_extend_2.c: Likewise. * gcc.target/aarch64/sve/load_extend_3.c: Likewise. * gcc.target/aarch64/sve/load_extend_4.c: Likewise. 
* gcc.target/aarch64/sve/load_extend_5.c: Likewise. * gcc.target/aarch64/sve/load_extend_6.c: Likewise. * gcc.target/aarch64/sve/load_extend_7.c: Likewise. * gcc.target/aarch64/sve/load_extend_8.c: Likewise. * gcc.target/aarch64/sve/load_extend_9.c: Likewise. * gcc.target/aarch64/sve/load_extend_10.c: Likewise. * gcc.target/aarch64/sve/reduc_4.c: Add --param aarch64-sve-compare-costs=0. From-SVN: r278343
This commit is contained in:
parent
e58703e2c1
commit
217ccab8f4
@ -1,3 +1,30 @@
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-sve.md
|
||||
(@aarch64_load_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>)
|
||||
(@aarch64_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>)
|
||||
(@aarch64_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>):
|
||||
Combine into...
|
||||
(@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>):
|
||||
...this new pattern, handling extension to partial modes as well
|
||||
as full modes. Describe the extension as a predicated rather than
|
||||
unpredicated extension.
|
||||
(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>)
|
||||
(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>)
|
||||
(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>):
|
||||
Combine into...
|
||||
(@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>):
|
||||
...this new pattern, handling extension to partial modes as well
|
||||
as full modes. Describe the extension as a predicated rather than
|
||||
unpredicated extension.
|
||||
* config/aarch64/aarch64-sve-builtins.cc
|
||||
(function_expander::use_contiguous_load_insn): Add an extra
|
||||
predicate for extending loads.
|
||||
* config/aarch64/aarch64.c (aarch64_extending_load_p): New function.
|
||||
(aarch64_sve_adjust_stmt_cost): Likewise.
|
||||
(aarch64_add_stmt_cost): Use aarch64_sve_adjust_stmt_cost to adjust
|
||||
the cost of SVE vector stmts.
|
||||
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* config/aarch64/iterators.md (SVE_HSDI): New mode iterator.
|
||||
|
@ -2790,7 +2790,9 @@ function_expander::use_vcond_mask_insn (insn_code icode,
|
||||
}
|
||||
|
||||
/* Implement the call using instruction ICODE, which loads memory operand 1
|
||||
into register operand 0 under the control of predicate operand 2. */
|
||||
into register operand 0 under the control of predicate operand 2.
|
||||
Extending loads have a further predicate (operand 3) that nominally
|
||||
controls the extension. */
|
||||
rtx
|
||||
function_expander::use_contiguous_load_insn (insn_code icode)
|
||||
{
|
||||
@ -2799,6 +2801,8 @@ function_expander::use_contiguous_load_insn (insn_code icode)
|
||||
add_output_operand (icode);
|
||||
add_mem_operand (mem_mode, get_contiguous_base (mem_mode));
|
||||
add_input_operand (icode, args[0]);
|
||||
if (GET_MODE_UNIT_BITSIZE (mem_mode) < type_suffix (0).element_bits)
|
||||
add_input_operand (icode, CONSTM1_RTX (VNx16BImode));
|
||||
return generate_insn (icode);
|
||||
}
|
||||
|
||||
|
@ -1189,39 +1189,22 @@
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
;; Predicated load and extend.
|
||||
(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>"
|
||||
[(set (match_operand:VNx8_WIDE 0 "register_operand" "=w")
|
||||
(ANY_EXTEND:VNx8_WIDE
|
||||
(unspec:VNx8_NARROW
|
||||
[(match_operand:VNx8BI 2 "register_operand" "Upl")
|
||||
(match_operand:VNx8_NARROW 1 "memory_operand" "m")]
|
||||
UNSPEC_LD1_SVE)))]
|
||||
"TARGET_SVE"
|
||||
"ld1<ANY_EXTEND:s><VNx8_NARROW:Vesize>\t%0.<VNx8_WIDE:Vetype>, %2/z, %1"
|
||||
)
|
||||
|
||||
;; Predicated load and extend, with 4 elements per 128-bit block.
|
||||
(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
|
||||
[(set (match_operand:VNx4_WIDE 0 "register_operand" "=w")
|
||||
(ANY_EXTEND:VNx4_WIDE
|
||||
(unspec:VNx4_NARROW
|
||||
[(match_operand:VNx4BI 2 "register_operand" "Upl")
|
||||
(match_operand:VNx4_NARROW 1 "memory_operand" "m")]
|
||||
UNSPEC_LD1_SVE)))]
|
||||
"TARGET_SVE"
|
||||
"ld1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.<VNx4_WIDE:Vetype>, %2/z, %1"
|
||||
)
|
||||
|
||||
;; Predicated load and extend, with 2 elements per 128-bit block.
|
||||
(define_insn "@aarch64_load_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
|
||||
[(set (match_operand:VNx2_WIDE 0 "register_operand" "=w")
|
||||
(ANY_EXTEND:VNx2_WIDE
|
||||
(unspec:VNx2_NARROW
|
||||
[(match_operand:VNx2BI 2 "register_operand" "Upl")
|
||||
(match_operand:VNx2_NARROW 1 "memory_operand" "m")]
|
||||
UNSPEC_LD1_SVE)))]
|
||||
"TARGET_SVE"
|
||||
"ld1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.<VNx2_WIDE:Vetype>, %2/z, %1"
|
||||
(define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
|
||||
[(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
|
||||
(unspec:SVE_HSDI
|
||||
[(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
|
||||
(ANY_EXTEND:SVE_HSDI
|
||||
(unspec:SVE_PARTIAL_I
|
||||
[(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
|
||||
(match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
|
||||
UNSPEC_LD1_SVE))]
|
||||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
|
||||
"ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
|
||||
"&& !CONSTANT_P (operands[3])"
|
||||
{
|
||||
operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
|
||||
}
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
@ -1268,46 +1251,24 @@
|
||||
;; - LDNF1W
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
;; Predicated first-faulting or non-faulting load and extend, with 8 elements
|
||||
;; per 128-bit block.
|
||||
(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx8_WIDE:mode><VNx8_NARROW:mode>"
|
||||
[(set (match_operand:VNx8_WIDE 0 "register_operand" "=w")
|
||||
(ANY_EXTEND:VNx8_WIDE
|
||||
(unspec:VNx8_NARROW
|
||||
[(match_operand:VNx8BI 2 "register_operand" "Upl")
|
||||
(match_operand:VNx8_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
|
||||
(reg:VNx16BI FFRT_REGNUM)]
|
||||
SVE_LDFF1_LDNF1)))]
|
||||
"TARGET_SVE"
|
||||
"ld<fn>f1<ANY_EXTEND:s><VNx8_NARROW:Vesize>\t%0.<VNx8_WIDE:Vetype>, %2/z, %1"
|
||||
)
|
||||
|
||||
;; Predicated first-faulting or non-faulting load and extend, with 4 elements
|
||||
;; per 128-bit block.
|
||||
(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
|
||||
[(set (match_operand:VNx4_WIDE 0 "register_operand" "=w")
|
||||
(ANY_EXTEND:VNx4_WIDE
|
||||
(unspec:VNx4_NARROW
|
||||
[(match_operand:VNx4BI 2 "register_operand" "Upl")
|
||||
(match_operand:VNx4_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
|
||||
(reg:VNx16BI FFRT_REGNUM)]
|
||||
SVE_LDFF1_LDNF1)))]
|
||||
"TARGET_SVE"
|
||||
"ld<fn>f1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.<VNx4_WIDE:Vetype>, %2/z, %1"
|
||||
)
|
||||
|
||||
;; Predicated first-faulting or non-faulting load and extend, with 2 elements
|
||||
;; per 128-bit block.
|
||||
(define_insn "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
|
||||
[(set (match_operand:VNx2_WIDE 0 "register_operand" "=w")
|
||||
(ANY_EXTEND:VNx2_WIDE
|
||||
(unspec:VNx2_NARROW
|
||||
[(match_operand:VNx2BI 2 "register_operand" "Upl")
|
||||
(match_operand:VNx2_NARROW 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
|
||||
(reg:VNx16BI FFRT_REGNUM)]
|
||||
SVE_LDFF1_LDNF1)))]
|
||||
"TARGET_SVE"
|
||||
"ld<fn>f1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.<VNx2_WIDE:Vetype>, %2/z, %1"
|
||||
;; Predicated first-faulting or non-faulting load and extend.
|
||||
(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
|
||||
[(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
|
||||
(unspec:SVE_HSDI
|
||||
[(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
|
||||
(ANY_EXTEND:SVE_HSDI
|
||||
(unspec:SVE_PARTIAL_I
|
||||
[(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
|
||||
(match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
|
||||
(reg:VNx16BI FFRT_REGNUM)]
|
||||
SVE_LDFF1_LDNF1))]
|
||||
UNSPEC_PRED_X))]
|
||||
"TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
|
||||
"ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
|
||||
"&& !CONSTANT_P (operands[3])"
|
||||
{
|
||||
operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
|
||||
}
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
|
@ -12879,6 +12879,49 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
|
||||
}
|
||||
}
|
||||
|
||||
/* Return true if STMT_INFO extends the result of a load. */
|
||||
static bool
|
||||
aarch64_extending_load_p (stmt_vec_info stmt_info)
|
||||
{
|
||||
gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
|
||||
if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
|
||||
return false;
|
||||
|
||||
tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
|
||||
tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
|
||||
tree rhs_type = TREE_TYPE (rhs);
|
||||
if (!INTEGRAL_TYPE_P (lhs_type)
|
||||
|| !INTEGRAL_TYPE_P (rhs_type)
|
||||
|| TYPE_PRECISION (lhs_type) <= TYPE_PRECISION (rhs_type))
|
||||
return false;
|
||||
|
||||
stmt_vec_info def_stmt_info = stmt_info->vinfo->lookup_def (rhs);
|
||||
return (def_stmt_info
|
||||
&& STMT_VINFO_DATA_REF (def_stmt_info)
|
||||
&& DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
|
||||
}
|
||||
|
||||
/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
|
||||
for STMT_INFO, which has cost kind KIND. Adjust the cost as necessary
|
||||
for SVE targets. */
|
||||
static unsigned int
|
||||
aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
|
||||
unsigned int stmt_cost)
|
||||
{
|
||||
/* Unlike vec_promote_demote, vector_stmt conversions do not change the
|
||||
vector register size or number of units. Integer promotions of this
|
||||
type therefore map to SXT[BHW] or UXT[BHW].
|
||||
|
||||
Most loads have extending forms that can do the sign or zero extension
|
||||
on the fly. Optimistically assume that a load followed by an extension
|
||||
will fold to this form during combine, and that the extension therefore
|
||||
comes for free. */
|
||||
if (kind == vector_stmt && aarch64_extending_load_p (stmt_info))
|
||||
stmt_cost = 0;
|
||||
|
||||
return stmt_cost;
|
||||
}
|
||||
|
||||
/* Implement targetm.vectorize.add_stmt_cost. */
|
||||
static unsigned
|
||||
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
|
||||
@ -12894,6 +12937,9 @@ aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
|
||||
int stmt_cost =
|
||||
aarch64_builtin_vectorization_cost (kind, vectype, misalign);
|
||||
|
||||
if (stmt_info && vectype && aarch64_sve_mode_p (TYPE_MODE (vectype)))
|
||||
stmt_cost = aarch64_sve_adjust_stmt_cost (kind, stmt_info, stmt_cost);
|
||||
|
||||
/* Statements in an inner loop relative to the loop being
|
||||
vectorized are weighted more heavily. The value here is
|
||||
arbitrary and could potentially be improved with analysis. */
|
||||
|
@ -1,3 +1,18 @@
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.target/aarch64/sve/load_extend_1.c: New test.
|
||||
* gcc.target/aarch64/sve/load_extend_2.c: Likewise.
|
||||
* gcc.target/aarch64/sve/load_extend_3.c: Likewise.
|
||||
* gcc.target/aarch64/sve/load_extend_4.c: Likewise.
|
||||
* gcc.target/aarch64/sve/load_extend_5.c: Likewise.
|
||||
* gcc.target/aarch64/sve/load_extend_6.c: Likewise.
|
||||
* gcc.target/aarch64/sve/load_extend_7.c: Likewise.
|
||||
* gcc.target/aarch64/sve/load_extend_8.c: Likewise.
|
||||
* gcc.target/aarch64/sve/load_extend_9.c: Likewise.
|
||||
* gcc.target/aarch64/sve/load_extend_10.c: Likewise.
|
||||
* gcc.target/aarch64/sve/reduc_4.c: Add
|
||||
--param aarch64-sve-compare-costs=0.
|
||||
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.target/aarch64/sve/cost_model_1.c: Expect the loop to be
|
||||
|
31
gcc/testsuite/gcc.target/aarch64/sve/load_extend_1.c
Normal file
31
gcc/testsuite/gcc.target/aarch64/sve/load_extend_1.c
Normal file
@ -0,0 +1,31 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define TEST_LOOP(TYPE1, TYPE2) \
|
||||
void \
|
||||
f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst, TYPE1 *restrict src1, \
|
||||
TYPE2 *restrict src2, int n) \
|
||||
{ \
|
||||
for (int i = 0; i < n; ++i) \
|
||||
dst[i] += src1[i] + src2[i]; \
|
||||
}
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (uint16_t, uint8_t) \
|
||||
T (uint32_t, uint8_t) \
|
||||
T (uint64_t, uint8_t) \
|
||||
T (uint32_t, uint16_t) \
|
||||
T (uint64_t, uint16_t) \
|
||||
T (uint64_t, uint32_t)
|
||||
|
||||
TEST_ALL (TEST_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.h,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tuxt.\t} } } */
|
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_10.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_10.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void
|
||||
f1 (uint64_t *dst, uint32_t *src1, int16_t *src2)
|
||||
{
|
||||
for (int i = 0; i < 7; ++i)
|
||||
dst[i] += (int32_t) (src1[i] + src2[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1sh\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsxt.\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsxtw\tz[0-9]+\.d,} 1 } } */
|
31
gcc/testsuite/gcc.target/aarch64/sve/load_extend_2.c
Normal file
31
gcc/testsuite/gcc.target/aarch64/sve/load_extend_2.c
Normal file
@ -0,0 +1,31 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#define TEST_LOOP(TYPE1, TYPE2) \
|
||||
void \
|
||||
f_##TYPE1##_##TYPE2 (TYPE1 *restrict dst, TYPE1 *restrict src1, \
|
||||
TYPE2 *restrict src2, int n) \
|
||||
{ \
|
||||
for (int i = 0; i < n; ++i) \
|
||||
dst[i] += src1[i] + src2[i]; \
|
||||
}
|
||||
|
||||
#define TEST_ALL(T) \
|
||||
T (int16_t, int8_t) \
|
||||
T (int32_t, int8_t) \
|
||||
T (int64_t, int8_t) \
|
||||
T (int32_t, int16_t) \
|
||||
T (int64_t, int16_t) \
|
||||
T (int64_t, int32_t)
|
||||
|
||||
TEST_ALL (TEST_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.h,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1sh\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1sh\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1sw\tz[0-9]+\.d,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-not {\tsxt.\t} } } */
|
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_3.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_3.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void
|
||||
f1 (uint32_t *dst, uint16_t *src1, uint8_t *src2)
|
||||
{
|
||||
for (int i = 0; i < 7; ++i)
|
||||
dst[i] += (uint16_t) (src1[i] + src2[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tuxt.\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.s,} 1 } } */
|
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_4.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_4.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void
|
||||
f1 (uint64_t *dst, uint16_t *src1, uint8_t *src2)
|
||||
{
|
||||
for (int i = 0; i < 7; ++i)
|
||||
dst[i] += (uint16_t) (src1[i] + src2[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tuxt.\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tuxth\tz[0-9]+\.d,} 1 } } */
|
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_5.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_5.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void
|
||||
f1 (uint64_t *dst, uint32_t *src1, uint8_t *src2)
|
||||
{
|
||||
for (int i = 0; i < 7; ++i)
|
||||
dst[i] += (uint32_t) (src1[i] + src2[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1b\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tuxt.\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 1 } } */
|
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_6.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_6.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void
|
||||
f1 (uint64_t *dst, uint32_t *src1, uint16_t *src2)
|
||||
{
|
||||
for (int i = 0; i < 7; ++i)
|
||||
dst[i] += (uint32_t) (src1[i] + src2[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tuxt.\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tuxtw\tz[0-9]+\.d,} 1 } } */
|
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_7.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_7.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void
|
||||
f1 (uint32_t *dst, uint16_t *src1, int8_t *src2)
|
||||
{
|
||||
for (int i = 0; i < 7; ++i)
|
||||
dst[i] += (int16_t) (src1[i] + src2[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsxt.\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.s,} 1 } } */
|
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_8.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_8.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void
|
||||
f1 (uint64_t *dst, uint16_t *src1, int8_t *src2)
|
||||
{
|
||||
for (int i = 0; i < 7; ++i)
|
||||
dst[i] += (int16_t) (src1[i] + src2[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsxt.\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsxth\tz[0-9]+\.d,} 1 } } */
|
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_9.c
Normal file
21
gcc/testsuite/gcc.target/aarch64/sve/load_extend_9.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize -msve-vector-bits=512" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
void
|
||||
f1 (uint64_t *dst, uint32_t *src1, int8_t *src2)
|
||||
{
|
||||
for (int i = 0; i < 7; ++i)
|
||||
dst[i] += (int32_t) (src1[i] + src2[i]);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1sb\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tsxt.\t} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tsxtw\tz[0-9]+\.d,} 1 } } */
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
|
||||
|
||||
double
|
||||
f (double *restrict a, double *restrict b, int *lookup)
|
||||
|
Loading…
x
Reference in New Issue
Block a user