mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 05:20:24 +08:00
[AArch64] Add truncation for partial SVE modes
This patch adds support for "truncating" to a partial SVE vector from either a full SVE vector or a wider partial vector. This truncation is a no-op, so it should have zero cost in the vector cost model.

2019-11-16 Richard Sandiford <richard.sandiford@arm.com>

gcc/
* config/aarch64/aarch64-sve.md (trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2): New pattern.
* config/aarch64/aarch64.c (aarch64_integer_truncation_p): New function.
(aarch64_sve_adjust_stmt_cost): Call it.

gcc/testsuite/
* gcc.target/aarch64/sve/mask_struct_load_1.c: Add --param aarch64-sve-compare-costs=0.
* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
* gcc.target/aarch64/sve/pack_1.c: Likewise.
* gcc.target/aarch64/sve/truncate_1.c: New test.

From-SVN: r278344
This commit is contained in:
parent
217ccab8f4
commit
2d56600c8d
@ -1,3 +1,11 @@
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-sve.md
|
||||
(trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2): New pattern.
|
||||
* config/aarch64/aarch64.c (aarch64_integer_truncation_p): New
|
||||
function.
|
||||
(aarch64_sve_adjust_stmt_cost): Call it.
|
||||
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* config/aarch64/aarch64-sve.md
|
||||
|
@ -72,6 +72,7 @@
|
||||
;; ---- [INT] General unary arithmetic corresponding to rtx codes
|
||||
;; ---- [INT] General unary arithmetic corresponding to unspecs
|
||||
;; ---- [INT] Sign and zero extension
|
||||
;; ---- [INT] Truncation
|
||||
;; ---- [INT] Logical inverse
|
||||
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
|
||||
;; ---- [FP] General unary arithmetic corresponding to unspecs
|
||||
@ -2888,6 +2889,29 @@
|
||||
[(set_attr "movprfx" "*,yes,yes")]
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; ---- [INT] Truncation
|
||||
;; -------------------------------------------------------------------------
|
||||
;; The patterns in this section are synthetic.
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
;; Truncate to a partial SVE vector from either a full vector or a
|
||||
;; wider partial vector. This is a no-op, because we can just ignore
|
||||
;; the unused upper bits of the source.
|
||||
;;
;; Operand 0 is the SVE_PARTIAL_I destination register and operand 1 is
;; the SVE_HSDI source register; both use the "w" constraint.
;;
;; The insn condition requires TARGET_SVE and that the destination's
;; self_mask has no bits outside the source's narrower_mask.
;; NOTE(review): presumably this restricts the pattern to destination
;; modes whose elements are narrower than the source's -- confirm against
;; the narrower_mask/self_mask mode attributes, which are defined elsewhere.
;;
;; The "#" output template means the insn is never emitted as-is; it is
;; always split.  The split condition "&& reload_completed" is appended to
;; the insn condition, so the split happens only after register allocation.
;; The split rewrites the truncation as a plain set of operand 0 from
;; operand 1, after the preparation statement has passed operand 1 through
;; aarch64_replace_reg_mode with the destination mode.
;; NOTE(review): aarch64_replace_reg_mode is defined elsewhere; presumably
;; it returns the same hard register viewed in the new mode -- confirm.
(define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
|
||||
[(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
|
||||
(truncate:SVE_PARTIAL_I
|
||||
(match_operand:SVE_HSDI 1 "register_operand" "w")))]
|
||||
"TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
|
||||
"#"
|
||||
"&& reload_completed"
|
||||
[(set (match_dup 0) (match_dup 1))]
|
||||
{
|
||||
operands[1] = aarch64_replace_reg_mode (operands[1],
|
||||
<SVE_PARTIAL_I:MODE>mode);
|
||||
}
|
||||
)
|
||||
|
||||
;; -------------------------------------------------------------------------
|
||||
;; ---- [INT] Logical inverse
|
||||
;; -------------------------------------------------------------------------
|
||||
|
@ -12901,6 +12901,21 @@ aarch64_extending_load_p (stmt_vec_info stmt_info)
|
||||
&& DR_IS_READ (STMT_VINFO_DATA_REF (def_stmt_info)));
|
||||
}
|
||||
|
||||
/* Return true if STMT_INFO is an integer truncation. */
|
||||
static bool
|
||||
aarch64_integer_truncation_p (stmt_vec_info stmt_info)
|
||||
{
|
||||
gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
|
||||
if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)))
|
||||
return false;
|
||||
|
||||
tree lhs_type = TREE_TYPE (gimple_assign_lhs (assign));
|
||||
tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
|
||||
return (INTEGRAL_TYPE_P (lhs_type)
|
||||
&& INTEGRAL_TYPE_P (rhs_type)
|
||||
&& TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type));
|
||||
}
|
||||
|
||||
/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
|
||||
for STMT_INFO, which has cost kind KIND. Adjust the cost as necessary
|
||||
for SVE targets. */
|
||||
@ -12919,6 +12934,11 @@ aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
|
||||
if (kind == vector_stmt && aarch64_extending_load_p (stmt_info))
|
||||
stmt_cost = 0;
|
||||
|
||||
/* For similar reasons, vector_stmt integer truncations are a no-op,
|
||||
because we can just ignore the unused upper bits of the source. */
|
||||
if (kind == vector_stmt && aarch64_integer_truncation_p (stmt_info))
|
||||
stmt_cost = 0;
|
||||
|
||||
return stmt_cost;
|
||||
}
|
||||
|
||||
|
@ -1,3 +1,14 @@
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.target/aarch64/sve/mask_struct_load_1.c: Add
|
||||
--param aarch64-sve-compare-costs=0.
|
||||
* gcc.target/aarch64/sve/mask_struct_load_2.c: Likewise.
|
||||
* gcc.target/aarch64/sve/mask_struct_load_3.c: Likewise.
|
||||
* gcc.target/aarch64/sve/mask_struct_load_4.c: Likewise.
|
||||
* gcc.target/aarch64/sve/mask_struct_load_5.c: Likewise.
|
||||
* gcc.target/aarch64/sve/pack_1.c: Likewise.
|
||||
* gcc.target/aarch64/sve/truncate_1.c: New test.
|
||||
|
||||
2019-11-16 Richard Sandiford <richard.sandiford@arm.com>
|
||||
|
||||
* gcc.target/aarch64/sve/load_extend_1.c: New test.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -ffast-math --param aarch64-sve-compare-costs=0" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
/* { dg-options "-O2 -ftree-vectorize --param aarch64-sve-compare-costs=0" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
|
44
gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c
Normal file
44
gcc/testsuite/gcc.target/aarch64/sve/truncate_1.c
Normal file
@ -0,0 +1,44 @@
|
||||
/* { dg-options "-O2 -ftree-vectorize" } */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/* Define a function f_<TYPE1>_<TYPE2> that, for each of the N elements,
   adds src1[i] and src2[i], casts the sum to TYPE1, shifts it right by
   SHIFT and stores the result in dst[i].  DST has element type TYPE2, so
   the store implicitly converts the shifted TYPE1 value to TYPE2.  All
   three pointers are restrict-qualified.  */
#define TEST_LOOP(TYPE1, TYPE2, SHIFT) \
|
||||
void \
|
||||
f_##TYPE1##_##TYPE2 (TYPE2 *restrict dst, TYPE1 *restrict src1, \
|
||||
TYPE1 *restrict src2, int n) \
|
||||
{ \
|
||||
for (int i = 0; i < n; ++i) \
|
||||
dst[i] = (TYPE1) (src1[i] + src2[i]) >> SHIFT; \
|
||||
}
|
||||
|
||||
/* Apply T to every (source type, destination type, shift count) triple
   under test.  In each triple the destination type is narrower than the
   source type and the shift count is smaller than the source width.  */
#define TEST_ALL(T) \
|
||||
T (uint16_t, uint8_t, 2) \
|
||||
T (uint32_t, uint8_t, 18) \
|
||||
T (uint64_t, uint8_t, 34) \
|
||||
T (uint32_t, uint16_t, 3) \
|
||||
T (uint64_t, uint16_t, 19) \
|
||||
T (uint64_t, uint32_t, 4)
|
||||
|
||||
/* Instantiate one TEST_LOOP function per triple listed in TEST_ALL.  */
TEST_ALL (TEST_LOOP)
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.h,} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.s,} 4 } } */
|
||||
/* { dg-final { scan-assembler-times {\tld1d\tz[0-9]+\.d,} 6 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
|
||||
/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #2\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #18\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #34\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #3\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #19\n} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.d, z[0-9]+\.d, #4\n} 1 } } */
|
||||
|
||||
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.h,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst1b\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.s,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst1h\tz[0-9]+\.d,} 1 } } */
|
||||
/* { dg-final { scan-assembler-times {\tst1w\tz[0-9]+\.d,} 1 } } */
|
Loading…
x
Reference in New Issue
Block a user