aarch64: Add support for unpacked SVE shifts

This patch adds support for unpacked SVE LSL, ASR and LSR.
For right shifts, the type suffix needs to be taken from the
element size rather than the container size.

gcc/
	* config/aarch64/aarch64-sve.md (<ASHIFT:optab><mode>3)
	(v<ASHIFT:optab><mode>3, @aarch64_pred_<optab><mode>)
	(*post_ra_v<ASHIFT:optab><mode>3): Extend from SVE_FULL_I to SVE_I.

gcc/testsuite/
	* gcc.target/aarch64/sve/shift_2.c: New test.
This commit is contained in:
Richard Sandiford 2021-01-11 18:03:20 +00:00
parent cbe9758ff4
commit b81fbfe1eb
2 changed files with 99 additions and 18 deletions

View File

@ -4500,9 +4500,9 @@
;; Unpredicated shift by a scalar, which expands into one of the vector
;; shifts below.
(define_expand "<ASHIFT:optab><mode>3"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(ASHIFT:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
[(set (match_operand:SVE_I 0 "register_operand")
(ASHIFT:SVE_I
(match_operand:SVE_I 1 "register_operand")
(match_operand:<VEL> 2 "general_operand")))]
"TARGET_SVE"
{
@ -4527,12 +4527,12 @@
;; Unpredicated shift by a vector.
(define_expand "v<optab><mode>3"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(unspec:SVE_FULL_I
[(set (match_operand:SVE_I 0 "register_operand")
(unspec:SVE_I
[(match_dup 3)
(ASHIFT:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
(ASHIFT:SVE_I
(match_operand:SVE_I 1 "register_operand")
(match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
{
@ -4545,12 +4545,12 @@
;; likely to gain much and would make the instruction seem less uniform
;; to the register allocator.
(define_insn_and_split "@aarch64_pred_<optab><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
(unspec:SVE_FULL_I
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?&w")
(unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
(ASHIFT:SVE_FULL_I
(match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
(match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
(ASHIFT:SVE_I
(match_operand:SVE_I 2 "register_operand" "w, 0, w, w")
(match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
UNSPEC_PRED_X))]
"TARGET_SVE"
"@
@ -4560,7 +4560,7 @@
movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
"&& reload_completed
&& !register_operand (operands[3], <MODE>mode)"
[(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
[(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
""
[(set_attr "movprfx" "*,*,*,yes")]
)
@ -4569,10 +4569,10 @@
;; Unpredicated shift of a vector register by an immediate.
;;
;; These are generated by splitting a predicated instruction whose
;; predicate is unused: the predicated shift pattern splits into this
;; plain ASHIFT form once reload has completed and its shift amount is
;; not a register.
;;
;; The pattern itself is only enabled after reload (see the condition).
;; Operand 2 must satisfy aarch64_simd_<lr>shift_imm and is printed as
;; the "#" immediate of the <shift> instruction; operands 0 and 1 are
;; printed with the <Vetype> suffix.
(define_insn "*post_ra_v<optab><mode>3"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
(ASHIFT:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "w")
(match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
[(set (match_operand:SVE_I 0 "register_operand" "=w")
(ASHIFT:SVE_I
(match_operand:SVE_I 1 "register_operand" "w")
(match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
"TARGET_SVE && reload_completed"
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)

View File

@ -0,0 +1,81 @@
/* { dg-do assemble { target aarch64_asm_sve_ok } } */
/* { dg-options "-O -msve-vector-bits=2048 -save-temps" } */
#include <stdint.h>
/* Define a function PREFIX_VTYPE_COUNT that applies SHIFT_OP to its single
   VTYPE argument with the constant shift amount COUNT.  */
#define TEST_SHIFT_IMM(VTYPE, PREFIX, SHIFT_OP, COUNT) \
  VTYPE \
  PREFIX##_##VTYPE##_##COUNT (VTYPE a) \
  { \
    return a SHIFT_OP COUNT; \
  }
/* Define the full set of shift functions for one vector type and operator:
   PREFIX_VTYPE_reg shifts by a second VTYPE argument, and three
   TEST_SHIFT_IMM instances shift by the constants 1, 5 and MAX_AMT.  */
#define TEST_SHIFT(VTYPE, PREFIX, SHIFT_OP, MAX_AMT) \
  VTYPE \
  PREFIX##_##VTYPE##_reg (VTYPE a, VTYPE b) \
  { \
    return a SHIFT_OP b; \
  } \
  TEST_SHIFT_IMM (VTYPE, PREFIX, SHIFT_OP, 1) \
  TEST_SHIFT_IMM (VTYPE, PREFIX, SHIFT_OP, 5) \
  TEST_SHIFT_IMM (VTYPE, PREFIX, SHIFT_OP, MAX_AMT)
/* Instantiate the shift tests for one vector type.

   Declares TYPE##SIZE as a vector of TYPE using vector_size (SIZE), then
   emits the TEST_SHIFT function sets for "<<" (prefix shl) and ">>"
   (prefix shr).  LIMIT is forwarded to TEST_SHIFT as the last immediate
   shift amount.

   The definition must not end in a line-continuation backslash: the line
   that follows is the first TEST_TYPE instantiation, and a trailing
   backslash would splice it into this macro's body instead of letting it
   expand on its own.  */
#define TEST_TYPE(TYPE, SIZE, LIMIT) \
  typedef TYPE TYPE##SIZE __attribute__((vector_size(SIZE))); \
  TEST_SHIFT (TYPE##SIZE, shl, <<, LIMIT) \
  TEST_SHIFT (TYPE##SIZE, shr, >>, LIMIT)
TEST_TYPE (int8_t, 32, 7)
TEST_TYPE (uint8_t, 32, 7)
TEST_TYPE (int8_t, 64, 7)
TEST_TYPE (uint8_t, 64, 7)
TEST_TYPE (int16_t, 64, 15)
TEST_TYPE (uint16_t, 64, 15)
TEST_TYPE (int8_t, 128, 7)
TEST_TYPE (uint8_t, 128, 7)
TEST_TYPE (int16_t, 128, 15)
TEST_TYPE (uint16_t, 128, 15)
TEST_TYPE (int32_t, 128, 31)
TEST_TYPE (uint32_t, 128, 31)
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 6 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 4 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 3 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 1 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 6 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 4 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 3 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #1\n} 3 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #1\n} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #1\n} 1 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 6 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 4 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 3 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #5\n} 3 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #5\n} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #5\n} 1 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 6 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 4 } } */
/* { dg-final { scan-assembler-times {\tlsl\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 3 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */
/* { dg-final { scan-assembler-times {\tlsr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.b, z[0-9]+\.b, #7\n} 3 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.h, z[0-9]+\.h, #15\n} 2 } } */
/* { dg-final { scan-assembler-times {\tasr\tz[0-9]+\.s, z[0-9]+\.s, #31\n} 1 } } */