[ARM] PR68532: Fix up vuzp for big endian
gcc/ChangeLog:

2016-02-09  Charles Baylis  <charles.baylis@linaro.org>

	PR target/68532
	* config/arm/arm.c (neon_endian_lane_map): New function.
	(neon_vector_pair_endian_lane_map): New function.
	(arm_evpc_neon_vuzp): Allow for big endian lane order.
	* config/arm/arm_neon.h (vuzpq_s8): Adjust shuffle patterns for big
	endian.
	(vuzpq_s16): Likewise.
	(vuzpq_s32): Likewise.
	(vuzpq_f32): Likewise.
	(vuzpq_u8): Likewise.
	(vuzpq_u16): Likewise.
	(vuzpq_u32): Likewise.
	(vuzpq_p8): Likewise.
	(vuzpq_p16): Likewise.

gcc/testsuite/ChangeLog:

2016-02-09  Charles Baylis  <charles.baylis@linaro.org>

	PR target/68532
	* gcc.c-torture/execute/pr68532.c: New test.

From-SVN: r233251
Parent: b890a4410b
Commit: 4b79ac23c6
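For background, vuzp de-interleaves two NEON vectors into their even- and odd-indexed elements; this commit fixes the lane handling for that operation on big-endian targets. The sketch below is not part of the commit: it is a minimal use of the public intrinsic, assuming an ARM toolchain with NEON enabled, and shows the result a user expects on either endianness.

/* Minimal vuzp demonstration (sketch, not part of this commit).
   Build with an ARM NEON toolchain, e.g. arm-linux-gnueabihf-gcc -O2.  */
#include <arm_neon.h>
#include <stdio.h>

int main (void)
{
  uint16_t buf[16], out[16];
  for (int i = 0; i < 16; i++)
    buf[i] = i;

  uint16x8_t a = vld1q_u16 (buf);       /* 0 1 2 3 4 5 6 7  */
  uint16x8_t b = vld1q_u16 (buf + 8);   /* 8 9 10 ... 15    */
  uint16x8x2_t r = vuzpq_u16 (a, b);    /* de-interleave a and b */

  vst1q_u16 (out, r.val[0]);            /* even elements: 0 2 4 ... 14 */
  vst1q_u16 (out + 8, r.val[1]);        /* odd elements:  1 3 5 ... 15 */
  for (int i = 0; i < 16; i++)
    printf ("%u ", out[i]);
  printf ("\n");
  return 0;
}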
gcc/ChangeLog

@@ -1,3 +1,20 @@
+2016-02-09  Charles Baylis  <charles.baylis@linaro.org>
+
+	PR target/68532
+	* config/arm/arm.c (neon_endian_lane_map): New function.
+	(neon_vector_pair_endian_lane_map): New function.
+	(arm_evpc_neon_vuzp): Allow for big endian lane order.
+	* config/arm/arm_neon.h (vuzpq_s8): Adjust shuffle patterns for big
+	endian.
+	(vuzpq_s16): Likewise.
+	(vuzpq_s32): Likewise.
+	(vuzpq_f32): Likewise.
+	(vuzpq_u8): Likewise.
+	(vuzpq_u16): Likewise.
+	(vuzpq_u32): Likewise.
+	(vuzpq_p8): Likewise.
+	(vuzpq_p16): Likewise.
+
 2016-02-11  Alexandre Oliva  <aoliva@redhat.com>
 
 	PR target/69634
gcc/config/arm/arm.c

@@ -28203,6 +28203,37 @@ arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
   arm_expand_vec_perm_1 (target, op0, op1, sel);
 }
 
+/* Map lane ordering between architectural lane order, and GCC lane order,
+   taking into account ABI.  See comment above output_move_neon for details.  */
+
+static int
+neon_endian_lane_map (machine_mode mode, int lane)
+{
+  if (BYTES_BIG_ENDIAN)
+    {
+      int nelems = GET_MODE_NUNITS (mode);
+      /* Reverse lane order.  */
+      lane = (nelems - 1 - lane);
+      /* Reverse D register order, to match ABI.  */
+      if (GET_MODE_SIZE (mode) == 16)
+	lane = lane ^ (nelems / 2);
+    }
+  return lane;
+}
+
+/* Some permutations index into pairs of vectors, this is a helper function
+   to map indexes into those pairs of vectors.  */
+
+static int
+neon_pair_endian_lane_map (machine_mode mode, int lane)
+{
+  int nelem = GET_MODE_NUNITS (mode);
+  if (BYTES_BIG_ENDIAN)
+    lane =
+      neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
+  return lane;
+}
+
 /* Generate or test for an insn that supports a constant permutation.  */
 
 /* Recognize patterns for the VUZP insns.  */
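To see what these helpers compute, here is a standalone sketch (plain C, not GCC code) that hard-codes a big-endian target and a 128-bit vector of 8 lanes; BIG_ENDIAN_TARGET, NELEMS and VEC_BYTES are hypothetical stand-ins for BYTES_BIG_ENDIAN, GET_MODE_NUNITS and GET_MODE_SIZE.

/* Sketch of the lane mapping above for a big-endian target and a
   128-bit vector with 8 lanes.  */
#include <stdio.h>

enum { NELEMS = 8, VEC_BYTES = 16, BIG_ENDIAN_TARGET = 1 };

static int endian_lane_map (int lane)
{
  if (BIG_ENDIAN_TARGET)
    {
      lane = NELEMS - 1 - lane;    /* reverse lane order */
      if (VEC_BYTES == 16)
        lane ^= NELEMS / 2;        /* reverse D register order */
    }
  return lane;
}

static int pair_endian_lane_map (int lane)
{
  if (BIG_ENDIAN_TARGET)
    lane = endian_lane_map (lane & (NELEMS - 1)) + (lane & NELEMS);
  return lane;
}

int main (void)
{
  for (int i = 0; i < NELEMS; i++)
    printf ("lane %d -> %d   pair lane %2d -> %2d\n",
            i, endian_lane_map (i),
            i + NELEMS, pair_endian_lane_map (i + NELEMS));
  return 0;
}

For this configuration the net effect is that lanes are reversed within each 64-bit half of the Q register (0..3 map to 3..0 and 4..7 map to 7..4), and the pair mapping applies the same permutation to indexes that refer to the second vector of a pair.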
@@ -28213,14 +28244,22 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
   unsigned int i, odd, mask, nelt = d->nelt;
   rtx out0, out1, in0, in1;
   rtx (*gen)(rtx, rtx, rtx, rtx);
+  int first_elem;
+  int swap_nelt;
 
   if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
     return false;
 
-  /* Note that these are little-endian tests.  Adjust for big-endian later.  */
-  if (d->perm[0] == 0)
+  /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
+     big endian pattern on 64 bit vectors, so we correct for that.  */
+  swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
+    && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
+
+  first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
+
+  if (first_elem == neon_endian_lane_map (d->vmode, 0))
     odd = 0;
-  else if (d->perm[0] == 1)
+  else if (first_elem == neon_endian_lane_map (d->vmode, 1))
     odd = 1;
   else
     return false;
@@ -28228,8 +28267,9 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
 
   for (i = 0; i < nelt; i++)
     {
-      unsigned elt = (i * 2 + odd) & mask;
-      if (d->perm[i] != elt)
+      unsigned elt =
+	(neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
+      if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
 	return false;
     }
 
@@ -28253,11 +28293,8 @@ arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
 
   in0 = d->op0;
   in1 = d->op1;
-  if (BYTES_BIG_ENDIAN)
-    {
-      std::swap (in0, in1);
-      odd = !odd;
-    }
+  if (swap_nelt != 0)
+    std::swap (in0, in1);
 
   out0 = d->target;
   out1 = gen_reg_rtx (d->vmode);
gcc/config/arm/arm_neon.h

@@ -8741,9 +8741,9 @@ vuzpq_s8 (int8x16_t __a, int8x16_t __b)
   int8x16x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
-      { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+      { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
-      { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+      { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
       { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
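These masks are __builtin_shuffle selectors over the concatenation of __a and __b: an index below 16 picks an element of __a, an index of 16 or more picks from __b. A small host-side emulation (hypothetical helper name, not GCC code) shows why the little-endian masks de-interleave the two inputs; the companion mask { 1, 3, ..., 31 } picks the odd elements the same way.

/* Sketch: emulate a two-operand byte shuffle to show what the
   little-endian vuzpq_s8 masks compute.  */
#include <stdio.h>

static void shuffle2 (const unsigned char *a, const unsigned char *b,
                      const unsigned char *sel, unsigned char *out)
{
  for (int i = 0; i < 16; i++)
    out[i] = sel[i] < 16 ? a[sel[i]] : b[sel[i] - 16];
}

int main (void)
{
  unsigned char a[16], b[16], out[16];
  for (int i = 0; i < 16; i++)
    {
      a[i] = i;           /* 0 .. 15  */
      b[i] = 16 + i;      /* 16 .. 31 */
    }
  const unsigned char evens[16] =
    { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 };
  shuffle2 (a, b, evens, out);
  for (int i = 0; i < 16; i++)
    printf ("%d ", out[i]);  /* 0 2 4 ... 30 */
  printf ("\n");
  return 0;
}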
@@ -8759,9 +8759,9 @@ vuzpq_s16 (int16x8_t __a, int16x8_t __b)
   int16x8x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
-      { 9, 11, 13, 15, 1, 3, 5, 7 });
+      { 5, 7, 1, 3, 13, 15, 9, 11 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
-      { 8, 10, 12, 14, 0, 2, 4, 6 });
+      { 4, 6, 0, 2, 12, 14, 8, 10 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
       { 0, 2, 4, 6, 8, 10, 12, 14 });
@@ -8776,8 +8776,8 @@ vuzpq_s32 (int32x4_t __a, int32x4_t __b)
 {
   int32x4x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
-  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
-  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
@@ -8790,8 +8790,8 @@ vuzpq_f32 (float32x4_t __a, float32x4_t __b)
 {
   float32x4x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
-  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
-  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
@@ -8805,9 +8805,9 @@ vuzpq_u8 (uint8x16_t __a, uint8x16_t __b)
   uint8x16x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
-      { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+      { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
-      { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+      { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
       { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
@@ -8823,9 +8823,9 @@ vuzpq_u16 (uint16x8_t __a, uint16x8_t __b)
   uint16x8x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
-      { 9, 11, 13, 15, 1, 3, 5, 7 });
+      { 5, 7, 1, 3, 13, 15, 9, 11 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
-      { 8, 10, 12, 14, 0, 2, 4, 6 });
+      { 4, 6, 0, 2, 12, 14, 8, 10 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
       { 0, 2, 4, 6, 8, 10, 12, 14 });
@@ -8840,8 +8840,8 @@ vuzpq_u32 (uint32x4_t __a, uint32x4_t __b)
 {
   uint32x4x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
-  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 5, 7, 1, 3 });
-  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 4, 6, 0, 2 });
+  __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 3, 1, 7, 5 });
+  __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 2, 0, 6, 4 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint32x4_t) { 0, 2, 4, 6 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint32x4_t) { 1, 3, 5, 7 });
@@ -8855,9 +8855,9 @@ vuzpq_p8 (poly8x16_t __a, poly8x16_t __b)
   poly8x16x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
-      { 17, 19, 21, 23, 25, 27, 29, 31, 1, 3, 5, 7, 9, 11, 13, 15 });
+      { 9, 11, 13, 15, 1, 3, 5, 7, 25, 27, 29, 31, 17, 19, 21, 23 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint8x16_t)
-      { 16, 18, 20, 22, 24, 26, 28, 30, 0, 2, 4, 6, 8, 10, 12, 14 });
+      { 8, 10, 12, 14, 0, 2, 4, 6, 24, 26, 28, 30, 16, 18, 20, 22 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint8x16_t)
       { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 });
@@ -8873,9 +8873,9 @@ vuzpq_p16 (poly16x8_t __a, poly16x8_t __b)
   poly16x8x2_t __rv;
 #ifdef __ARM_BIG_ENDIAN
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
-      { 9, 11, 13, 15, 1, 3, 5, 7 });
+      { 5, 7, 1, 3, 13, 15, 9, 11 });
   __rv.val[1] = __builtin_shuffle (__a, __b, (uint16x8_t)
-      { 8, 10, 12, 14, 0, 2, 4, 6 });
+      { 4, 6, 0, 2, 12, 14, 8, 10 });
 #else
   __rv.val[0] = __builtin_shuffle (__a, __b, (uint16x8_t)
       { 0, 2, 4, 6, 8, 10, 12, 14 });
gcc/testsuite/ChangeLog

@@ -1,3 +1,8 @@
+2016-02-09  Charles Baylis  <charles.baylis@linaro.org>
+
+	PR target/68532
+	* gcc.c-torture/execute/pr68532.c: New test.
+
 2016-02-11  Alexandre Oliva  <aoliva@redhat.com>
 
 	PR target/69634
gcc/testsuite/gcc.c-torture/execute/pr68532.c (new file, 22 lines)
@@ -0,0 +1,22 @@
+/* { dg-options "-O2 -ftree-vectorize -fno-vect-cost-model" } */
+
+#define SIZE 128
+unsigned short _Alignas (16) in[SIZE];
+
+__attribute__ ((noinline)) int
+test (unsigned short sum, unsigned short *in, int x)
+{
+  for (int j = 0; j < SIZE; j += 8)
+    sum += in[j] * x;
+  return sum;
+}
+
+int
+main ()
+{
+  for (int i = 0; i < SIZE; i++)
+    in[i] = i;
+  if (test (0, in, 1) != 960)
+    __builtin_abort ();
+  return 0;
+}
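The expected value checks out by hand: after initialisation in[j] == j, so the loop sums in[0], in[8], ..., in[120], i.e. 8 * (0 + 1 + ... + 15) = 8 * 120 = 960. On a target exhibiting PR68532 the vectorized reduction returns a different sum and the test aborts. A trivial host-side check of the arithmetic (not part of the testsuite):

#include <stdio.h>

int main (void)
{
  int sum = 0;
  for (int j = 0; j < 128; j += 8)
    sum += j;              /* in[j] == j after the init loop, x == 1 */
  printf ("%d\n", sum);    /* prints 960 */
  return 0;
}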