mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-05 08:20:29 +08:00
arm.c (arm_evpc_neon_vext): New function.
2012-09-04 Christophe Lyon <christophe.lyon@linaro.org> gcc/ * config/arm/arm.c (arm_evpc_neon_vext): New function. (arm_expand_vec_perm_const_1): Add call to arm_evpc_neon_vext. gcc/testsuite/ * gcc.target/arm/neon-vext.c: New test. * gcc.target/arm/neon-vext-execute.c: Ditto. From-SVN: r190911
This commit is contained in:
parent
ee3bea0b28
commit
434641a57b
@ -1,3 +1,10 @@
|
||||
2012-09-04 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
* config/arm/arm.c (arm_evpc_neon_vext): New
|
||||
function.
|
||||
(arm_expand_vec_perm_const_1): Add call to
|
||||
arm_evpc_neon_vext.
|
||||
|
||||
2012-09-04 Oleg Endo <olegendo@gcc.gnu.org>
|
||||
|
||||
PR target/51244
|
||||
|
@ -25937,6 +25937,72 @@ arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Recognize patterns for the VEXT insns. */
|
||||
|
||||
static bool
|
||||
arm_evpc_neon_vext (struct expand_vec_perm_d *d)
|
||||
{
|
||||
unsigned int i, nelt = d->nelt;
|
||||
rtx (*gen) (rtx, rtx, rtx, rtx);
|
||||
rtx offset;
|
||||
|
||||
unsigned int location;
|
||||
|
||||
unsigned int next = d->perm[0] + 1;
|
||||
|
||||
/* TODO: Handle GCC's numbering of elements for big-endian. */
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
return false;
|
||||
|
||||
/* Check if the extracted indexes are increasing by one. */
|
||||
for (i = 1; i < nelt; next++, i++)
|
||||
{
|
||||
/* If we hit the most significant element of the 2nd vector in
|
||||
the previous iteration, no need to test further. */
|
||||
if (next == 2 * nelt)
|
||||
return false;
|
||||
|
||||
/* If we are operating on only one vector: it could be a
|
||||
rotation. If there are only two elements of size < 64, let
|
||||
arm_evpc_neon_vrev catch it. */
|
||||
if (d->one_vector_p && (next == nelt))
|
||||
{
|
||||
if ((nelt == 2) && (d->vmode != V2DImode))
|
||||
return false;
|
||||
else
|
||||
next = 0;
|
||||
}
|
||||
|
||||
if (d->perm[i] != next)
|
||||
return false;
|
||||
}
|
||||
|
||||
location = d->perm[0];
|
||||
|
||||
switch (d->vmode)
|
||||
{
|
||||
case V16QImode: gen = gen_neon_vextv16qi; break;
|
||||
case V8QImode: gen = gen_neon_vextv8qi; break;
|
||||
case V4HImode: gen = gen_neon_vextv4hi; break;
|
||||
case V8HImode: gen = gen_neon_vextv8hi; break;
|
||||
case V2SImode: gen = gen_neon_vextv2si; break;
|
||||
case V4SImode: gen = gen_neon_vextv4si; break;
|
||||
case V2SFmode: gen = gen_neon_vextv2sf; break;
|
||||
case V4SFmode: gen = gen_neon_vextv4sf; break;
|
||||
case V2DImode: gen = gen_neon_vextv2di; break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Success! */
|
||||
if (d->testing_p)
|
||||
return true;
|
||||
|
||||
offset = GEN_INT (location);
|
||||
emit_insn (gen (d->target, d->op0, d->op1, offset));
|
||||
return true;
|
||||
}
|
||||
|
||||
/* The NEON VTBL instruction is a fully variable permuation that's even
|
||||
stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
|
||||
is mask the index operand as VEC_PERM_EXPR requires. Therefore we
|
||||
@ -25976,6 +26042,12 @@ arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
|
||||
static bool
|
||||
arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
|
||||
{
|
||||
/* Check if the input mask matches vext before reordering the
|
||||
operands. */
|
||||
if (TARGET_NEON)
|
||||
if (arm_evpc_neon_vext (d))
|
||||
return true;
|
||||
|
||||
/* The pattern matching functions above are written to look for a small
|
||||
number to begin the sequence (0, 1, N/2). If we begin with an index
|
||||
from the second operand, we can swap the operands. */
|
||||
|
@ -1,3 +1,8 @@
|
||||
2012-09-04 Christophe Lyon <christophe.lyon@linaro.org>
|
||||
|
||||
* gcc.target/arm/neon-vext.c: New test.
|
||||
* gcc.target/arm/neon-vext-execute.c: Ditto.
|
||||
|
||||
2012-09-04 Janus Weil <janus@gcc.gnu.org>
|
||||
|
||||
PR fortran/54243
|
||||
|
340
gcc/testsuite/gcc.target/arm/neon-vext-execute.c
Normal file
340
gcc/testsuite/gcc.target/arm/neon-vext-execute.c
Normal file
@ -0,0 +1,340 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target arm_neon_ok } */
|
||||
/* { dg-require-effective-target arm_little_endian } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
|
||||
uint8x8_t
|
||||
tst_vext_u8 (uint8x8_t __a, uint8x8_t __b)
|
||||
{
|
||||
uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};
|
||||
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint8x8_t
|
||||
tst_vext_u8_rotate (uint8x8_t __a)
|
||||
{
|
||||
uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 0, 1};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint16x4_t
|
||||
tst_vext_u16 (uint16x4_t __a, uint16x4_t __b)
|
||||
{
|
||||
uint16x4_t __mask1 = {2, 3, 4, 5};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint16x4_t
|
||||
tst_vext_u16_rotate (uint16x4_t __a)
|
||||
{
|
||||
uint16x4_t __mask1 = {2, 3, 0, 1};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint32x2_t
|
||||
tst_vext_u32 (uint32x2_t __a, uint32x2_t __b)
|
||||
{
|
||||
uint32x2_t __mask1 = {1, 2};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
/* This one is mapped into vrev64.32. */
|
||||
uint32x2_t
|
||||
tst_vext_u32_rotate (uint32x2_t __a)
|
||||
{
|
||||
uint32x2_t __mask1 = {1, 0};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint8x16_t
|
||||
tst_vextq_u8 (uint8x16_t __a, uint8x16_t __b)
|
||||
{
|
||||
uint8x16_t __mask1 = {4, 5, 6, 7, 8, 9, 10, 11,
|
||||
12, 13, 14, 15, 16, 17, 18, 19};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint8x16_t
|
||||
tst_vextq_u8_rotate (uint8x16_t __a)
|
||||
{
|
||||
uint8x16_t __mask1 = {4, 5, 6, 7, 8, 9, 10, 11,
|
||||
12, 13, 14, 15, 0, 1, 2, 3};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint16x8_t
|
||||
tst_vextq_u16 (uint16x8_t __a, uint16x8_t __b)
|
||||
{
|
||||
uint16x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint16x8_t
|
||||
tst_vextq_u16_rotate (uint16x8_t __a)
|
||||
{
|
||||
uint16x8_t __mask1 = {2, 3, 4, 5, 6, 7, 0, 1};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint32x4_t
|
||||
tst_vextq_u32 (uint32x4_t __a, uint32x4_t __b)
|
||||
{
|
||||
uint32x4_t __mask1 = {1, 2, 3, 4};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint32x4_t
|
||||
tst_vextq_u32_rotate (uint32x4_t __a)
|
||||
{
|
||||
uint32x4_t __mask1 = {1, 2, 3, 0};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint64x2_t
|
||||
tst_vextq_u64 (uint64x2_t __a, uint64x2_t __b)
|
||||
{
|
||||
uint64x2_t __mask1 = {1, 2};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint64x2_t
|
||||
tst_vextq_u64_rotate (uint64x2_t __a)
|
||||
{
|
||||
uint64x2_t __mask1 = {1, 0};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
uint8_t arr_u8x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
uint8_t arr2_u8x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
|
||||
uint16_t arr_u16x4[] = {0, 1, 2, 3};
|
||||
uint16_t arr2_u16x4[] = {4, 5, 6, 7};
|
||||
uint32_t arr_u32x2[] = {0, 1};
|
||||
uint32_t arr2_u32x2[] = {2, 3};
|
||||
uint8_t arr_u8x16[] = {0, 1, 2, 3, 4, 5, 6, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15};
|
||||
uint8_t arr2_u8x16[] = {16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24, 25, 26, 27, 28, 29, 30, 31};
|
||||
uint16_t arr_u16x8[] = {0, 1, 2, 3, 4, 5, 6, 7};
|
||||
uint16_t arr2_u16x8[] = {8, 9, 10, 11, 12, 13, 14, 15};
|
||||
uint32_t arr_u32x4[] = {0, 1, 2, 3};
|
||||
uint32_t arr2_u32x4[] = {4, 5, 6, 7};
|
||||
uint64_t arr_u64x2[] = {0, 1};
|
||||
uint64_t arr2_u64x2[] = {2, 3};
|
||||
|
||||
uint8_t expected_u8x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
|
||||
uint8_t expected_rot_u8x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
|
||||
uint16_t expected_u16x4[] = {2, 3, 4, 5};
|
||||
uint16_t expected_rot_u16x4[] = {2, 3, 0, 1};
|
||||
uint32_t expected_u32x2[] = {1, 2};
|
||||
uint32_t expected_rot_u32x2[] = {1, 0};
|
||||
uint8_t expected_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
|
||||
12, 13, 14, 15, 16, 17, 18, 19};
|
||||
uint8_t expected_rot_u8x16[] = {4, 5, 6, 7, 8, 9, 10, 11,
|
||||
12, 13, 14, 15, 0, 1, 2, 3,};
|
||||
uint16_t expected_u16x8[] = {2, 3, 4, 5, 6, 7, 8, 9};
|
||||
uint16_t expected_rot_u16x8[] = {2, 3, 4, 5, 6, 7, 0, 1};
|
||||
uint32_t expected_u32x4[] = {1, 2, 3, 4};
|
||||
uint32_t expected_rot_u32x4[] = {1, 2, 3, 0};
|
||||
uint64_t expected_u64x2[] = {1, 2};
|
||||
uint64_t expected_rot_u64x2[] = {1, 0};
|
||||
|
||||
uint8x8_t vec_u8x8 = vld1_u8 (arr_u8x8);
|
||||
uint8x8_t vec2_u8x8 = vld1_u8 (arr2_u8x8);
|
||||
uint16x4_t vec_u16x4 = vld1_u16 (arr_u16x4);
|
||||
uint16x4_t vec2_u16x4 = vld1_u16 (arr2_u16x4);
|
||||
uint32x2_t vec_u32x2 = vld1_u32 (arr_u32x2);
|
||||
uint32x2_t vec2_u32x2 = vld1_u32 (arr2_u32x2);
|
||||
uint8x16_t vec_u8x16 = vld1q_u8 (arr_u8x16);
|
||||
uint8x16_t vec2_u8x16 = vld1q_u8 (arr2_u8x16);
|
||||
uint16x8_t vec_u16x8 = vld1q_u16 (arr_u16x8);
|
||||
uint16x8_t vec2_u16x8 = vld1q_u16 (arr2_u16x8);
|
||||
uint32x4_t vec_u32x4 = vld1q_u32 (arr_u32x4);
|
||||
uint32x4_t vec2_u32x4 = vld1q_u32 (arr2_u32x4);
|
||||
uint64x2_t vec_u64x2 = vld1q_u64 (arr_u64x2);
|
||||
uint64x2_t vec2_u64x2 = vld1q_u64 (arr2_u64x2);
|
||||
|
||||
uint8x8_t result_u8x8;
|
||||
uint16x4_t result_u16x4;
|
||||
uint32x2_t result_u32x2;
|
||||
uint8x16_t result_u8x16;
|
||||
uint16x8_t result_u16x8;
|
||||
uint32x4_t result_u32x4;
|
||||
uint64x2_t result_u64x2;
|
||||
|
||||
union {uint8x8_t v; uint8_t buf[8];} mem_u8x8;
|
||||
union {uint16x4_t v; uint16_t buf[4];} mem_u16x4;
|
||||
union {uint32x2_t v; uint32_t buf[2];} mem_u32x2;
|
||||
union {uint8x16_t v; uint8_t buf[16];} mem_u8x16;
|
||||
union {uint16x8_t v; uint16_t buf[8];} mem_u16x8;
|
||||
union {uint32x4_t v; uint32_t buf[4];} mem_u32x4;
|
||||
union {uint64x2_t v; uint64_t buf[2];} mem_u64x2;
|
||||
|
||||
int i;
|
||||
|
||||
result_u8x8 = tst_vext_u8 (vec_u8x8, vec2_u8x8);
|
||||
vst1_u8 (mem_u8x8.buf, result_u8x8);
|
||||
|
||||
for (i=0; i<8; i++)
|
||||
if (mem_u8x8.buf[i] != expected_u8x8[i])
|
||||
{
|
||||
printf ("tst_vext_u8[%d]=%d expected %d\n",
|
||||
i, mem_u8x8.buf[i], expected_u8x8[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u8x8 = tst_vext_u8_rotate (vec_u8x8);
|
||||
vst1_u8 (mem_u8x8.buf, result_u8x8);
|
||||
|
||||
for (i=0; i<8; i++)
|
||||
if (mem_u8x8.buf[i] != expected_rot_u8x8[i])
|
||||
{
|
||||
printf ("tst_vext_u8_rotate[%d]=%d expected %d\n",
|
||||
i, mem_u8x8.buf[i], expected_rot_u8x8[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
result_u16x4 = tst_vext_u16 (vec_u16x4, vec2_u16x4);
|
||||
vst1_u16 (mem_u16x4.buf, result_u16x4);
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
if (mem_u16x4.buf[i] != expected_u16x4[i])
|
||||
{
|
||||
printf ("tst_vext_u16[%d]=%d expected %d\n",
|
||||
i, mem_u16x4.buf[i], expected_u16x4[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u16x4 = tst_vext_u16_rotate (vec_u16x4);
|
||||
vst1_u16 (mem_u16x4.buf, result_u16x4);
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
if (mem_u16x4.buf[i] != expected_rot_u16x4[i])
|
||||
{
|
||||
printf ("tst_vext_u16_rotate[%d]=%d expected %d\n",
|
||||
i, mem_u16x4.buf[i], expected_rot_u16x4[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
result_u32x2 = tst_vext_u32 (vec_u32x2, vec2_u32x2);
|
||||
vst1_u32 (mem_u32x2.buf, result_u32x2);
|
||||
|
||||
for (i=0; i<2; i++)
|
||||
if (mem_u32x2.buf[i] != expected_u32x2[i])
|
||||
{
|
||||
printf ("tst_vext_u32[%d]=%d expected %d\n",
|
||||
i, mem_u32x2.buf[i], expected_u32x2[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u32x2 = tst_vext_u32_rotate (vec_u32x2);
|
||||
vst1_u32 (mem_u32x2.buf, result_u32x2);
|
||||
|
||||
for (i=0; i<2; i++)
|
||||
if (mem_u32x2.buf[i] != expected_rot_u32x2[i])
|
||||
{
|
||||
printf ("tst_vext_u32_rotate[%d]=%d expected %d\n",
|
||||
i, mem_u32x2.buf[i], expected_rot_u32x2[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
result_u8x16 = tst_vextq_u8 (vec_u8x16, vec2_u8x16);
|
||||
vst1q_u8 (mem_u8x16.buf, result_u8x16);
|
||||
|
||||
for (i=0; i<16; i++)
|
||||
if (mem_u8x16.buf[i] != expected_u8x16[i])
|
||||
{
|
||||
printf ("tst_vextq_u8[%d]=%d expected %d\n",
|
||||
i, mem_u8x16.buf[i], expected_u8x16[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u8x16 = tst_vextq_u8_rotate (vec_u8x16);
|
||||
vst1q_u8 (mem_u8x16.buf, result_u8x16);
|
||||
|
||||
for (i=0; i<16; i++)
|
||||
if (mem_u8x16.buf[i] != expected_rot_u8x16[i])
|
||||
{
|
||||
printf ("tst_vextq_u8_rotate[%d]=%d expected %d\n",
|
||||
i, mem_u8x16.buf[i], expected_rot_u8x16[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u16x8 = tst_vextq_u16 (vec_u16x8, vec2_u16x8);
|
||||
vst1q_u16 (mem_u16x8.buf, result_u16x8);
|
||||
|
||||
for (i=0; i<8; i++)
|
||||
if (mem_u16x8.buf[i] != expected_u16x8[i])
|
||||
{
|
||||
printf ("tst_vextq_u16[%d]=%d expected %d\n",
|
||||
i, mem_u16x8.buf[i], expected_u16x8[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u16x8 = tst_vextq_u16_rotate (vec_u16x8);
|
||||
vst1q_u16 (mem_u16x8.buf, result_u16x8);
|
||||
|
||||
for (i=0; i<8; i++)
|
||||
if (mem_u16x8.buf[i] != expected_rot_u16x8[i])
|
||||
{
|
||||
printf ("tst_vextq_u16_rotate[%d]=%d expected %d\n",
|
||||
i, mem_u16x8.buf[i], expected_rot_u16x8[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u32x4 = tst_vextq_u32 (vec_u32x4, vec2_u32x4);
|
||||
vst1q_u32 (mem_u32x4.buf, result_u32x4);
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
if (mem_u32x4.buf[i] != expected_u32x4[i])
|
||||
{
|
||||
printf ("tst_vextq_u32[%d]=%d expected %d\n",
|
||||
i, mem_u32x4.buf[i], expected_u32x4[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u32x4 = tst_vextq_u32_rotate (vec_u32x4);
|
||||
vst1q_u32 (mem_u32x4.buf, result_u32x4);
|
||||
|
||||
for (i=0; i<4; i++)
|
||||
if (mem_u32x4.buf[i] != expected_rot_u32x4[i])
|
||||
{
|
||||
printf ("tst_vextq_u32_rotate[%d]=%d expected %d\n",
|
||||
i, mem_u32x4.buf[i], expected_rot_u32x4[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u64x2 = tst_vextq_u64 (vec_u64x2, vec2_u64x2);
|
||||
vst1q_u64 (mem_u64x2.buf, result_u64x2);
|
||||
|
||||
for (i=0; i<2; i++)
|
||||
if (mem_u64x2.buf[i] != expected_u64x2[i])
|
||||
{
|
||||
printf ("tst_vextq_u64[%d]=%lld expected %lld\n",
|
||||
i, mem_u64x2.buf[i], expected_u64x2[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
result_u64x2 = tst_vextq_u64_rotate (vec_u64x2);
|
||||
vst1q_u64 (mem_u64x2.buf, result_u64x2);
|
||||
|
||||
for (i=0; i<2; i++)
|
||||
if (mem_u64x2.buf[i] != expected_rot_u64x2[i])
|
||||
{
|
||||
printf ("tst_vextq_u64_rotate[%d]=%lld expected %lld\n",
|
||||
i, mem_u64x2.buf[i], expected_rot_u64x2[i]);
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
115
gcc/testsuite/gcc.target/arm/neon-vext.c
Normal file
115
gcc/testsuite/gcc.target/arm/neon-vext.c
Normal file
@ -0,0 +1,115 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target arm_neon_ok } */
|
||||
/* { dg-require-effective-target arm_little_endian } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-add-options arm_neon } */
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
uint8x8_t
|
||||
tst_vext_u8 (uint8x8_t __a, uint8x8_t __b)
|
||||
{
|
||||
uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};
|
||||
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint8x8_t
|
||||
tst_vext_u8_rotate (uint8x8_t __a)
|
||||
{
|
||||
uint8x8_t __mask1 = {2, 3, 4, 5, 6, 7, 0, 1};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint16x4_t
|
||||
tst_vext_u16 (uint16x4_t __a, uint16x4_t __b)
|
||||
{
|
||||
uint16x4_t __mask1 = {2, 3, 4, 5};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint16x4_t
|
||||
tst_vext_u16_rotate (uint16x4_t __a)
|
||||
{
|
||||
uint16x4_t __mask1 = {2, 3, 0, 1};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint32x2_t
|
||||
tst_vext_u32 (uint32x2_t __a, uint32x2_t __b)
|
||||
{
|
||||
uint32x2_t __mask1 = {1, 2};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
/* This one is mapped into vrev64.32. */
|
||||
uint32x2_t
|
||||
tst_vext_u32_rotate (uint32x2_t __a)
|
||||
{
|
||||
uint32x2_t __mask1 = {1, 0};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint8x16_t
|
||||
tst_vextq_u8 (uint8x16_t __a, uint8x16_t __b)
|
||||
{
|
||||
uint8x16_t __mask1 = {4, 5, 6, 7, 8, 9, 10, 11,
|
||||
12, 13, 14, 15, 16, 17, 18, 19};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint8x16_t
|
||||
tst_vextq_u8_rotate (uint8x16_t __a)
|
||||
{
|
||||
uint8x16_t __mask1 = {4, 5, 6, 7, 8, 9, 10, 11,
|
||||
12, 13, 14, 15, 0, 1, 2, 3};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint16x8_t
|
||||
tst_vextq_u16 (uint16x8_t __a, uint16x8_t __b)
|
||||
{
|
||||
uint16x8_t __mask1 = {2, 3, 4, 5, 6, 7, 8, 9};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint16x8_t
|
||||
tst_vextq_u16_rotate (uint16x8_t __a)
|
||||
{
|
||||
uint16x8_t __mask1 = {2, 3, 4, 5, 6, 7, 0, 1};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint32x4_t
|
||||
tst_vextq_u32 (uint32x4_t __a, uint32x4_t __b)
|
||||
{
|
||||
uint32x4_t __mask1 = {1, 2, 3, 4};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint32x4_t
|
||||
tst_vextq_u32_rotate (uint32x4_t __a)
|
||||
{
|
||||
uint32x4_t __mask1 = {1, 2, 3, 0};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
uint64x2_t
|
||||
tst_vextq_u64 (uint64x2_t __a, uint64x2_t __b)
|
||||
{
|
||||
uint64x2_t __mask1 = {1, 2};
|
||||
return __builtin_shuffle ( __a, __b, __mask1) ;
|
||||
}
|
||||
|
||||
uint64x2_t
|
||||
tst_vextq_u64_rotate (uint64x2_t __a)
|
||||
{
|
||||
uint64x2_t __mask1 = {1, 0};
|
||||
return __builtin_shuffle ( __a, __mask1) ;
|
||||
}
|
||||
|
||||
/* { dg-final {scan-assembler-times "vext\.8\\t" 4} } */
|
||||
/* { dg-final {scan-assembler-times "vext\.16\\t" 4} } */
|
||||
/* { dg-final {scan-assembler-times "vext\.32\\t" 3} } */
|
||||
/* { dg-final {scan-assembler-times "vrev64\.32\\t" 1} } */
|
||||
/* { dg-final {scan-assembler-times "vext\.64\\t" 2} } */
|
Loading…
x
Reference in New Issue
Block a user