mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-25 07:40:29 +08:00
i386.c (expand_vec_perm_movs): New method matching movs patterns.
* config/i386/i386.c (expand_vec_perm_movs): New method matching movs patterns. (expand_vec_perm_1): Try the new method. * gcc.target/i386/sse2-movs.c: New test. From-SVN: r263549
This commit is contained in:
parent
4720f4afba
commit
107192f73e
gcc
@ -1,3 +1,9 @@
|
||||
2018-08-14 Allan Sandfeld Jensen <allan.jensen@qt.io>
|
||||
|
||||
* config/i386/i386.c (expand_vec_perm_movs): New method matching movs
|
||||
patterns.
|
||||
(expand_vec_perm_1): Try the new method.
|
||||
|
||||
2018-08-14 Ilya Leoshkevich <iii@linux.ibm.com>
|
||||
|
||||
PR target/86547
|
||||
|
@ -113,7 +113,7 @@ _mm_setzero_pd (void)
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_move_sd (__m128d __A, __m128d __B)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_movsd ((__v2df)__A, (__v2df)__B); /* NOTE(review): this diff view has no +/- markers; the 7-line hunk header implies this is the line being replaced -- confirm.  */
|
||||
return __extension__ (__m128d) __builtin_shuffle((__v2df)__A, (__v2df)__B, (__v2di){2, 1}); /* Two-input shuffle indexes the concatenation of __A and __B: selector 2 is __B[0], selector 1 is __A[1].  */
|
||||
}
|
||||
|
||||
/* Load two DPFP values from P. The address must be 16-byte aligned. */
|
||||
|
@ -46145,6 +46145,43 @@ expand_vselect_vconcat (rtx target, rtx op0, rtx op1,
|
||||
return ok;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
||||
using movss or movsd.  Only two-operand permutations in V2DFmode or
|
||||
V4SFmode are considered, and only when TARGET_SSE2 is set.  D matches
|
||||
when perm[0] is 0 or nelt and every later selector perm[i] equals
|
||||
i + nelt - perm[0], i.e. element 0 is selected with one base index and
|
||||
all other elements with the other one.  Returns false when D does not
|
||||
match.  Returns true when it does; unless D->testing_p is set, a SET of
|
||||
D->target to a VEC_MERGE with mask 1 is emitted first.  */
|
||||
static bool
|
||||
expand_vec_perm_movs (struct expand_vec_perm_d *d)
|
||||
{
|
||||
machine_mode vmode = d->vmode;
|
||||
unsigned i, nelt = d->nelt;
|
||||
rtx x;
|
||||
|
||||
if (d->one_operand_p) /* One-operand permutations are not handled here.  */
|
||||
return false;
|
||||
|
||||
if (TARGET_SSE2 && (vmode == V2DFmode || vmode == V4SFmode))
|
||||
; /* Supported target/mode combination: fall through to the checks below.  */
|
||||
else
|
||||
return false;
|
||||
|
||||
/* Only the first element is changed.  perm[0] must be 0 or nelt; each
|
||||
remaining perm[i] must then be i + nelt (when perm[0] is 0) or i (when
|
||||
perm[0] is nelt).  Selectors >= nelt presumably name elements of op1,
|
||||
which is consistent with the VEC_MERGE operand order chosen below.  */
|
||||
if (d->perm[0] != nelt && d->perm[0] != 0)
|
||||
return false;
|
||||
for (i = 1; i < nelt; ++i)
|
||||
if (d->perm[i] != i + nelt - d->perm[0])
|
||||
return false;
|
||||
|
||||
if (d->testing_p) /* Caller only asks whether D matches; emit nothing.  */
|
||||
return true;
|
||||
|
||||
if (d->perm[0] == nelt) /* perm[0] == nelt: op1 goes first, so mask bit 0 takes element 0 from op1.  */
|
||||
x = gen_rtx_VEC_MERGE (vmode, d->op1, d->op0, GEN_INT (1));
|
||||
else /* perm[0] == 0: op0 goes first, so mask bit 0 takes element 0 from op0.  */
|
||||
x = gen_rtx_VEC_MERGE (vmode, d->op0, d->op1, GEN_INT (1));
|
||||
|
||||
emit_insn (gen_rtx_SET (d->target, x));
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* A subroutine of ix86_expand_vec_perm_builtin_1. Try to implement D
|
||||
in terms of blendp[sd] / pblendw / pblendvb / vpblendd. */
|
||||
|
||||
@ -46887,6 +46924,10 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
|
||||
}
|
||||
}
|
||||
|
||||
/* Try movss/movsd instructions. */
|
||||
if (expand_vec_perm_movs (d))
|
||||
return true;
|
||||
|
||||
/* Finally, try the fully general two operand permute. */
|
||||
if (expand_vselect_vconcat (d->target, d->op0, d->op1, d->perm, nelt,
|
||||
d->testing_p))
|
||||
|
@ -1011,7 +1011,10 @@ _mm_storer_ps (float *__P, __m128 __A)
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_move_ss (__m128 __A, __m128 __B)
|
||||
{
|
||||
return (__m128) __builtin_ia32_movss ((__v4sf)__A, (__v4sf)__B); /* NOTE(review): this diff view has no +/- markers; the hunk header (7 old lines, 10 new) implies this is the line being replaced -- confirm.  */
|
||||
return (__m128) __builtin_shuffle ((__v4sf)__A, (__v4sf)__B,
|
||||
__extension__
|
||||
(__attribute__((__vector_size__ (16))) int)
|
||||
{4,1,2,3}); /* Two-input shuffle indexes the concatenation of __A and __B: selector 4 is __B[0], selectors 1-3 are __A[1..3].  */
|
||||
}
|
||||
|
||||
/* Extracts one of the four words of A. The selector N must be immediate. */
|
||||
|
@ -1,3 +1,7 @@
|
||||
2018-08-14 Allan Sandfeld Jensen <allan.jensen@qt.io>
|
||||
|
||||
* gcc.target/i386/sse2-movs.c: New test.
|
||||
|
||||
2018-08-14 Martin Sebor <msebor@redhat.com>
|
||||
|
||||
PR tree-optimization/86650
|
||||
|
21
gcc/testsuite/gcc.target/i386/sse2-movs.c
Normal file
21
gcc/testsuite/gcc.target/i386/sse2-movs.c
Normal file
@ -0,0 +1,21 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -msse2" } */
|
||||
/* { dg-require-effective-target sse2 } */
|
||||
/* { dg-final { scan-assembler "movss" } } */
|
||||
/* { dg-final { scan-assembler "movsd" } } */
|
||||
/* { dg-final { scan-assembler-not "unpcklps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufps" } } */
|
||||
/* { dg-final { scan-assembler-not "shufpd" } } */
|
||||
|
||||
typedef float v4sf __attribute__ ((vector_size (16)));
|
||||
typedef double v2df __attribute__ ((vector_size (16)));
|
||||
|
||||
v4sf movss(v4sf a, v4sf b) /* Builds a vector with lane 0 from B and lanes 1-3 from A; the dg-final directives above scan the output for "movss" and for the absence of unpcklps/shufps.  */
|
||||
{
|
||||
return (v4sf){b[0],a[1],a[2],a[3]};
|
||||
}
|
||||
|
||||
v2df movsd(v2df a, v2df b) /* Builds a vector with lane 0 from B and lane 1 from A; the dg-final directives above scan the output for "movsd" and for the absence of shufpd.  */
|
||||
{
|
||||
return (v2df){b[0],a[1]};
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user