i386: Prefer TARGET_AVX over TARGET_SSE_TYPELESS_STORES

movaps/movups are one byte shorter than movdqa/movdqu.  But that isn't the
case for AVX or AVX512.  This patch prefers TARGET_AVX over
TARGET_SSE_TYPELESS_STORES and adjusts vmovups checks in assembly outputs.

gcc/

	PR target/91461
	* config/i386/i386.md (*movoi_internal_avx): Remove
	TARGET_SSE_TYPELESS_STORES check.
	(*movti_internal): Prefer TARGET_AVX over
	TARGET_SSE_TYPELESS_STORES.
	(*movtf_internal): Likewise.
	* config/i386/sse.md (mov<mode>_internal): Prefer TARGET_AVX over
	TARGET_SSE_TYPELESS_STORES.  Remove "<MODE_SIZE> == 16" check
	from TARGET_SSE_TYPELESS_STORES.

gcc/testsuite/

	PR target/91461
	* gcc.target/i386/avx256-unaligned-store-2.c: Don't check
	vmovups.
	* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
	* gcc.target/i386/pieces-memcpy-4.c: Likewise.
	* gcc.target/i386/pieces-memcpy-5.c: Likewise.
	* gcc.target/i386/pieces-memcpy-6.c: Likewise.
	* gcc.target/i386/pieces-strcpy-2.c: Likewise.
	* gcc.target/i386/pr90980-1.c: Likewise.
	* gcc.target/i386/pr87317-4.c: Check "\tvmovd\t" instead of
	"vmovd" to avoid matching "vmovdqu".
	* gcc.target/i386/pr87317-5.c: Likewise.
	* gcc.target/i386/pr87317-7.c: Likewise.
	* gcc.target/i386/pr91461-1.c: New test.
	* gcc.target/i386/pr91461-2.c: Likewise.
	* gcc.target/i386/pr91461-3.c: Likewise.
	* gcc.target/i386/pr91461-4.c: Likewise.
	* gcc.target/i386/pr91461-5.c: Likewise.
This commit is contained in:
H.J. Lu 2020-01-28 11:32:56 -08:00
parent 6c8e584430
commit dd9b529f08
19 changed files with 253 additions and 27 deletions

View File

@ -1,3 +1,15 @@
2020-01-28 H.J. Lu <hongjiu.lu@intel.com>
PR target/91461
* config/i386/i386.md (*movoi_internal_avx): Remove
TARGET_SSE_TYPELESS_STORES check.
(*movti_internal): Prefer TARGET_AVX over
TARGET_SSE_TYPELESS_STORES.
(*movtf_internal): Likewise.
* config/i386/sse.md (mov<mode>_internal): Prefer TARGET_AVX over
TARGET_SSE_TYPELESS_STORES. Remove "<MODE_SIZE> == 16" check
from TARGET_SSE_TYPELESS_STORES.
2020-01-28 David Malcolm <dmalcolm@redhat.com>
* diagnostic-core.h (warning_at): Rename overload to...

View File

@ -1980,9 +1980,7 @@
(and (eq_attr "alternative" "1")
(match_test "TARGET_AVX512VL"))
(const_string "XI")
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(and (eq_attr "alternative" "3")
(match_test "TARGET_SSE_TYPELESS_STORES")))
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V8SF")
]
(const_string "OI")))])
@ -2059,13 +2057,13 @@
(and (eq_attr "alternative" "3")
(match_test "TARGET_AVX512VL"))
(const_string "XI")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(and (eq_attr "alternative" "5")
(match_test "TARGET_SSE_TYPELESS_STORES"))))
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(match_test "optimize_function_for_size_p (cfun)")
(const_string "V4SF")
]
@ -3324,13 +3322,13 @@
(set (attr "mode")
(cond [(eq_attr "alternative" "3,4")
(const_string "DI")
(match_test "TARGET_AVX")
(const_string "TI")
(match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(const_string "V4SF")
(and (eq_attr "alternative" "2")
(match_test "TARGET_SSE_TYPELESS_STORES"))
(const_string "V4SF")
(match_test "TARGET_AVX")
(const_string "TI")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")

View File

@ -1116,13 +1116,12 @@
(cond [(and (eq_attr "alternative" "1")
(match_test "TARGET_AVX512VL"))
(const_string "<sseinsnmode>")
(and (match_test "<MODE_SIZE> == 16")
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(and (eq_attr "alternative" "3")
(match_test "TARGET_SSE_TYPELESS_STORES"))))
(const_string "<ssePSmode>")
(match_test "TARGET_AVX")
(const_string "<sseinsnmode>")
(ior (match_test "TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL")
(and (eq_attr "alternative" "3")
(match_test "TARGET_SSE_TYPELESS_STORES")))
(const_string "<ssePSmode>")
(ior (not (match_test "TARGET_SSE2"))
(match_test "optimize_function_for_size_p (cfun)"))
(const_string "V4SF")

View File

@ -1,3 +1,24 @@
2020-01-28 H.J. Lu <hongjiu.lu@intel.com>
PR target/91461
* gcc.target/i386/avx256-unaligned-store-2.c: Don't check
vmovups.
* gcc.target/i386/avx256-unaligned-store-3.c: Likewise.
* gcc.target/i386/pieces-memcpy-4.c: Likewise.
* gcc.target/i386/pieces-memcpy-5.c: Likewise.
* gcc.target/i386/pieces-memcpy-6.c: Likewise.
* gcc.target/i386/pieces-strcpy-2.c: Likewise.
* gcc.target/i386/pr90980-1.c: Likewise.
* gcc.target/i386/pr87317-4.c: Check "\tvmovd\t" instead of
"vmovd" to avoid matching "vmovdqu".
* gcc.target/i386/pr87317-5.c: Likewise.
* gcc.target/i386/pr87317-7.c: Likewise.
* gcc.target/i386/pr91461-1.c: New test.
* gcc.target/i386/pr91461-2.c: Likewise.
* gcc.target/i386/pr91461-3.c: Likewise.
* gcc.target/i386/pr91461-4.c: Likewise.
* gcc.target/i386/pr91461-5.c: Likewise.
2020-01-28 David Malcolm <dmalcolm@redhat.com>
* gcc.dg/plugin/diagnostic_plugin_test_metadata.c: Update for

View File

@ -23,6 +23,6 @@ avx_test (void)
}
}
/* { dg-final { scan-assembler-not "vmovups.*movv32qi_internal/3" } } */
/* { dg-final { scan-assembler "vmovups.*movv16qi_internal/3" } } */
/* { dg-final { scan-assembler-not "vmovdqu.*movv32qi_internal/3" } } */
/* { dg-final { scan-assembler "vmovdqu.*movv16qi_internal/3" } } */
/* { dg-final { scan-assembler "vextract.128" } } */

View File

@ -17,6 +17,6 @@ avx_test (void)
d[i] = c[i] * 20.0;
}
/* { dg-final { scan-assembler-not "vmovups.*movv4df_internal/3" } } */
/* { dg-final { scan-assembler "vmovups.*movv2df_internal/3" } } */
/* { dg-final { scan-assembler-not "vmovupd.*movv4df_internal/3" } } */
/* { dg-final { scan-assembler "vmovupd.*movv2df_internal/3" } } */
/* { dg-final { scan-assembler "vextractf128" } } */

View File

@ -9,5 +9,4 @@ foo (void)
__builtin_memcpy (dst, src, 18);
}
/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */

View File

@ -9,5 +9,4 @@ foo (void)
__builtin_memcpy (dst, src, 19);
}
/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 1 } } */
/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 1 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */

View File

@ -9,5 +9,4 @@ foo (void)
__builtin_memcpy (dst, src, 33);
}
/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 2 } } */
/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 2 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */

View File

@ -12,4 +12,4 @@ foo (char *s)
}
/* { dg-final { scan-assembler-times "vmovdqa\[ \\t\]+\[^\n\]*%xmm" 4 } } */
/* { dg-final { scan-assembler-times "vmovups\[ \\t\]+\[^\n\]*%xmm" 4 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[ \\t\]+\[^\n\]*%xmm" 4 } } */

View File

@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */
/* { dg-final { scan-assembler-not "vmovd" } } */
/* { dg-final { scan-assembler-not "\tvmovd\t" } } */
#include <immintrin.h>

View File

@ -1,7 +1,7 @@
/* { dg-do compile } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxwq" 1 } } */
/* { dg-final { scan-assembler-not "vmovd" } } */
/* { dg-final { scan-assembler-not "\tvmovd\t" } } */
#include <immintrin.h>

View File

@ -1,7 +1,7 @@
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -march=haswell" } */
/* { dg-final { scan-assembler-times "vpmovzxbd" 1 } } */
/* { dg-final { scan-assembler-not "vmovd" } } */
/* { dg-final { scan-assembler-not "\tvmovd\t" } } */
#include <immintrin.h>

View File

@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-options "-march=skylake-avx512 -O2" } */
/* { dg-final { scan-assembler-times "(?:vmovups|vmovdqu)\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
/* { dg-final { scan-assembler-times "vmovdqu\[2346\]*\[ \\t\]+\[^\{\n\]*%xmm\[0-9\]+\[^\n\]*\\)(?:\n|\[ \\t\]+#)" 2 } } */
#include <immintrin.h>

View File

@ -0,0 +1,66 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
/* { dg-final { scan-assembler "\tvmovdqa\t" } } */
/* { dg-final { scan-assembler "\tvmovdqu\t" } } */
/* { dg-final { scan-assembler "\tvmovapd\t" } } */
/* { dg-final { scan-assembler "\tvmovupd\t" } } */
/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
/* { dg-final { scan-assembler-not "\tvmovups\t" } } */
#include <immintrin.h>
void
foo1 (__m128i *p, __m128i x)
{
*p = x;
}
void
foo2 (__m128d *p, __m128d x)
{
*p = x;
}
void
foo3 (__float128 *p, __float128 x)
{
*p = x;
}
void
foo4 (__m128i_u *p, __m128i x)
{
*p = x;
}
void
foo5 (__m128d_u *p, __m128d x)
{
*p = x;
}
typedef __float128 __float128_u __attribute__ ((__aligned__ (1)));
void
foo6 (__float128_u *p, __float128 x)
{
*p = x;
}
#ifdef __x86_64__
typedef __int128 __int128_u __attribute__ ((__aligned__ (1)));
extern __int128 int128;
void
foo7 (__int128 *p)
{
*p = int128;
}
void
foo8 (__int128_u *p)
{
*p = int128;
}
#endif

View File

@ -0,0 +1,19 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx" } */
/* { dg-final { scan-assembler "\tvmovdqa\t" } } */
/* { dg-final { scan-assembler "\tvmovapd\t" } } */
/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
/* Compile-only test: with -O2 -mavx, the 256-bit stores below must produce
   vmovdqa and vmovapd (per the directives above) and must not produce
   vmovaps.  Only plain (non "_u") pointer types are exercised here.  */
#include <immintrin.h>
/* Store a 256-bit integer vector through a plain __m256i pointer.  */
void
foo1 (__m256i *p, __m256i x)
{
*p = x;
}
/* Store a 256-bit double vector through a plain __m256d pointer.  */
void
foo2 (__m256d *p, __m256d x)
{
*p = x;
}

View File

@ -0,0 +1,76 @@
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -mavx512f -mavx512vl" } */
/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
/* { dg-final { scan-assembler-not "\tvmovups\t" } } */
/* Compile-only test (skipped on ia32): with AVX512F and AVX512VL enabled,
   none of the 128-bit stores below may be emitted as vmovaps or vmovups.

   Every function follows the same pattern: the incoming value is copied
   into a local register variable bound to "xmm16", passed through an empty
   asm as a read-write "+v" operand, and then stored.  Presumably the aim is
   to make the store source a register above xmm15, which needs the AVX512
   encoding -- NOTE(review): confirm against the PR discussion.  */
#include <immintrin.h>
/* 128-bit integer vector, plain __m128i destination.  */
void
foo1 (__m128i *p, __m128i a)
{
register __m128i x __asm ("xmm16") = a;
/* Empty asm: keeps x live in its bound register as an in/out operand.  */
asm volatile ("" : "+v" (x));
*p = x;
}
/* 128-bit double vector, plain __m128d destination.  */
void
foo2 (__m128d *p, __m128d a)
{
register __m128d x __asm ("xmm16") = a;
asm volatile ("" : "+v" (x));
*p = x;
}
/* __float128 scalar, plain __float128 destination.  */
void
foo3 (__float128 *p, __float128 a)
{
register __float128 x __asm ("xmm16") = a;
asm volatile ("" : "+v" (x));
*p = x;
}
/* As foo1, but the destination uses the __m128i_u pointer type (presumably
   the alignment-1 variant from <immintrin.h> -- TODO confirm).  */
void
foo4 (__m128i_u *p, __m128i a)
{
register __m128i x __asm ("xmm16") = a;
asm volatile ("" : "+v" (x));
*p = x;
}
/* As foo2, but the destination uses the __m128d_u pointer type (presumably
   the alignment-1 variant from <immintrin.h> -- TODO confirm).  */
void
foo5 (__m128d_u *p, __m128d a)
{
register __m128d x __asm ("xmm16") = a;
asm volatile ("" : "+v" (x));
*p = x;
}
/* __float128 with its alignment lowered to 1, used for the foo6 store.  */
typedef __float128 __float128_u __attribute__ ((__aligned__ (1)));
/* As foo3, but the destination uses the alignment-1 __float128_u type.  */
void
foo6 (__float128_u *p, __float128 a)
{
register __float128 x __asm ("xmm16") = a;
asm volatile ("" : "+v" (x));
*p = x;
}
/* __int128 with its alignment lowered to 1, used for the foo8 store.  */
typedef __int128 __int128_u __attribute__ ((__aligned__ (1)));
/* External source value for the __int128 cases.  */
extern __int128 int128;
/* __int128 loaded from the external variable, plain __int128 destination.  */
void
foo7 (__int128 *p)
{
register __int128 x __asm ("xmm16") = int128;
asm volatile ("" : "+v" (x));
*p = x;
}
/* As foo7, but the destination uses the alignment-1 __int128_u type.  */
void
foo8 (__int128_u *p)
{
register __int128 x __asm ("xmm16") = int128;
asm volatile ("" : "+v" (x));
*p = x;
}

View File

@ -0,0 +1,21 @@
/* { dg-do compile { target { ! ia32 } } } */
/* { dg-options "-O2 -mavx512f -mavx512vl" } */
/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
/* Compile-only test (skipped on ia32): with AVX512F and AVX512VL enabled,
   the 256-bit stores below must not be emitted as vmovaps.

   Both functions bind a local register variable to "xmm16", pass it through
   an empty asm as a read-write "+v" operand, and then store it.
   NOTE(review): the 256-bit variables are bound using the name "xmm16";
   presumably GCC's register-variable syntax uses the xmm name for the wider
   modes as well -- confirm.  */
#include <immintrin.h>
/* 256-bit integer vector, plain __m256i destination.  */
void
foo1 (__m256i *p, __m256i a)
{
register __m256i x __asm ("xmm16") = a;
/* Empty asm: keeps x live in its bound register as an in/out operand.  */
asm volatile ("" : "+v" (x));
*p = x;
}
/* 256-bit double vector, plain __m256d destination.  */
void
foo2 (__m256d *p, __m256d a)
{
register __m256d x __asm ("xmm16") = a;
asm volatile ("" : "+v" (x));
*p = x;
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mavx512f" } */
/* { dg-final { scan-assembler-not "\tvmovaps\t" } } */
/* Compile-only test: with -O2 -mavx512f, the 512-bit stores below must not
   be emitted as vmovaps.  Each function is a bare store so that the store
   instruction is the only interesting output.  */
#include <immintrin.h>
/* Store a 512-bit integer vector through a plain __m512i pointer.  */
void
foo1 (__m512i *p, __m512i x)
{
*p = x;
}
/* Store a 512-bit double vector through a plain __m512d pointer.  */
void
foo2 (__m512d *p, __m512d x)
{
*p = x;
}