mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-04 23:51:47 +08:00
i386: Optimize psubusw compared to 0 into pminuw compared to op0 [PR96906]
The following patch renames VI12_AVX2 iterator to VI12_AVX2_AVX512BW for consistency with some other iterators, as I need VI12_AVX2 without AVX512BW for this change. The real meat is a combiner split which combine can use to optimize psubusw compared to 0 into pminuw compared to op0 (and similarly for psubusb compared to 0 into pminub compared to op0). According to Agner Fog's tables, psubus[bw] and pminu[bw] timings are the same, but the advantage of pminu[bw] is that the comparison doesn't need a zero operand, so e.g. for -msse4.1 it causes changes like - psubusw %xmm1, %xmm0 - pxor %xmm1, %xmm1 + pminuw %xmm0, %xmm1 pcmpeqw %xmm1, %xmm0 and similarly for avx2: - vpsubusb %ymm1, %ymm0, %ymm0 - vpxor %xmm1, %xmm1, %xmm1 - vpcmpeqb %ymm1, %ymm0, %ymm0 + vpminub %ymm1, %ymm0, %ymm1 + vpcmpeqb %ymm0, %ymm1, %ymm0 I haven't done the AVX512{BW,VL} define_split(s), they'll need to match the UNSPEC_PCMP which are used for avx512 comparisons. 2020-11-26 Jakub Jelinek <jakub@redhat.com> PR target/96906 * config/i386/sse.md (VI12_AVX2): Remove V64QI/V32HI modes. (VI12_AVX2_AVX512BW): New mode iterator. (<sse2_avx2>_<plusminus_insn><mode>3<mask_name>, uavg<mode>3_ceil, <sse2_avx2>_uavg<mode>3<mask_name>): Use VI12_AVX2_AVX512BW iterator instead of VI12_AVX2. (*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>): Likewise. (*<sse2_avx2>_uavg<mode>3<mask_name>): Likewise. (*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>): Add a new define_split after this insn. * gcc.target/i386/pr96906-1.c: New test.
This commit is contained in:
parent
768ce4f0ce
commit
32b0abb24b
@ -466,6 +466,10 @@
|
||||
[(V4TI "TARGET_AVX512BW") (V2TI "TARGET_AVX2") TI])
|
||||
|
||||
(define_mode_iterator VI12_AVX2
|
||||
[(V32QI "TARGET_AVX2") V16QI
|
||||
(V16HI "TARGET_AVX2") V8HI])
|
||||
|
||||
(define_mode_iterator VI12_AVX2_AVX512BW
|
||||
[(V64QI "TARGET_AVX512BW") (V32QI "TARGET_AVX2") V16QI
|
||||
(V32HI "TARGET_AVX512BW") (V16HI "TARGET_AVX2") V8HI])
|
||||
|
||||
@ -11395,18 +11399,18 @@
|
||||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
(define_expand "<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
|
||||
[(set (match_operand:VI12_AVX2 0 "register_operand")
|
||||
(sat_plusminus:VI12_AVX2
|
||||
(match_operand:VI12_AVX2 1 "vector_operand")
|
||||
(match_operand:VI12_AVX2 2 "vector_operand")))]
|
||||
[(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
|
||||
(sat_plusminus:VI12_AVX2_AVX512BW
|
||||
(match_operand:VI12_AVX2_AVX512BW 1 "vector_operand")
|
||||
(match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))]
|
||||
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
|
||||
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
|
||||
|
||||
(define_insn "*<sse2_avx2>_<plusminus_insn><mode>3<mask_name>"
|
||||
[(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
|
||||
(sat_plusminus:VI12_AVX2
|
||||
(match_operand:VI12_AVX2 1 "vector_operand" "<comm>0,v")
|
||||
(match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))]
|
||||
[(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
|
||||
(sat_plusminus:VI12_AVX2_AVX512BW
|
||||
(match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "<comm>0,v")
|
||||
(match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))]
|
||||
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
|
||||
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
|
||||
"@
|
||||
@ -11418,6 +11422,23 @@
|
||||
(set_attr "prefix" "orig,maybe_evex")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
;; PR96906 - optimize psubusw compared to 0 into pminuw compared to op0.
|
||||
(define_split
|
||||
[(set (match_operand:VI12_AVX2 0 "register_operand")
|
||||
(eq:VI12_AVX2
|
||||
(us_minus:VI12_AVX2
|
||||
(match_operand:VI12_AVX2 1 "vector_operand")
|
||||
(match_operand:VI12_AVX2 2 "vector_operand"))
|
||||
(match_operand:VI12_AVX2 3 "const0_operand")))]
|
||||
"TARGET_SSE2
|
||||
&& (<MODE>mode != V8HImode || TARGET_SSE4_1)
|
||||
&& ix86_binary_operator_ok (US_MINUS, <MODE>mode, operands)"
|
||||
[(set (match_dup 4)
|
||||
(umin:VI12_AVX2 (match_dup 1) (match_dup 2)))
|
||||
(set (match_dup 0)
|
||||
(eq:VI12_AVX2 (match_dup 4) (match_dup 1)))]
|
||||
"operands[4] = gen_reg_rtx (<MODE>mode);")
|
||||
|
||||
(define_expand "mulv8qi3"
|
||||
[(set (match_operand:V8QI 0 "register_operand")
|
||||
(mult:V8QI (match_operand:V8QI 1 "register_operand")
|
||||
@ -12022,15 +12043,15 @@
|
||||
})
|
||||
|
||||
(define_expand "uavg<mode>3_ceil"
|
||||
[(set (match_operand:VI12_AVX2 0 "register_operand")
|
||||
(truncate:VI12_AVX2
|
||||
[(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
|
||||
(truncate:VI12_AVX2_AVX512BW
|
||||
(lshiftrt:<ssedoublemode>
|
||||
(plus:<ssedoublemode>
|
||||
(plus:<ssedoublemode>
|
||||
(zero_extend:<ssedoublemode>
|
||||
(match_operand:VI12_AVX2 1 "vector_operand"))
|
||||
(match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
|
||||
(zero_extend:<ssedoublemode>
|
||||
(match_operand:VI12_AVX2 2 "vector_operand")))
|
||||
(match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
|
||||
(match_dup 3))
|
||||
(const_int 1))))]
|
||||
"TARGET_SSE2"
|
||||
@ -15744,15 +15765,15 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
(define_expand "<sse2_avx2>_uavg<mode>3<mask_name>"
|
||||
[(set (match_operand:VI12_AVX2 0 "register_operand")
|
||||
(truncate:VI12_AVX2
|
||||
[(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand")
|
||||
(truncate:VI12_AVX2_AVX512BW
|
||||
(lshiftrt:<ssedoublemode>
|
||||
(plus:<ssedoublemode>
|
||||
(plus:<ssedoublemode>
|
||||
(zero_extend:<ssedoublemode>
|
||||
(match_operand:VI12_AVX2 1 "vector_operand"))
|
||||
(match_operand:VI12_AVX2_AVX512BW 1 "vector_operand"))
|
||||
(zero_extend:<ssedoublemode>
|
||||
(match_operand:VI12_AVX2 2 "vector_operand")))
|
||||
(match_operand:VI12_AVX2_AVX512BW 2 "vector_operand")))
|
||||
(match_dup <mask_expand_op3>))
|
||||
(const_int 1))))]
|
||||
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
|
||||
@ -15762,15 +15783,15 @@
|
||||
})
|
||||
|
||||
(define_insn "*<sse2_avx2>_uavg<mode>3<mask_name>"
|
||||
[(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
|
||||
(truncate:VI12_AVX2
|
||||
[(set (match_operand:VI12_AVX2_AVX512BW 0 "register_operand" "=x,v")
|
||||
(truncate:VI12_AVX2_AVX512BW
|
||||
(lshiftrt:<ssedoublemode>
|
||||
(plus:<ssedoublemode>
|
||||
(plus:<ssedoublemode>
|
||||
(zero_extend:<ssedoublemode>
|
||||
(match_operand:VI12_AVX2 1 "vector_operand" "%0,v"))
|
||||
(match_operand:VI12_AVX2_AVX512BW 1 "vector_operand" "%0,v"))
|
||||
(zero_extend:<ssedoublemode>
|
||||
(match_operand:VI12_AVX2 2 "vector_operand" "xBm,vm")))
|
||||
(match_operand:VI12_AVX2_AVX512BW 2 "vector_operand" "xBm,vm")))
|
||||
(match_operand:<ssedoublemode> <mask_expand_op3> "const1_operand"))
|
||||
(const_int 1))))]
|
||||
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
|
||||
|
62
gcc/testsuite/gcc.target/i386/pr96906-1.c
Normal file
62
gcc/testsuite/gcc.target/i386/pr96906-1.c
Normal file
@ -0,0 +1,62 @@
|
||||
/* PR target/96906 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -mavx2" } */
|
||||
/* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*xmm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\tvpminuw\[^\n\r]*xmm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\tvpminub\[^\n\r]*ymm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\tvpminuw\[^\n\r]*ymm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\tvpcmpeqb\[^\n\r]*xmm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\tvpcmpeqw\[^\n\r]*xmm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\tvpcmpeqb\[^\n\r]*ymm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "\tvpcmpeqw\[^\n\r]*ymm" 2 } } */
|
||||
/* { dg-final { scan-assembler-not "\tvpsubus\[bw]" } } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__m128i
|
||||
f1 (__m128i x, __m128i y)
|
||||
{
|
||||
return _mm_cmpeq_epi16 (_mm_subs_epu16 (x, y), _mm_setzero_si128 ());
|
||||
}
|
||||
|
||||
__m128i
|
||||
f2 (__m128i x, __m128i y)
|
||||
{
|
||||
return _mm_cmpeq_epi16 (_mm_min_epu16 (x, y), x);
|
||||
}
|
||||
|
||||
__m128i
|
||||
f3 (__m128i x, __m128i y)
|
||||
{
|
||||
return _mm_cmpeq_epi8 (_mm_subs_epu8 (x, y), _mm_setzero_si128 ());
|
||||
}
|
||||
|
||||
__m128i
|
||||
f4 (__m128i x, __m128i y)
|
||||
{
|
||||
return _mm_cmpeq_epi8 (_mm_min_epu8 (x, y), x);
|
||||
}
|
||||
|
||||
__m256i
|
||||
f5 (__m256i x, __m256i y)
|
||||
{
|
||||
return _mm256_cmpeq_epi16 (_mm256_subs_epu16 (x, y), _mm256_setzero_si256 ());
|
||||
}
|
||||
|
||||
__m256i
|
||||
f6 (__m256i x, __m256i y)
|
||||
{
|
||||
return _mm256_cmpeq_epi16 (_mm256_min_epu16 (x, y), x);
|
||||
}
|
||||
|
||||
__m256i
|
||||
f7 (__m256i x, __m256i y)
|
||||
{
|
||||
return _mm256_cmpeq_epi8 (_mm256_subs_epu8 (x, y), _mm256_setzero_si256 ());
|
||||
}
|
||||
|
||||
__m256i
|
||||
f8 (__m256i x, __m256i y)
|
||||
{
|
||||
return _mm256_cmpeq_epi8 (_mm256_min_epu8 (x, y), x);
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user