2
0
mirror of git://gcc.gnu.org/git/gcc.git synced 2025-04-25 07:40:29 +08:00

i386: Fix some -mavx512vl -mno-avx512bw bugs [PR99321]

As I wrote in the mail with the previous PR99321 fix, we have various
bugs where we emit instructions that need avx512bw and avx512vl
ISAs when compiling with -mavx512vl -mno-avx512bw.

Without the following patch, the attached testcase fails with:
/tmp/ccW4PsfG.s: Assembler messages:
/tmp/ccW4PsfG.s:9: Error: unsupported instruction `vpaddb'
/tmp/ccW4PsfG.s:20: Error: unsupported instruction `vpaddb'
/tmp/ccW4PsfG.s:31: Error: unsupported instruction `vpaddw'
/tmp/ccW4PsfG.s:42: Error: unsupported instruction `vpaddw'
/tmp/ccW4PsfG.s:53: Error: unsupported instruction `vpsubb'
/tmp/ccW4PsfG.s:64: Error: unsupported instruction `vpsubb'
/tmp/ccW4PsfG.s:75: Error: unsupported instruction `vpsubw'
/tmp/ccW4PsfG.s:86: Error: unsupported instruction `vpsubw'
/tmp/ccW4PsfG.s:97: Error: unsupported instruction `vpmullw'
/tmp/ccW4PsfG.s:108: Error: unsupported instruction `vpmullw'
/tmp/ccW4PsfG.s:133: Error: unsupported instruction `vpminub'
/tmp/ccW4PsfG.s:144: Error: unsupported instruction `vpminuw'
/tmp/ccW4PsfG.s:155: Error: unsupported instruction `vpminuw'
/tmp/ccW4PsfG.s:166: Error: unsupported instruction `vpminsb'
/tmp/ccW4PsfG.s:177: Error: unsupported instruction `vpminsb'
/tmp/ccW4PsfG.s:202: Error: unsupported instruction `vpminsw'
/tmp/ccW4PsfG.s:227: Error: unsupported instruction `vpmaxub'
/tmp/ccW4PsfG.s:238: Error: unsupported instruction `vpmaxuw'
/tmp/ccW4PsfG.s:249: Error: unsupported instruction `vpmaxuw'
/tmp/ccW4PsfG.s:260: Error: unsupported instruction `vpmaxsb'
/tmp/ccW4PsfG.s:271: Error: unsupported instruction `vpmaxsb'
/tmp/ccW4PsfG.s:296: Error: unsupported instruction `vpmaxsw'

We already have Yw constraint which is equivalent to v for
-mavx512bw -mavx512vl and to nothing otherwise, per discussions
this patch changes it to stand for x otherwise.  As it is an
undocumented internal constraint, hopefully it won't affect
any inline asm in the wild.
For the instructions that need both ISAs we need to use Yw in their
constraints, and plain v for the modes that don't need them.

2021-03-07  Jakub Jelinek  <jakub@redhat.com>

	PR target/99321
	* config/i386/constraints.md (Yw): Use SSE_REGS if TARGET_SSE
	but TARGET_AVX512BW or TARGET_AVX512VL is not set.  Adjust description
	and comment.
	* config/i386/sse.md (v_Yw): New define_mode_attr.
	(*<insn><mode>3, *mul<mode>3<mask_name>, *avx2_<code><mode>3,
	*sse4_1_<code><mode>3<mask_name>): Use <v_Yw> instead of v
	in constraints.
	* config/i386/mmx.md (mmx_pshufw_1, *vec_dupv4hi): Use Yw instead of
	xYw in constraints.

	* lib/target-supports.exp
	(check_effective_target_assembler_march_noavx512bw): New effective
	target.
	* gcc.target/i386/avx512vl-pr99321-1.c: New test.
This commit is contained in:
Jakub Jelinek 2021-03-07 10:27:28 +01:00
parent 0ad6a2e2f0
commit a18ebd6c43
5 changed files with 79 additions and 22 deletions

@@ -110,7 +110,7 @@
;; v any EVEX encodable SSE register for AVX512VL target,
;; otherwise any SSE register
;; w any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
;; target.
;; target, otherwise any SSE register.
(define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
"First SSE register (@code{%xmm0}).")
@@ -148,8 +148,8 @@
"@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
(define_register_constraint "Yw"
"TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
"@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.")
"TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
"@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
;; We use the B prefix to denote any number of internal operands:
;; f FLAGS_REG

@@ -2021,9 +2021,9 @@
})
(define_insn "mmx_pshufw_1"
[(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
[(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
(vec_select:V4HI
(match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
(match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
(parallel [(match_operand 2 "const_0_to_3_operand")
(match_operand 3 "const_0_to_3_operand")
(match_operand 4 "const_0_to_3_operand")
@@ -2105,10 +2105,10 @@
(set_attr "mode" "DI,TI")])
(define_insn "*vec_dupv4hi"
[(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
[(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
(vec_duplicate:V4HI
(truncate:HI
(match_operand:SI 1 "register_operand" "0,xYw"))))]
(match_operand:SI 1 "register_operand" "0,Yw"))))]
"(TARGET_MMX || TARGET_MMX_WITH_SSE)
&& (TARGET_SSE || TARGET_3DNOW_A)"
"@

@@ -560,6 +560,14 @@
(V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
(V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
;; Map a vector mode to the register constraint for its SSE operands:
;; 128/256-bit QI/HI element modes get Yw (EVEX-encodable registers only
;; when both AVX512BW and AVX512VL are available, otherwise any SSE
;; register), every other mode gets plain v.  Used by insns whose
;; byte/word forms need AVX512BW to be EVEX encoded (PR target/99321).
(define_mode_attr v_Yw
[(V16QI "Yw") (V32QI "Yw") (V64QI "v")
(V8HI "Yw") (V16HI "Yw") (V32HI "v")
(V4SI "v") (V8SI "v") (V16SI "v")
(V2DI "v") (V4DI "v") (V8DI "v")
(V4SF "v") (V8SF "v") (V16SF "v")
(V2DF "v") (V4DF "v") (V8DF "v")])
(define_mode_attr sse2_avx_avx512f
[(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
(V8HI "avx512vl") (V16HI "avx512vl") (V32HI "avx512bw")
@@ -11677,10 +11685,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*<insn><mode>3"
[(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
[(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>")
(plusminus:VI_AVX2
(match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
(match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
(match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>")
(match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))]
"TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -11790,9 +11798,9 @@
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
(define_insn "*mul<mode>3<mask_name>"
[(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
(match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
[(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")
(match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))]
"TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
&& <mask_mode512bit_condition> && <mask_avx512bw_condition>"
"@
@@ -12618,10 +12626,10 @@
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
(define_insn "*avx2_<code><mode>3"
[(set (match_operand:VI124_256 0 "register_operand" "=v")
[(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>")
(maxmin:VI124_256
(match_operand:VI124_256 1 "nonimmediate_operand" "%v")
(match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
(match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>")
(match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))]
"TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
"vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "sseiadd")
@@ -12745,10 +12753,10 @@
})
(define_insn "*sse4_1_<code><mode>3<mask_name>"
[(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
[(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>")
(smaxmin:VI14_128
(match_operand:VI14_128 1 "vector_operand" "%0,0,v")
(match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
(match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>")
(match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
"TARGET_SSE4_1
&& <mask_mode512bit_condition>
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"
@@ -12830,10 +12838,10 @@
})
(define_insn "*sse4_1_<code><mode>3<mask_name>"
[(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
[(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>")
(umaxmin:VI24_128
(match_operand:VI24_128 1 "vector_operand" "%0,0,v")
(match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
(match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>")
(match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
"TARGET_SSE4_1
&& <mask_mode512bit_condition>
&& !(MEM_P (operands[1]) && MEM_P (operands[2]))"

@@ -0,0 +1,39 @@
/* PR target/99321 */
/* { dg-do assemble { target lp64 } } */
/* { dg-require-effective-target avx512vl } */
/* { dg-require-effective-target assembler_march_noavx512bw } */
/* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */
#include <x86intrin.h>
typedef unsigned char V1 __attribute__((vector_size (16)));
typedef unsigned char V2 __attribute__((vector_size (32)));
typedef unsigned short V3 __attribute__((vector_size (16)));
typedef unsigned short V4 __attribute__((vector_size (32)));
void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
void f9 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
void f10 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
void f11 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f12 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f13 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f14 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f15 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f16 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f17 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f18 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f19 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f20 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f21 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f22 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f23 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f24 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f25 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f26 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }

@@ -8945,6 +8945,16 @@ proc check_effective_target_avx512bw { } {
} "-mavx512bw" ]
}
# Return 1 if -Wa,-march=+noavx512bw is supported.
proc check_effective_target_assembler_march_noavx512bw {} {
# The -Wa,-march=+noavx512bw option only makes sense on x86 targets.
if { [istarget i?86*-*-*] || [istarget x86_64*-*-*] } {
# Assemble a trivial translation unit; an empty output means the
# assembler accepted -march=+noavx512bw and can therefore reject
# AVX512BW instructions for the test that requires this target.
return [check_no_compiler_messages assembler_march_noavx512bw object {
void foo (void) {}
} "-mno-avx512bw -Wa,-march=+noavx512bw"]
}
# Not an x86 target: the effective target is unsupported.
return 0
}
# Return 1 if avx512vp2intersect instructions can be compiled.
proc check_effective_target_avx512vp2intersect { } {
return [check_no_compiler_messages avx512vp2intersect object {