x86/APX: convert runtime special case to build-time one

cpu_flags_match() is a hot path. Move the special casing that
b7267244a355 ("Support Intel AMX-MOVRS") added there to i386-gen, thus
affecting only build-time performance.
This commit is contained in:
Jan Beulich 2025-01-17 10:28:15 +01:00
parent 247357d23f
commit b88282d573
3 changed files with 18 additions and 12 deletions

View File

@ -2262,14 +2262,6 @@ cpu_flags_match (const insn_template *t)
the "if()" below. */
gas_assert (!cpu_flags_all_zero (&all));
/* For APX_F extension of multiple cpuid enabled insns, we could not
use APX_F(cpuid_A&cpuid_B) since the transformation could not be
done. Instead, we will use cpuid_A & APX_F(cpuid_B), then the
"any" bitfield would not be set for cpuid_A. Set cpuid_A for "any"
here since it is its original meaning. */
if (all.bitfield.cpuamx_transpose && any.bitfield.cpuamx_movrs)
any.bitfield.cpuamx_transpose = 1;
cpu = cpu_flags_and (all, any);
gas_assert (cpu_flags_equal (&cpu, &all));

View File

@ -1039,6 +1039,20 @@ process_i386_cpu_flag (FILE *table, char *flag,
all[Cpu64].value = 1;
output_cpu_flags(table, all, ARRAY_SIZE (all), -1, comma, indent, lineno);
/* For APX_F extension of multiple cpuid enabled insns, we cannot use
APX_F(cpuid_A&cpuid_B) in the opcode table, as the result would fail
to be parsed. Furthermore, the result also wouldn't be quite valid.
However, the assembler's cpu_flags_match() will simply propagate "any"
to "all", zapping "any" afterwards altogether. IOW in this situation
both masks have "&&" meaning. Set the missing flag here. */
if (all[CpuAMX_TRANSPOSE].value && all[CpuAMX_MOVRS].value)
{
if (!any[CpuAPX_F].value || !any[CpuAMX_MOVRS].value)
fail ("%s: %d: internal error: APX_F=%d AMX_MOVRS=%d\n",
filename, lineno, any[CpuAPX_F].value, any[CpuAMX_MOVRS].value);
any[CpuAMX_TRANSPOSE].value = 1;
}
}
output_cpu_flags (table, any, ARRAY_SIZE (any), name != NULL,

View File

@ -43193,7 +43193,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 1, 0, 1, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0 },
{ { 117, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 } },
{ { 117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } },
{ { 117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0 } },
{ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0 } },
{ { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -43203,7 +43203,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 1, 0, 1, 0, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0 },
{ { 117, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 } },
{ { 117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } },
{ { 117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0 } },
{ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0 } },
{ { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -43213,7 +43213,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 1, 0, 1, 1, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0 },
{ { 117, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 } },
{ { 117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } },
{ { 117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0 } },
{ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0 } },
{ { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@ -43223,7 +43223,7 @@ static const insn_template i386_optab[] =
0, 0, 0, 1, 0, 1, 1, 4, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0 },
{ { 117, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0 } },
{ { 117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 } },
{ { 117, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0 } },
{ { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0 } },
{ { 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,