Add __builtin_fma vector transform for i386.

From-SVN: r165857
This commit is contained in:
Richard Henderson 2010-10-22 12:43:21 -07:00 committed by Richard Henderson
parent 7a12785c5f
commit 834d5dce79
4 changed files with 220 additions and 1 deletion

View File

@ -1,3 +1,8 @@
2010-10-22 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (ix86_builtin_vectorized_function): Add
cases for __builtin_fma and __builtin_fmaf.
2010-10-22 Richard Henderson <rth@redhat.com>
* config/i386/i386.c (ix86_expand_fp_absneg_operator): Produce

View File

@ -26105,8 +26105,28 @@ ix86_builtin_vectorized_function (tree fndecl, tree type_out,
}
break;
case BUILT_IN_FMA:
if (out_mode == DFmode && in_mode == DFmode)
{
if (out_n == 2 && in_n == 2)
return ix86_builtins[IX86_BUILTIN_VFMADDPD];
if (out_n == 4 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_VFMADDPD256];
}
break;
case BUILT_IN_FMAF:
if (out_mode == SFmode && in_mode == SFmode)
{
if (out_n == 4 && in_n == 4)
return ix86_builtins[IX86_BUILTIN_VFMADDPS];
if (out_n == 8 && in_n == 8)
return ix86_builtins[IX86_BUILTIN_VFMADDPS256];
}
break;
default:
;
break;
}
/* Dispatch to a handler for a vectorization library. */

View File

@ -0,0 +1,97 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mfma -mno-fma4" } */
/* Element count of every test array.  The #ifndef guard lets a
   definition of SIZE supplied before this point take precedence.  */
#ifndef SIZE
#define SIZE 1024
#endif
/* Double-precision arrays used by the __builtin_fma loops below: vda
   is the destination, vdb/vdc/vdd are the three operands.  Each array
   is declared with 32-byte alignment.  */
double vda[SIZE] __attribute__((__aligned__(32)));
double vdb[SIZE] __attribute__((__aligned__(32)));
double vdc[SIZE] __attribute__((__aligned__(32)));
double vdd[SIZE] __attribute__((__aligned__(32)));
/* Single-precision counterparts used by the __builtin_fmaf loops:
   vfa is the destination, vfb/vfc/vfd are the three operands.  */
float vfa[SIZE] __attribute__((__aligned__(32)));
float vfb[SIZE] __attribute__((__aligned__(32)));
float vfc[SIZE] __attribute__((__aligned__(32)));
float vfd[SIZE] __attribute__((__aligned__(32)));
/* Double precision, no operand negated: for every index in [0, SIZE)
   store __builtin_fma (vdb, vdc, vdd) into vda.  The operand form is
   kept exactly as written because the scan-assembler-times directives
   at the end of this test inspect the code generated for it.  */
void
vector_fma (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vda[idx] = __builtin_fma (vdb[idx], vdc[idx], vdd[idx]);
}
/* Double precision, addend negated: for every index in [0, SIZE)
   store __builtin_fma (vdb, vdc, -vdd) into vda.  The negation is
   deliberately spelled on the third operand; the scan-assembler-times
   directives at the end of this test inspect the generated code.  */
void
vector_fms (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vda[idx] = __builtin_fma (vdb[idx], vdc[idx], -vdd[idx]);
}
/* Double precision, first multiplicand negated: for every index in
   [0, SIZE) store __builtin_fma (-vdb, vdc, vdd) into vda.  The
   negation is deliberately spelled on the first operand; the
   scan-assembler-times directives at the end of this test inspect the
   generated code.  */
void
vector_fnma (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vda[idx] = __builtin_fma (-vdb[idx], vdc[idx], vdd[idx]);
}
/* Double precision, first multiplicand and addend both negated: for
   every index in [0, SIZE) store __builtin_fma (-vdb, vdc, -vdd) into
   vda.  Both negations are deliberately spelled on the operands; the
   scan-assembler-times directives at the end of this test inspect the
   generated code.  */
void
vector_fnms (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vda[idx] = __builtin_fma (-vdb[idx], vdc[idx], -vdd[idx]);
}
/* Single precision, no operand negated: for every index in [0, SIZE)
   store __builtin_fmaf (vfb, vfc, vfd) into vfa.  The operand form is
   kept exactly as written because the scan-assembler-times directives
   at the end of this test inspect the code generated for it.  */
void
vector_fmaf (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vfa[idx] = __builtin_fmaf (vfb[idx], vfc[idx], vfd[idx]);
}
/* Single precision, addend negated: for every index in [0, SIZE)
   store __builtin_fmaf (vfb, vfc, -vfd) into vfa.  The negation is
   deliberately spelled on the third operand; the scan-assembler-times
   directives at the end of this test inspect the generated code.  */
void
vector_fmsf (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vfa[idx] = __builtin_fmaf (vfb[idx], vfc[idx], -vfd[idx]);
}
/* Single precision, first multiplicand negated: for every index in
   [0, SIZE) store __builtin_fmaf (-vfb, vfc, vfd) into vfa.  The
   negation is deliberately spelled on the first operand; the
   scan-assembler-times directives at the end of this test inspect the
   generated code.  */
void
vector_fnmaf (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vfa[idx] = __builtin_fmaf (-vfb[idx], vfc[idx], vfd[idx]);
}
/* Single precision, first multiplicand and addend both negated: for
   every index in [0, SIZE) store __builtin_fmaf (-vfb, vfc, -vfd)
   into vfa.  Both negations are deliberately spelled on the operands;
   the scan-assembler-times directives at the end of this test inspect
   the generated code.  */
void
vector_fnmsf (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vfa[idx] = __builtin_fmaf (-vfb[idx], vfc[idx], -vfd[idx]);
}
/* { dg-final { scan-assembler-times "vfmadd...ps" 1 } } */
/* { dg-final { scan-assembler-times "vfmadd...pd" 1 } } */
/* { dg-final { scan-assembler-times "vfmsub...ps" 1 } } */
/* { dg-final { scan-assembler-times "vfmsub...pd" 1 } } */
/* { dg-final { scan-assembler-times "vfnmadd...ps" 1 } } */
/* { dg-final { scan-assembler-times "vfnmadd...pd" 1 } } */
/* { dg-final { scan-assembler-times "vfnmsub...ps" 1 } } */
/* { dg-final { scan-assembler-times "vfnmsub...pd" 1 } } */

View File

@ -0,0 +1,97 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mfma4" } */
/* Element count of every test array.  The #ifndef guard lets a
   definition of SIZE supplied before this point take precedence.  */
#ifndef SIZE
#define SIZE 1024
#endif
/* Double-precision arrays used by the __builtin_fma loops below: vda
   is the destination, vdb/vdc/vdd are the three operands.  Each array
   is declared with 32-byte alignment.  */
double vda[SIZE] __attribute__((__aligned__(32)));
double vdb[SIZE] __attribute__((__aligned__(32)));
double vdc[SIZE] __attribute__((__aligned__(32)));
double vdd[SIZE] __attribute__((__aligned__(32)));
/* Single-precision counterparts used by the __builtin_fmaf loops:
   vfa is the destination, vfb/vfc/vfd are the three operands.  */
float vfa[SIZE] __attribute__((__aligned__(32)));
float vfb[SIZE] __attribute__((__aligned__(32)));
float vfc[SIZE] __attribute__((__aligned__(32)));
float vfd[SIZE] __attribute__((__aligned__(32)));
/* Double precision, no operand negated: for every index in [0, SIZE)
   store __builtin_fma (vdb, vdc, vdd) into vda.  The operand form is
   kept exactly as written because the scan-assembler-times directives
   at the end of this test inspect the code generated for it.  */
void
vector_fma (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vda[idx] = __builtin_fma (vdb[idx], vdc[idx], vdd[idx]);
}
/* Double precision, addend negated: for every index in [0, SIZE)
   store __builtin_fma (vdb, vdc, -vdd) into vda.  The negation is
   deliberately spelled on the third operand; the scan-assembler-times
   directives at the end of this test inspect the generated code.  */
void
vector_fms (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vda[idx] = __builtin_fma (vdb[idx], vdc[idx], -vdd[idx]);
}
/* Double precision, first multiplicand negated: for every index in
   [0, SIZE) store __builtin_fma (-vdb, vdc, vdd) into vda.  The
   negation is deliberately spelled on the first operand; the
   scan-assembler-times directives at the end of this test inspect the
   generated code.  */
void
vector_fnma (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vda[idx] = __builtin_fma (-vdb[idx], vdc[idx], vdd[idx]);
}
/* Double precision, first multiplicand and addend both negated: for
   every index in [0, SIZE) store __builtin_fma (-vdb, vdc, -vdd) into
   vda.  Both negations are deliberately spelled on the operands; the
   scan-assembler-times directives at the end of this test inspect the
   generated code.  */
void
vector_fnms (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vda[idx] = __builtin_fma (-vdb[idx], vdc[idx], -vdd[idx]);
}
/* Single precision, no operand negated: for every index in [0, SIZE)
   store __builtin_fmaf (vfb, vfc, vfd) into vfa.  The operand form is
   kept exactly as written because the scan-assembler-times directives
   at the end of this test inspect the code generated for it.  */
void
vector_fmaf (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vfa[idx] = __builtin_fmaf (vfb[idx], vfc[idx], vfd[idx]);
}
/* Single precision, addend negated: for every index in [0, SIZE)
   store __builtin_fmaf (vfb, vfc, -vfd) into vfa.  The negation is
   deliberately spelled on the third operand; the scan-assembler-times
   directives at the end of this test inspect the generated code.  */
void
vector_fmsf (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vfa[idx] = __builtin_fmaf (vfb[idx], vfc[idx], -vfd[idx]);
}
/* Single precision, first multiplicand negated: for every index in
   [0, SIZE) store __builtin_fmaf (-vfb, vfc, vfd) into vfa.  The
   negation is deliberately spelled on the first operand; the
   scan-assembler-times directives at the end of this test inspect the
   generated code.  */
void
vector_fnmaf (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vfa[idx] = __builtin_fmaf (-vfb[idx], vfc[idx], vfd[idx]);
}
/* Single precision, first multiplicand and addend both negated: for
   every index in [0, SIZE) store __builtin_fmaf (-vfb, vfc, -vfd)
   into vfa.  Both negations are deliberately spelled on the operands;
   the scan-assembler-times directives at the end of this test inspect
   the generated code.  */
void
vector_fnmsf (void)
{
  int idx;

  for (idx = 0; idx < SIZE; ++idx)
    vfa[idx] = __builtin_fmaf (-vfb[idx], vfc[idx], -vfd[idx]);
}
/* { dg-final { scan-assembler-times "vfmaddps" 1 } } */
/* { dg-final { scan-assembler-times "vfmaddpd" 1 } } */
/* { dg-final { scan-assembler-times "vfmsubps" 1 } } */
/* { dg-final { scan-assembler-times "vfmsubpd" 1 } } */
/* { dg-final { scan-assembler-times "vfnmaddps" 1 } } */
/* { dg-final { scan-assembler-times "vfnmaddpd" 1 } } */
/* { dg-final { scan-assembler-times "vfnmsubps" 1 } } */
/* { dg-final { scan-assembler-times "vfnmsubpd" 1 } } */