From 76a2c904dff6263556807f1a3e39521bea73f76d Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 15 Nov 2011 19:45:20 +0100 Subject: [PATCH] sse.md (vec_pack_trunc_v2df): Optimize sequence for AVX. * config/i386/sse.md (vec_pack_trunc_v2df): Optimize sequence for AVX. (vec_pack_sfix_trunc_v2df): Ditto. (vec_pack_sfix_v2df): Ditto. (vec_pack_sfix_trunc_v4df): Generate fix_truncv4dfv4si2 and avx_vec_concatv8si patterns. (vec_pack_sfix_v4df): Generate avx_cvtpd2dq256 and avx_vec_concatv8si patterns. testsuite/ChangeLog: * gcc.target/i386/avx-floor-sfix-2-vec.c: New test. * gcc.target/i386/avx-ceil-sfix-2-vec.c: Ditto. * gcc.target/i386/avx-rint-sfix-2-vec.c: Ditto. * gcc.target/i386/avx-round-sfix-2-vec.c: Ditto. From-SVN: r181387 --- gcc/ChangeLog | 20 +++- gcc/config/i386/sse.md | 99 +++++++++++++------ gcc/testsuite/ChangeLog | 15 ++- .../gcc.target/i386/avx-ceil-sfix-2-vec.c | 62 ++++++++++++ .../gcc.target/i386/avx-floor-sfix-2-vec.c | 62 ++++++++++++ .../gcc.target/i386/avx-rint-sfix-2-vec.c | 62 ++++++++++++ .../gcc.target/i386/avx-rint-sfix-vec.c | 9 ++ .../gcc.target/i386/avx-rintf-sfix-vec.c | 9 ++ .../gcc.target/i386/avx-round-sfix-2-vec.c | 62 ++++++++++++ .../gcc.target/i386/sse4_1-rint-sfix-vec.c | 62 ++++++++++++ .../gcc.target/i386/sse4_1-rintf-sfix-vec.c | 62 ++++++++++++ 11 files changed, 486 insertions(+), 38 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-rint-sfix-2-vec.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-round-sfix-2-vec.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-rint-sfix-vec.c create mode 100644 gcc/testsuite/gcc.target/i386/sse4_1-rintf-sfix-vec.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 
ea952bd82ca1..c3abba44a4d1 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2011-11-15 Uros Bizjak + + * config/i386/sse.md (vec_pack_trunc_v2df): Optimize sequence for AVX. + (vec_pack_sfix_trunc_v2df): Ditto. + (vec_pack_sfix_v2df): Ditto. + (vec_pack_sfix_trunc_v4df): Generate fix_truncv4dfv4si2 and + avx_vec_concatv8si patterns. + (vec_pack_sfix_v4df): Generate avx_cvtpd2dq256 and + avx_vec_concatv8si patterns. + 2011-11-15 Torvald Riegel * c-parser.c (c_parser_transaction_expression): Require parentheses @@ -35,7 +45,7 @@ (avr_register_target_pragmas): New function. Register address space __pgm. (avr_cpu_cpp_builtins): Add built-in define __PGM. - + * config/avr/avr.c: Include "c-family/c-common.h". (TARGET_LEGITIMATE_ADDRESS_P): Remove define. (TARGET_LEGITIMIZE_ADDRESS): Remove define. @@ -71,7 +81,7 @@ cause (progmem or address space) when code wants to write to flash. (avr_section_type_flags): Unset section flag SECTION_BSS for data in progmem. - + * config/avr/predicates.md (nop_general_operand): New predicate. (nox_general_operand): New predicate. * config/avr/avr.md (LPM_REGNO): New define_constant. @@ -82,7 +92,8 @@ created MEM. (movqi_insn, *movhi, *movpsi, *movsi, *movsf): Change predicate #1 to nox_general_operand. - (ashrqi3, ashrhi3, ashrsi3): Change predicate #1 to nop_general_operand. + (ashrqi3, ashrhi3, ashrsi3): Change predicate #1 to + nop_general_operand. (ashlqi3, *ashlqi3, ashlhi3, ashlsi3): Ditto. (lshrqi3, *lshrqi3, lshrhi3, lshrsi3): Ditto. (split-lpmx): New split. @@ -137,7 +148,8 @@ 2011-11-14 Richard Henderson - * config/rs6000/rs6000.c (emit_load_locked): Assert the mode is handled. + * config/rs6000/rs6000.c (emit_load_locked): Assert the mode is + handled. (emit_store_conditional): Likewise. (rs6000_pre_atomic_barrier, rs6000_post_atomic_barrier): New. (rs6000_adjust_atomic_subword): New. 
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b476752d6538..b8e821de90e8 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -3038,14 +3038,25 @@ (match_operand:V2DF 2 "nonimmediate_operand" "")] "TARGET_SSE2" { - rtx r1, r2; + rtx tmp0, tmp1; - r1 = gen_reg_rtx (V4SFmode); - r2 = gen_reg_rtx (V4SFmode); + if (TARGET_AVX && !TARGET_PREFER_AVX128) + { + tmp0 = gen_reg_rtx (V4DFmode); + tmp1 = force_reg (V2DFmode, operands[1]); - emit_insn (gen_sse2_cvtpd2ps (r1, operands[1])); - emit_insn (gen_sse2_cvtpd2ps (r2, operands[2])); - emit_insn (gen_sse_movlhps (operands[0], r1, r2)); + emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); + emit_insn (gen_avx_cvtpd2ps256 (operands[0], tmp0)); + } + else + { + tmp0 = gen_reg_rtx (V4SFmode); + tmp1 = gen_reg_rtx (V4SFmode); + + emit_insn (gen_sse2_cvtpd2ps (tmp0, operands[1])); + emit_insn (gen_sse2_cvtpd2ps (tmp1, operands[2])); + emit_insn (gen_sse_movlhps (operands[0], tmp0, tmp1)); + } DONE; }) @@ -3057,12 +3068,12 @@ { rtx r1, r2; - r1 = gen_reg_rtx (V8SImode); - r2 = gen_reg_rtx (V8SImode); + r1 = gen_reg_rtx (V4SImode); + r2 = gen_reg_rtx (V4SImode); - emit_insn (gen_avx_cvttpd2dq256_2 (r1, operands[1])); - emit_insn (gen_avx_cvttpd2dq256_2 (r2, operands[2])); - emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20))); + emit_insn (gen_fix_truncv4dfv4si2 (r1, operands[1])); + emit_insn (gen_fix_truncv4dfv4si2 (r2, operands[2])); + emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); DONE; }) @@ -3072,16 +3083,28 @@ (match_operand:V2DF 2 "nonimmediate_operand" "")] "TARGET_SSE2" { - rtx r1, r2; + rtx tmp0, tmp1; - r1 = gen_reg_rtx (V4SImode); - r2 = gen_reg_rtx (V4SImode); + if (TARGET_AVX && !TARGET_PREFER_AVX128) + { + tmp0 = gen_reg_rtx (V4DFmode); + tmp1 = force_reg (V2DFmode, operands[1]); - emit_insn (gen_sse2_cvttpd2dq (r1, operands[1])); - emit_insn (gen_sse2_cvttpd2dq (r2, operands[2])); - emit_insn (gen_vec_interleave_lowv2di (gen_lowpart 
(V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); + emit_insn (gen_fix_truncv4dfv4si2 (operands[0], tmp0)); + } + else + { + tmp0 = gen_reg_rtx (V4SImode); + tmp1 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_cvttpd2dq (tmp0, operands[1])); + emit_insn (gen_sse2_cvttpd2dq (tmp1, operands[2])); + emit_insn + (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), + gen_lowpart (V2DImode, tmp0), + gen_lowpart (V2DImode, tmp1))); + } DONE; }) @@ -3126,12 +3149,12 @@ { rtx r1, r2; - r1 = gen_reg_rtx (V8SImode); - r2 = gen_reg_rtx (V8SImode); + r1 = gen_reg_rtx (V4SImode); + r2 = gen_reg_rtx (V4SImode); - emit_insn (gen_avx_cvtpd2dq256_2 (r1, operands[1])); - emit_insn (gen_avx_cvtpd2dq256_2 (r2, operands[2])); - emit_insn (gen_avx_vperm2f128v8si3 (operands[0], r1, r2, GEN_INT (0x20))); + emit_insn (gen_avx_cvtpd2dq256 (r1, operands[1])); + emit_insn (gen_avx_cvtpd2dq256 (r2, operands[2])); + emit_insn (gen_avx_vec_concatv8si (operands[0], r1, r2)); DONE; }) @@ -3141,16 +3164,28 @@ (match_operand:V2DF 2 "nonimmediate_operand" "")] "TARGET_SSE2" { - rtx r1, r2; + rtx tmp0, tmp1; - r1 = gen_reg_rtx (V4SImode); - r2 = gen_reg_rtx (V4SImode); + if (TARGET_AVX && !TARGET_PREFER_AVX128) + { + tmp0 = gen_reg_rtx (V4DFmode); + tmp1 = force_reg (V2DFmode, operands[1]); - emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); - emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); - emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), - gen_lowpart (V2DImode, r1), - gen_lowpart (V2DImode, r2))); + emit_insn (gen_avx_vec_concatv4df (tmp0, tmp1, operands[2])); + emit_insn (gen_avx_cvtpd2dq256 (operands[0], tmp0)); + } + else + { + tmp0 = gen_reg_rtx (V4SImode); + tmp1 = gen_reg_rtx (V4SImode); + + emit_insn (gen_sse2_cvtpd2dq (tmp0, operands[1])); + emit_insn (gen_sse2_cvtpd2dq (tmp1, operands[2])); + emit_insn + (gen_vec_interleave_lowv2di (gen_lowpart 
(V2DImode, operands[0]), + gen_lowpart (V2DImode, tmp0), + gen_lowpart (V2DImode, tmp1))); + } DONE; }) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index c7a33b10bc16..876d91897cbb 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2011-11-15 Uros Bizjak + + * gcc.target/i386/avx-floor-sfix-2-vec.c: New test. + * gcc.target/i386/avx-ceil-sfix-2-vec.c: Ditto. + * gcc.target/i386/avx-rint-sfix-2-vec.c: Ditto. + * gcc.target/i386/avx-round-sfix-2-vec.c: Ditto. + 2011-11-15 Iain Sandoe * lib/gcc-simulate-thread.exp (simulate-thread): Do not run on @@ -16,10 +23,10 @@ * gcc.target/i386/avx-recip-vec.c: New test. * gcc.target/i386/avx-lrintf-vec.c: Ditto. * gcc.target/i386/avx-lrint-vec.c: Ditto. - * gcc.target/i386/avx-ceilf-vec.c: Include sse4_1-ceilf-vec.c. - * gcc.target/i386/avx-ceil-vec.c: Include sse4_1-ceil-vec.c. * gcc.target/i386/avx-floorf-vec.c: Include sse4_1-floorf-vec.c. * gcc.target/i386/avx-floor-vec.c: Include sse4_1-floor-vec.c. + * gcc.target/i386/avx-ceilf-vec.c: Include sse4_1-ceilf-vec.c. + * gcc.target/i386/avx-ceil-vec.c: Include sse4_1-ceil-vec.c. * gcc.target/i386/avx-rintf-vec.c: Include sse4_1-rintf-vec.c. * gcc.target/i386/avx-rint-vec.c: Include sse4_1-rint-vec.c. * gcc.target/i386/avx-roundf-vec.c: Include sse4_1-roundf-vec.c. @@ -45,6 +52,10 @@ * gcc.target/i386/sse4_1-ceilf-sfix-vec.c: Ditto. * gcc.target/i386/avx-ceil-sfix-vec.c: Ditto. * gcc.target/i386/avx-ceilf-sfix-vec.c: Ditto. + * gcc.target/i386/sse4_1-rint-sfix-vec.c: Ditto. + * gcc.target/i386/sse4_1-rintf-sfix-vec.c: Ditto. + * gcc.target/i386/avx-rint-sfix-vec.c: Ditto. + * gcc.target/i386/avx-rintf-sfix-vec.c: Ditto. * gcc.target/i386/sse4_1-round-sfix-vec.c: Ditto. * gcc.target/i386/sse4_1-roundf-sfix-vec.c: Ditto. * gcc.target/i386/avx-round-sfix-vec.c: Ditto. 
diff --git a/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c b/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c new file mode 100644 index 000000000000..bf48b80717bb --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-ceil-sfix-2-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double ceil (double); + +#define NUM 4 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) ceil (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) ceil (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c b/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c new file mode 100644 index 000000000000..275199cf8f08 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-floor-sfix-2-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double floor (double); + +#define NUM 4 + +static void
+__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) floor (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) floor (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/avx-rint-sfix-2-vec.c b/gcc/testsuite/gcc.target/i386/avx-rint-sfix-2-vec.c new file mode 100644 index 000000000000..9f273af5cbb1 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-rint-sfix-2-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double rint (double); + +#define NUM 4 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) rint (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) rint (a[i])) + abort(); +}
diff --git a/gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c new file mode 100644 index 000000000000..824f2eb7d528 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-rint-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-rint-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c new file mode 100644 index 000000000000..e5ddf790d75f --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-rintf-sfix-vec.c @@ -0,0 +1,9 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#define CHECK_H "avx-check.h" +#define TEST avx_test + +#include "sse4_1-rintf-sfix-vec.c" diff --git a/gcc/testsuite/gcc.target/i386/avx-round-sfix-2-vec.c b/gcc/testsuite/gcc.target/i386/avx-round-sfix-2-vec.c new file mode 100644 index 000000000000..ddb46d9252a4 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-round-sfix-2-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -mavx" } */ +/* { dg-require-effective-target avx } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#endif + +#ifndef TEST +#define TEST avx_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double round (double); + +#define NUM 4 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else
if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) round (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) round (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-rint-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-rint-sfix-vec.c new file mode 100644 index 000000000000..d9c2fbf2d37d --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-rint-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run } */ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern double rint (double); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (double *src) +{ + int i, sign = 1; + double f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + double a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) rint (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) rint (a[i])) + abort(); +} diff --git a/gcc/testsuite/gcc.target/i386/sse4_1-rintf-sfix-vec.c b/gcc/testsuite/gcc.target/i386/sse4_1-rintf-sfix-vec.c new file mode 100644 index 000000000000..1d25f7669d53 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/sse4_1-rintf-sfix-vec.c @@ -0,0 +1,62 @@ +/* { dg-do run }
*/ +/* { dg-options "-O2 -ffast-math -ftree-vectorize -msse4.1" } */ +/* { dg-require-effective-target sse4 } */ +/* { dg-skip-if "no M_PI" { vxworks_kernel } } */ + +#ifndef CHECK_H +#define CHECK_H "sse4_1-check.h" +#endif + +#ifndef TEST +#define TEST sse4_1_test +#endif + +#include CHECK_H + +#include <math.h> + +extern float rintf (float); + +#define NUM 64 + +static void +__attribute__((__target__("fpmath=sse"))) +init_src (float *src) +{ + int i, sign = 1; + float f = rand (); + + for (i = 0; i < NUM; i++) + { + src[i] = (i + 1) * f * M_PI * sign; + if (i < (NUM / 2)) + { + if ((i % 6) == 0) + f = f * src[i]; + } + else if (i == (NUM / 2)) + f = rand (); + else if ((i % 6) == 0) + f = 1 / (f * (i + 1) * src[i] * M_PI * sign); + sign = -sign; + } +} + +static void +__attribute__((__target__("fpmath=387"))) +TEST (void) +{ + float a[NUM]; + int r[NUM]; + int i; + + init_src (a); + + for (i = 0; i < NUM; i++) + r[i] = (int) rintf (a[i]); + + /* check results: */ + for (i = 0; i < NUM; i++) + if (r[i] != (int) rintf (a[i])) + abort(); +}