From 2c9fd13e905b68909db52d6868645790f935e35a Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Fri, 14 Dec 2007 13:43:57 +0100 Subject: [PATCH] sse-14.c (test_1, [...]): New macros to test macroized SSE intrinsics. * gcc.target/i386/sse-14.c (test_1, test_1x, test_2, test_2x, test_4): New macros to test macroized SSE intrinsics. Use new macros to test macroized SSE intrinsics from ammintrin.h, smmintrin.h, tmmintrin.h, emmintrin.h, xmmintrin.h and bmmintrin.h. * gcc.target/i386/sse-13.c (__builtin_ia32_pcmp?str*128): Redefine to test with immediate operand. From-SVN: r130930 --- gcc/config/i386/bmmintrin.h | 16 ++--- gcc/config/i386/emmintrin.h | 34 ++++----- gcc/config/i386/smmintrin.h | 6 +- gcc/config/i386/tmmintrin.h | 4 +- gcc/config/i386/xmmintrin.h | 12 ++-- gcc/testsuite/ChangeLog | 9 +++ gcc/testsuite/gcc.target/i386/sse-13.c | 30 +++++++- gcc/testsuite/gcc.target/i386/sse-14.c | 97 +++++++++++++++++++++++++- 8 files changed, 170 insertions(+), 38 deletions(-) diff --git a/gcc/config/i386/bmmintrin.h b/gcc/config/i386/bmmintrin.h index 48830f39fb2a..4254d0fc17c1 100644 --- a/gcc/config/i386/bmmintrin.h +++ b/gcc/config/i386/bmmintrin.h @@ -375,14 +375,14 @@ _mm_roti_epi64(__m128i __A, const int __B) return (__m128i) __builtin_ia32_protqi ((__v2di)__A, __B); } #else -#define _mm_roti_epi8(A, B) \ - ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(B))) -#define _mm_roti_epi16(A, B) \ - ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(B))) -#define _mm_roti_epi32(A, B) \ - ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(B))) -#define _mm_roti_epi64(A, B) \ - ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(B)) +#define _mm_roti_epi8(A, N) \ + ((__m128i) __builtin_ia32_protbi ((__v16qi)(__m128i)(A), (int)(N))) +#define _mm_roti_epi16(A, N) \ + ((__m128i) __builtin_ia32_protwi ((__v8hi)(__m128i)(A), (int)(N))) +#define _mm_roti_epi32(A, N) \ + ((__m128i) __builtin_ia32_protdi ((__v4si)(__m128i)(A), (int)(N))) +#define 
_mm_roti_epi64(A, N) \ + ((__m128i) __builtin_ia32_protqi ((__v2di)(__m128i)(A), (int)(N))) #endif /* pshl */ diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index 1a0affc77551..c5bbe8bf82ad 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -887,9 +887,9 @@ _mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) return (__m128d)__builtin_ia32_shufpd ((__v2df)__A, (__v2df)__B, __mask); } #else -#define _mm_shuffle_pd(__A, __B, __C) \ - ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)__A, \ - (__v2df)(__m128d)__B, (int)(__C))) +#define _mm_shuffle_pd(A, B, N) \ + ((__m128d)__builtin_ia32_shufpd ((__v2df)(__m128d)(A), \ + (__v2df)(__m128d)(B), (int)(N))) #endif static __inline __m128d __attribute__((__always_inline__, __artificial__)) @@ -1146,21 +1146,21 @@ _mm_srai_epi32 (__m128i __A, int __B) #ifdef __OPTIMIZE__ static __inline __m128i __attribute__((__always_inline__, __artificial__)) -_mm_srli_si128 (__m128i __A, const int __B) +_mm_srli_si128 (__m128i __A, const int __N) { - return (__m128i)__builtin_ia32_psrldqi128 (__A, __B * 8); + return (__m128i)__builtin_ia32_psrldqi128 (__A, __N * 8); } static __inline __m128i __attribute__((__always_inline__, __artificial__)) -_mm_slli_si128 (__m128i __A, const int __B) +_mm_slli_si128 (__m128i __A, const int __N) { - return (__m128i)__builtin_ia32_pslldqi128 (__A, __B * 8); + return (__m128i)__builtin_ia32_pslldqi128 (__A, __N * 8); } #else -#define _mm_srli_si128(__A, __B) \ - ((__m128i)__builtin_ia32_psrldqi128 (__A, (__B) * 8)) -#define _mm_slli_si128(__A, __B) \ - ((__m128i)__builtin_ia32_pslldqi128 (__A, (__B) * 8)) +#define _mm_srli_si128(A, N) \ + ((__m128i)__builtin_ia32_psrldqi128 ((__m128i)(A), (int)(N) * 8)) +#define _mm_slli_si128(A, N) \ + ((__m128i)__builtin_ia32_pslldqi128 ((__m128i)(A), (int)(N) * 8)) #endif static __inline __m128i __attribute__((__always_inline__, __artificial__)) @@ -1382,12 +1382,12 @@ _mm_shuffle_epi32 (__m128i __A, const int 
__mask) return (__m128i)__builtin_ia32_pshufd ((__v4si)__A, __mask); } #else -#define _mm_shufflehi_epi16(__A, __B) \ - ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)__A, (int)__B)) -#define _mm_shufflelo_epi16(__A, __B) \ - ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)__A, (int)__B)) -#define _mm_shuffle_epi32(__A, __B) \ - ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)__A, (int)__B)) +#define _mm_shufflehi_epi16(A, N) \ + ((__m128i)__builtin_ia32_pshufhw ((__v8hi)(__m128i)(A), (int)(N))) +#define _mm_shufflelo_epi16(A, N) \ + ((__m128i)__builtin_ia32_pshuflw ((__v8hi)(__m128i)(A), (int)(N))) +#define _mm_shuffle_epi32(A, N) \ + ((__m128i)__builtin_ia32_pshufd ((__v4si)(__m128i)(A), (int)(N))) #endif static __inline void __attribute__((__always_inline__, __artificial__)) diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h index 3989773e5739..e5b0ae04c0f1 100644 --- a/gcc/config/i386/smmintrin.h +++ b/gcc/config/i386/smmintrin.h @@ -341,9 +341,9 @@ _mm_extract_epi64 (__m128i __X, const int __N) #endif #else #define _mm_extract_epi8(X, N) \ - __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N)) + ((int) __builtin_ia32_vec_ext_v16qi ((__v16qi)(__m128i)(X), (int)(N))) #define _mm_extract_epi32(X, N) \ - __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N)) + ((int) __builtin_ia32_vec_ext_v4si ((__v4si)(__m128i)(X), (int)(N))) #ifdef __x86_64__ #define _mm_extract_epi64(X, N) \ @@ -544,7 +544,7 @@ _mm_cmpestri (__m128i __X, int __LX, __m128i __Y, int __LY, const int __M) #define _mm_cmpestri(X, LX, Y, LY, M) \ ((int) __builtin_ia32_pcmpestri128 ((__v16qi)(__m128i)(X), (int)(LX), \ (__v16qi)(__m128i)(Y), (int)(LY), \ - (int)(M)) + (int)(M))) #endif /* Intrinsics for text/string processing and reading values of diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h index 6b389133079e..0e6a0d49e6f0 100644 --- a/gcc/config/i386/tmmintrin.h +++ b/gcc/config/i386/tmmintrin.h @@ -201,8 +201,8 @@ 
_mm_alignr_pi8(__m64 __X, __m64 __Y, const int __N) (__v2di)(__m128i)(Y), \ (int)(N) * 8)) #define _mm_alignr_pi8(X, Y, N) \ - ((__m64) __builtin_ia32_palignr ((long long)(__m64)(__X), \ - (long long)(__m64)(__Y), \ + ((__m64) __builtin_ia32_palignr ((long long)(__m64)(X), \ + (long long)(__m64)(Y), \ (int)(N) * 8)) #endif diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h index ab3acebd3a36..18b659b2483b 100644 --- a/gcc/config/i386/xmmintrin.h +++ b/gcc/config/i386/xmmintrin.h @@ -1007,8 +1007,8 @@ _m_pextrw (__m64 const __A, int const __N) #else #define _mm_extract_pi16(A, N) \ ((int) __builtin_ia32_vec_ext_v4hi ((__v4hi)(__m64)(A), (int)(N))) -#define _m_pextrw(A, N) \ - ((int) _mm_extract_pi16((__m64)(A),(int)(N))) + +#define _m_pextrw(A, N) _mm_extract_pi16(A, N) #endif /* Inserts word D into one of four words of A. The selector N must be @@ -1029,8 +1029,8 @@ _m_pinsrw (__m64 const __A, int const __D, int const __N) #define _mm_insert_pi16(A, D, N) \ ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(__m64)(A), \ (int)(D), (int)(N))) -#define _m_pinsrw(A, D, N) \ - ((__m64) _mm_insert_pi16((__m64)(A), (int)(D), (int)(N)) + +#define _m_pinsrw(A, D, N) _mm_insert_pi16(A, D, N) #endif /* Compute the element-wise maximum of signed 16-bit values. */ @@ -1129,8 +1129,8 @@ _m_pshufw (__m64 __A, int const __N) #else #define _mm_shuffle_pi16(A, N) \ ((__m64) __builtin_ia32_pshufw ((__v4hi)(__m64)(A), (int)(N))) -#define _m_pshufw(A, N) \ - ((__m64) _mm_shuffle_pi16 ((__m64)(A), (int)(N)) + +#define _m_pshufw(A, N) _mm_shuffle_pi16 (A, N) #endif /* Conditionally store byte elements of A into P. The high bit of each diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index f3aea33a7f8b..35c1880463ed 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,12 @@ +2007-12-14 Uros Bizjak + + * gcc.target/i386/sse-14.c (test_1, test_2, test_2x, test_4): New + macros to test macroized SSE intrinsics. 
Use new macros to test + macroized SSE intrinsics from ammintrin.h, smmintrin.h, tmmintrin.h, + emmintrin.h, xmmintrin.h and bmmintrin.h + * gcc.target/i386/sse-13.c (__builtin_ia32_pcmp?str*128): Redefine + to test with immediate operand. + 2007-12-13 John David Anglin PR target/34091 diff --git a/gcc/testsuite/gcc.target/i386/sse-13.c b/gcc/testsuite/gcc.target/i386/sse-13.c index ed3f99f93fa6..e6dda6904827 100644 --- a/gcc/testsuite/gcc.target/i386/sse-13.c +++ b/gcc/testsuite/gcc.target/i386/sse-13.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O2 -march=k8 -m3dnow -msse4.1 -msse5 " } */ +/* { dg-options "-O2 -march=k8 -m3dnow -msse4.1 -msse5" } */ /* Test that the intrinsics compile with optimization. All of them are defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h @@ -34,6 +34,34 @@ #define __builtin_ia32_roundps(V, M) __builtin_ia32_roundps(V, 1) #define __builtin_ia32_roundss(D, V, M) __builtin_ia32_roundss(D, V, 1) #define __builtin_ia32_mpsadbw128(X, Y, M) __builtin_ia32_mpsadbw128(X, Y, 1) +#define __builtin_ia32_pcmpistrm128(X, Y, M) \ + __builtin_ia32_pcmpistrm128(X, Y, 1) +#define __builtin_ia32_pcmpistri128(X, Y, M) \ + __builtin_ia32_pcmpistri128(X, Y, 1) +#define __builtin_ia32_pcmpestrm128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestrm128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestri128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestri128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpistria128(X, Y, M) \ + __builtin_ia32_pcmpistria128(X, Y, 1) +#define __builtin_ia32_pcmpistric128(X, Y, M) \ + __builtin_ia32_pcmpistric128(X, Y, 1) +#define __builtin_ia32_pcmpistrio128(X, Y, M) \ + __builtin_ia32_pcmpistrio128(X, Y, 1) +#define __builtin_ia32_pcmpistris128(X, Y, M) \ + __builtin_ia32_pcmpistris128(X, Y, 1) +#define __builtin_ia32_pcmpistriz128(X, Y, M) \ + __builtin_ia32_pcmpistriz128(X, Y, 1) +#define __builtin_ia32_pcmpestria128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestria128(X, LX, Y, LY, 1) +#define 
__builtin_ia32_pcmpestric128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestric128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestrio128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestrio128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestris128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestris128(X, LX, Y, LY, 1) +#define __builtin_ia32_pcmpestriz128(X, LX, Y, LY, M) \ + __builtin_ia32_pcmpestriz128(X, LX, Y, LY, 1) /* tmmintrin.h */ #define __builtin_ia32_palignr128(X, Y, N) __builtin_ia32_palignr128(X, Y, 8) diff --git a/gcc/testsuite/gcc.target/i386/sse-14.c b/gcc/testsuite/gcc.target/i386/sse-14.c index fc93f6de06ab..ff8a9ffa4882 100644 --- a/gcc/testsuite/gcc.target/i386/sse-14.c +++ b/gcc/testsuite/gcc.target/i386/sse-14.c @@ -1,5 +1,5 @@ /* { dg-do compile } */ -/* { dg-options "-O0 -march=k8 -m3dnow -msse4.1 -msse5" } */ +/* { dg-options "-O0 -march=k8 -m3dnow -msse4 -msse5" } */ /* Test that the intrinsics compile without optimization. All of them are defined as inline functions in {,x,e,p,t,s,a,b}mmintrin.h and mm3dnow.h @@ -12,3 +12,98 @@ #include #include #include + +#define _CONCAT(x,y) x ## y + +#define test_1(func, type, op1_type, imm) \ + type _CONCAT(_,func) (op1_type A, int const I) \ + { return func (A, imm); } + +#define test_1x(func, type, op1_type, imm1, imm2) \ + type _CONCAT(_,func) (op1_type A, int const I, int const L) \ + { return func (A, imm1, imm2); } + +#define test_2(func, type, op1_type, op2_type, imm) \ + type _CONCAT(_,func) (op1_type A, op2_type B, int const I) \ + { return func (A, B, imm); } + +#define test_2x(func, type, op1_type, op2_type, imm1, imm2) \ + type _CONCAT(_,func) (op1_type A, op2_type B, int const I, int const L) \ + { return func (A, B, imm1, imm2); } + +#define test_4(func, type, op1_type, op2_type, op3_type, op4_type, imm) \ + type _CONCAT(_,func) (op1_type A, op2_type B, \ + op3_type C, op4_type D, int const I) \ + { return func (A, B, C, D, imm); } + + +/* Following intrinsics require immediate arguments. 
They + are defined as macros for non-optimized compilations. */ + +/* ammintrin.h */ +test_1x (_mm_extracti_si64, __m128i, __m128i, 1, 1) +test_2x (_mm_inserti_si64, __m128i, __m128i, __m128i, 1, 1) + +/* smmintrin.h */ +test_2 (_mm_blend_epi16, __m128i, __m128i, __m128i, 1) +test_2 (_mm_blend_ps, __m128, __m128, __m128, 1) +test_2 (_mm_blend_pd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_dp_ps, __m128, __m128, __m128, 1) +test_2 (_mm_dp_pd, __m128d, __m128d, __m128d, 1) +test_2 (_mm_insert_ps, __m128, __m128, __m128, 1) +test_1 (_mm_extract_ps, int, __m128, 1) +test_2 (_mm_insert_epi8, __m128i, __m128i, int, 1) +test_2 (_mm_insert_epi32, __m128i, __m128i, int, 1) +#ifdef __x86_64__ +test_2 (_mm_insert_epi64, __m128i, __m128i, long long, 1) +#endif +test_1 (_mm_extract_epi8, int, __m128i, 1) +test_1 (_mm_extract_epi32, int, __m128i, 1) +#ifdef __x86_64__ +test_1 (_mm_extract_epi64, long long, __m128i, 1) +#endif +test_2 (_mm_mpsadbw_epu8, __m128i, __m128i, __m128i, 1) +test_2 (_mm_cmpistrm, __m128i, __m128i, __m128i, 1) +test_2 (_mm_cmpistri, int, __m128i, __m128i, 1) +test_4 (_mm_cmpestrm, __m128i, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestri, int, __m128i, int, __m128i, int, 1) +test_2 (_mm_cmpistra, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrc, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistro, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrs, int, __m128i, __m128i, 1) +test_2 (_mm_cmpistrz, int, __m128i, __m128i, 1) +test_4 (_mm_cmpestra, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrc, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestro, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrs, int, __m128i, int, __m128i, int, 1) +test_4 (_mm_cmpestrz, int, __m128i, int, __m128i, int, 1) + +/* tmmintrin.h */ +test_2 (_mm_alignr_epi8, __m128i, __m128i, __m128i, 1) +test_2 (_mm_alignr_pi8, __m64, __m64, __m64, 1) + +/* emmintrin.h */ +test_2 (_mm_shuffle_pd, __m128d, __m128d, __m128d, 1) +test_1 (_mm_srli_si128, __m128i, __m128i, 1) 
+test_1 (_mm_slli_si128, __m128i, __m128i, 1) +test_1 (_mm_extract_epi16, int, __m128i, 1) +test_2 (_mm_insert_epi16, __m128i, __m128i, int, 1) +test_1 (_mm_shufflehi_epi16, __m128i, __m128i, 1) +test_1 (_mm_shufflelo_epi16, __m128i, __m128i, 1) +test_1 (_mm_shuffle_epi32, __m128i, __m128i, 1) + +/* xmmintrin.h */ +test_2 (_mm_shuffle_ps, __m128, __m128, __m128, 1) +test_1 (_mm_extract_pi16, int, __m64, 1) +test_1 (_m_pextrw, int, __m64, 1) +test_2 (_mm_insert_pi16, __m64, __m64, int, 1) +test_2 (_m_pinsrw, __m64, __m64, int, 1) +test_1 (_mm_shuffle_pi16, __m64, __m64, 1) +test_1 (_m_pshufw, __m64, __m64, 1) +test_1 (_mm_prefetch, void, void *, _MM_HINT_NTA) + +/* bmmintrin.h */ +test_1 (_mm_roti_epi8, __m128i, __m128i, 1) +test_1 (_mm_roti_epi16, __m128i, __m128i, 1) +test_1 (_mm_roti_epi32, __m128i, __m128i, 1) +test_1 (_mm_roti_epi64, __m128i, __m128i, 1)