bug #1069: fix AVX support on MSVC (use of non-portable C-style cast)

2025-03-13 18:37:27 +08:00 · 2015-09-28 10:08:26 +02:00 · 2015-09-28 10:08:26 +02:00 · 75861f6650
commit 75861f6650
parent d16797cfc0
2 changed files with 21 additions and 21 deletions
--- a/Eigen/src/Core/arch/AVX/MathFunctions.h
+++ b/Eigen/src/Core/arch/AVX/MathFunctions.h
@@ -38,10 +38,10 @@ psin<Packet8f>(const Packet8f& _x) {
  _EIGEN_DECLARE_CONST_Packet8f(two, 2.0f);
  _EIGEN_DECLARE_CONST_Packet8f(one_over_four, 0.25f);
  _EIGEN_DECLARE_CONST_Packet8f(one_over_pi, 3.183098861837907e-01f);
-  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00);
-  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04);
-  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07);
-  _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_first, -3.140625000000000e+00f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_second, -9.670257568359375e-04f);
+  _EIGEN_DECLARE_CONST_Packet8f(neg_pi_third, -6.278329571784980e-07f);
+  _EIGEN_DECLARE_CONST_Packet8f(four_over_pi, 1.273239544735163e+00f);

  // Map x from [-Pi/4,3*Pi/4] to z in [-1,3] and subtract the shifted period.
  Packet8f z = pmul(x, p8f_one_over_pi);
@@ -55,14 +55,14 @@ psin<Packet8f>(const Packet8f& _x) {
  // is odd.
  Packet8i shift_ints = _mm256_cvtps_epi32(shift);
  Packet8i shift_isodd =
-      (__m256i)_mm256_and_ps((__m256)shift_ints, (__m256)p8i_one);
+      _mm256_castps_si256(_mm256_and_ps(_mm256_castsi256_ps(shift_ints), _mm256_castsi256_ps(p8i_one)));
 #ifdef EIGEN_VECTORIZE_AVX2
  Packet8i sign_flip_mask = _mm256_slli_epi32(shift_isodd, 31);
 #else
  __m128i lo =
-      _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 0), 31);
+      _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 0), 31);
  __m128i hi =
-      _mm_slli_epi32(_mm256_extractf128_si256((__m256i)shift_isodd, 1), 31);
+      _mm_slli_epi32(_mm256_extractf128_si256(shift_isodd, 1), 31);
  Packet8i sign_flip_mask = _mm256_setr_m128(lo, hi);
 #endif

@@ -72,9 +72,9 @@ psin<Packet8f>(const Packet8f& _x) {

  // Evaluate the polynomial for the interval [1,3] in z.
  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_0, 9.999999724233232e-01f);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_2, -3.084242535619928e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_4, 1.584991525700324e-02f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_right_6, -3.188805084631342e-04f);
  Packet8f z_minus_two = psub(z, p8f_two);
  Packet8f z_minus_two2 = pmul(z_minus_two, z_minus_two);
  Packet8f right = pmadd(p8f_coeff_right_6, z_minus_two2, p8f_coeff_right_4);
@@ -82,10 +82,10 @@ psin<Packet8f>(const Packet8f& _x) {
  right = pmadd(right, z_minus_two2, p8f_coeff_right_0);

  // Evaluate the polynomial for the interval [-1,1] in z.
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03);
-  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_1, 7.853981525427295e-01f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_3, -8.074536727092352e-02f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_5, 2.489871967827018e-03f);
+  _EIGEN_DECLARE_CONST_Packet8f(coeff_left_7, -3.587725841214251e-05f);
  Packet8f z2 = pmul(z, z);
  Packet8f left = pmadd(p8f_coeff_left_7, z2, p8f_coeff_left_5);
  left = pmadd(left, z2, p8f_coeff_left_3);
@@ -98,7 +98,7 @@ psin<Packet8f>(const Packet8f& _x) {
  Packet8f res = _mm256_or_ps(left, right);

  // Flip the sign on the odd intervals and return the result.
-  res = _mm256_xor_ps(res, (__m256)sign_flip_mask);
+  res = _mm256_xor_ps(res, _mm256_castsi256_ps(sign_flip_mask));
  return res;
 }

@@ -145,10 +145,10 @@ plog<Packet8f>(const Packet8f& _x) {
 // Extract the shifted exponents (No bitwise shifting in regular AVX, so
 // convert to SSE and do it there).
 #ifdef EIGEN_VECTORIZE_AVX2
-  Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32((__m256i)x, 23));
+  Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_castps_si256(x), 23));
 #else
-  __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 0), 23);
-  __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256((__m256i)x, 1), 23);
+  __m128i lo = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 0), 23);
+  __m128i hi = _mm_srli_epi32(_mm256_extractf128_si256(_mm256_castps_si256(x), 1), 23);
  Packet8f emm0 = _mm256_cvtepi32_ps(_mm256_setr_m128(lo, hi));
 #endif
  Packet8f e = _mm256_sub_ps(emm0, p8f_126f);
@@ -348,7 +348,7 @@ pexp<Packet4d>(const Packet4d& _x) {

  // Construct the result 2^n * exp(g) = e * x. The max is used to catch
  // non-finite values in the input.
-  return pmax(pmul(x, Packet4d(e)), _x);
+  return pmax(pmul(x, _mm256_castsi256_pd(e)), _x);
 }

 // Functions for sqrt.
@@ -393,7 +393,7 @@ Packet4d psqrt<Packet4d>(const Packet4d& x) {

 template<> EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_UNUSED
 Packet8f prsqrt<Packet8f>(const Packet8f& _x) {
- _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
+  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(inf, 0x7f800000);
  _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(nan, 0x7fc00000);
  _EIGEN_DECLARE_CONST_Packet8f(one_point_five, 1.5f);
  _EIGEN_DECLARE_CONST_Packet8f(minus_half, -0.5f);
--- a/Eigen/src/Core/arch/AVX/PacketMath.h
+++ b/Eigen/src/Core/arch/AVX/PacketMath.h
@@ -43,7 +43,7 @@ template<> struct is_arithmetic<__m256d> { enum { value = true }; };
  const Packet4d p4d_##NAME = pset1<Packet4d>(X)

 #define _EIGEN_DECLARE_CONST_Packet8f_FROM_INT(NAME,X) \
-  const Packet8f p8f_##NAME = (__m256)pset1<Packet8i>(X)
+  const Packet8f p8f_##NAME = _mm256_castsi256_ps(pset1<Packet8i>(X))

 #define _EIGEN_DECLARE_CONST_Packet8i(NAME,X) \
  const Packet8i p8i_##NAME = pset1<Packet8i>(X)