diff --git a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h index 2e71b5fd4..280ebe512 100644 --- a/Eigen/src/Core/products/SelfadjointMatrixMatrix.h +++ b/Eigen/src/Core/products/SelfadjointMatrixMatrix.h @@ -185,11 +185,13 @@ struct ei_symm_pack_rhs count += 1; } - if(half==j2) + if(half==j2 && half > gebp_kernel; + ei_symm_pack_lhs pack_lhs; + ei_gemm_pack_rhs pack_rhs; + ei_gemm_pack_lhs pack_lhs_transposed; for(int k2=0; k2() - (blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols); + pack_rhs(blockB, &rhs(k2,0), rhsStride, alpha, actual_kc, cols); // the select lhs's panel has to be split in three different parts: // 1 - the transposed panel above the diagonal block => transposed packed copy @@ -284,8 +288,7 @@ struct ei_product_selfadjoint_matrix() - (blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc); + pack_lhs_transposed(blockA, &lhs(k2, i2), lhsStride, actual_kc, actual_mc); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); } @@ -293,8 +296,7 @@ struct ei_product_selfadjoint_matrix() - (blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc); + pack_lhs(blockA, &lhs(k2,k2), lhsStride, actual_kc, actual_mc); gebp_kernel(res+k2, resStride, blockA, blockB, actual_mc, actual_kc, cols); } @@ -346,20 +348,20 @@ struct ei_product_selfadjoint_matrix > gebp_kernel; + ei_gemm_pack_lhs pack_lhs; + ei_symm_pack_rhs pack_rhs; for(int k2=0; k2() - (blockB, _rhs, rhsStride, alpha, actual_kc, cols, k2); + pack_rhs(blockB, _rhs, rhsStride, alpha, actual_kc, cols, k2); // => GEPP for(int i2=0; i2() - (blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); + pack_lhs(blockA, &lhs(i2, k2), lhsStride, actual_kc, actual_mc); gebp_kernel(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols); }