mirror of
https://gitlab.com/libeigen/eigen.git
synced 2025-01-06 14:14:46 +08:00
9b411757ab
This change also contains a few minor cleanups: 1. Remove packet op pnot, which is not needed for anything other than pcmp_le_or_nan, which can be done in other ways. 2. Remove the "HasInsert" enum, which is no longer needed since we removed the corresponding packet ops. 3. Add faster pselect op for Packet4i when SSE4.1 is supported. Among other things, this makes the fast transposeInPlace() method available for Matrix<bool>. Run on ************** (72 X 2994 MHz CPUs); 2020-05-09T10:51:02.372347913-07:00 CPU: Intel Skylake Xeon with HyperThreading (36 cores) dL1:32KB dL2:1024KB dL3:24MB Benchmark Time(ns) CPU(ns) Iterations ----------------------------------------------------------------------- BM_TransposeInPlace<float>/4 9.77 9.77 71670320 BM_TransposeInPlace<float>/8 21.9 21.9 31929525 BM_TransposeInPlace<float>/16 66.6 66.6 10000000 BM_TransposeInPlace<float>/32 243 243 2879561 BM_TransposeInPlace<float>/59 844 844 829767 BM_TransposeInPlace<float>/64 933 933 750567 BM_TransposeInPlace<float>/128 3944 3945 177405 BM_TransposeInPlace<float>/256 16853 16853 41457 BM_TransposeInPlace<float>/512 204952 204968 3448 BM_TransposeInPlace<float>/1k 1053889 1053861 664 BM_TransposeInPlace<bool>/4 14.4 14.4 48637301 BM_TransposeInPlace<bool>/8 36.0 36.0 19370222 BM_TransposeInPlace<bool>/16 31.5 31.5 22178902 BM_TransposeInPlace<bool>/32 111 111 6272048 BM_TransposeInPlace<bool>/59 626 626 1000000 BM_TransposeInPlace<bool>/64 428 428 1632689 BM_TransposeInPlace<bool>/128 1677 1677 417377 BM_TransposeInPlace<bool>/256 7126 7126 96264 BM_TransposeInPlace<bool>/512 29021 29024 24165 BM_TransposeInPlace<bool>/1k 116321 116330 6068
779 lines
30 KiB
C++
779 lines
30 KiB
C++
// This file is part of Eigen, a lightweight C++ template library
|
|
// for linear algebra.
|
|
//
|
|
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
|
|
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
|
|
//
|
|
// This Source Code Form is subject to the terms of the Mozilla
|
|
// Public License v. 2.0. If a copy of the MPL was not distributed
|
|
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
#include "packetmath_test_shared.h"
|
|
|
|
template <typename T>
|
|
inline T REF_ADD(const T& a, const T& b) { return a + b;}
|
|
template <typename T>
|
|
inline T REF_SUB(const T& a, const T& b) { return a - b;}
|
|
template <typename T>
|
|
inline T REF_MUL(const T& a, const T& b) { return a * b;}
|
|
template <typename T>
|
|
inline T REF_DIV(const T& a, const T& b) { return a / b;}
|
|
template <typename T>
|
|
inline T REF_ABS_DIFF(const T& a, const T& b) { return a>b ? a - b : b-a;}
|
|
|
|
// Specializations for bool
|
|
template <>
|
|
inline bool REF_ADD(const bool& a, const bool& b) { return a || b;}
|
|
template <>
|
|
inline bool REF_SUB(const bool& a, const bool& b) { return a ^ b;}
|
|
template <>
|
|
inline bool REF_MUL(const bool& a, const bool& b) { return a && b;}
|
|
|
|
|
|
template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket, bool CanCast = false>
|
|
struct test_cast_helper;
|
|
|
|
template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket>
|
|
struct test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, false> {
|
|
static void run() {}
|
|
};
|
|
|
|
template<typename FromScalar, typename FromPacket, typename ToScalar, typename ToPacket>
|
|
struct test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, true> {
|
|
static void run() {
|
|
static const int PacketSize = internal::unpacket_traits<FromPacket>::size;
|
|
EIGEN_ALIGN_MAX FromScalar data1[PacketSize];
|
|
EIGEN_ALIGN_MAX ToScalar data2[PacketSize];
|
|
EIGEN_ALIGN_MAX ToScalar ref[PacketSize];
|
|
|
|
// Construct a packet of scalars that will not overflow when casting
|
|
for (int i=0; i<PacketSize; ++i) {
|
|
const FromScalar from_scalar = Array<FromScalar,1,1>::Random().value();
|
|
const ToScalar to_scalar = Array<ToScalar,1,1>::Random().value();
|
|
const FromScalar c = sizeof(ToScalar) > sizeof(FromScalar) ? static_cast<FromScalar>(to_scalar) : from_scalar;
|
|
data1[i] = (NumTraits<FromScalar>::IsSigned && !NumTraits<ToScalar>::IsSigned) ? numext::abs(c) : c;
|
|
}
|
|
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[i] = static_cast<const ToScalar>(data1[i]);
|
|
internal::pstore(data2, internal::pcast<FromPacket, ToPacket>(internal::pload<FromPacket>(data1)));
|
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pcast<>");
|
|
}
|
|
};
|
|
|
|
template<typename FromPacket, typename ToScalar>
|
|
void test_cast() {
|
|
typedef typename internal::unpacket_traits<FromPacket>::type FromScalar;
|
|
typedef typename internal::packet_traits<FromScalar> FromPacketTraits;
|
|
typedef typename internal::packet_traits<ToScalar>::type Full;
|
|
typedef typename internal::unpacket_traits<Full>::half Half;
|
|
typedef typename internal::unpacket_traits<typename internal::unpacket_traits<Full>::half>::half Quarter;
|
|
|
|
static const int PacketSize = internal::unpacket_traits<FromPacket>::size;
|
|
static const bool CanCast =
|
|
FromPacketTraits::HasCast &&
|
|
(PacketSize == internal::unpacket_traits<Full>::size ||
|
|
PacketSize == internal::unpacket_traits<Half>::size ||
|
|
PacketSize == internal::unpacket_traits<Quarter>::size);
|
|
|
|
typedef typename internal::conditional<internal::unpacket_traits<Quarter>::size == PacketSize, Quarter,
|
|
typename internal::conditional<internal::unpacket_traits<Half>::size == PacketSize, Half, Full>::type>::type
|
|
ToPacket;
|
|
|
|
test_cast_helper<FromScalar, FromPacket, ToScalar, ToPacket, CanCast>::run();
|
|
}
|
|
|
|
template<typename Scalar,typename Packet>
|
|
void packetmath_boolean_mask_ops()
|
|
{
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
const int size = 2*PacketSize;
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
|
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>();
|
|
}
|
|
CHECK_CWISE1(internal::ptrue, internal::ptrue);
|
|
CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
data1[i] = Scalar(i);
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
}
|
|
CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
|
|
}
|
|
|
|
template<>
|
|
void packetmath_boolean_mask_ops<bool, internal::Packet16b>()
|
|
{
|
|
}
|
|
|
|
template<typename Scalar,typename Packet> void packetmath()
|
|
{
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
|
|
|
if (g_first_pass)
|
|
std::cerr << "=== Testing packet of type '" << typeid(Packet).name()
|
|
<< "' and scalar type '" << typeid(Scalar).name()
|
|
<< "' and size '" << PacketSize << "' ===\n" ;
|
|
|
|
const int max_size = PacketSize > 4 ? PacketSize : 4;
|
|
const int size = PacketSize*max_size;
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
|
EIGEN_ALIGN_MAX Scalar data3[size];
|
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
|
RealScalar refvalue = RealScalar(0);
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
|
|
data2[i] = internal::random<Scalar>()/RealScalar(PacketSize);
|
|
refvalue = (std::max)(refvalue, numext::abs(data1[i]));
|
|
}
|
|
|
|
internal::pstore(data2, internal::pload<Packet>(data1));
|
|
VERIFY(test::areApprox(data1, data2, PacketSize) && "aligned load/store");
|
|
|
|
for (int offset=0; offset<PacketSize; ++offset)
|
|
{
|
|
internal::pstore(data2, internal::ploadu<Packet>(data1+offset));
|
|
VERIFY(test::areApprox(data1+offset, data2, PacketSize) && "internal::ploadu");
|
|
}
|
|
|
|
for (int offset=0; offset<PacketSize; ++offset)
|
|
{
|
|
internal::pstoreu(data2+offset, internal::pload<Packet>(data1));
|
|
VERIFY(test::areApprox(data1, data2+offset, PacketSize) && "internal::pstoreu");
|
|
}
|
|
|
|
if (internal::unpacket_traits<Packet>::masked_load_available)
|
|
{
|
|
test::packet_helper<internal::unpacket_traits<Packet>::masked_load_available, Packet> h;
|
|
unsigned long long max_umask = (0x1ull << PacketSize);
|
|
|
|
for (int offset=0; offset<PacketSize; ++offset)
|
|
{
|
|
for (unsigned long long umask=0; umask<max_umask; ++umask)
|
|
{
|
|
h.store(data2, h.load(data1+offset, umask));
|
|
for (int k=0; k<PacketSize; ++k)
|
|
data3[k] = ((umask & ( 0x1ull << k )) >> k) ? data1[k+offset] : Scalar(0);
|
|
VERIFY(test::areApprox(data3, data2, PacketSize) && "internal::ploadu masked");
|
|
}
|
|
}
|
|
}
|
|
|
|
if (internal::unpacket_traits<Packet>::masked_store_available)
|
|
{
|
|
test::packet_helper<internal::unpacket_traits<Packet>::masked_store_available, Packet> h;
|
|
unsigned long long max_umask = (0x1ull << PacketSize);
|
|
|
|
for (int offset=0; offset<PacketSize; ++offset)
|
|
{
|
|
for (unsigned long long umask=0; umask<max_umask; ++umask)
|
|
{
|
|
internal::pstore(data2, internal::pset1<Packet>(Scalar(0)));
|
|
h.store(data2, h.loadu(data1+offset), umask);
|
|
for (int k=0; k<PacketSize; ++k)
|
|
data3[k] = ((umask & ( 0x1ull << k )) >> k) ? data1[k+offset] : Scalar(0);
|
|
VERIFY(test::areApprox(data3, data2, PacketSize) && "internal::pstoreu masked");
|
|
}
|
|
}
|
|
}
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd);
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd);
|
|
CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub);
|
|
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
|
|
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
|
|
|
|
if (PacketTraits::HasNegate)
|
|
CHECK_CWISE1(internal::negate, internal::pnegate);
|
|
CHECK_CWISE1(numext::conj, internal::pconj);
|
|
|
|
for(int offset=0;offset<3;++offset)
|
|
{
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[i] = data1[offset];
|
|
internal::pstore(data2, internal::pset1<Packet>(data1[offset]));
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pset1");
|
|
}
|
|
|
|
{
|
|
for (int i=0; i<PacketSize*4; ++i)
|
|
ref[i] = data1[i/PacketSize];
|
|
Packet A0, A1, A2, A3;
|
|
internal::pbroadcast4<Packet>(data1, A0, A1, A2, A3);
|
|
internal::pstore(data2+0*PacketSize, A0);
|
|
internal::pstore(data2+1*PacketSize, A1);
|
|
internal::pstore(data2+2*PacketSize, A2);
|
|
internal::pstore(data2+3*PacketSize, A3);
|
|
VERIFY(test::areApprox(ref, data2, 4*PacketSize) && "internal::pbroadcast4");
|
|
}
|
|
|
|
{
|
|
for (int i=0; i<PacketSize*2; ++i)
|
|
ref[i] = data1[i/PacketSize];
|
|
Packet A0, A1;
|
|
internal::pbroadcast2<Packet>(data1, A0, A1);
|
|
internal::pstore(data2+0*PacketSize, A0);
|
|
internal::pstore(data2+1*PacketSize, A1);
|
|
VERIFY(test::areApprox(ref, data2, 2*PacketSize) && "internal::pbroadcast2");
|
|
}
|
|
|
|
VERIFY(internal::isApprox(data1[0], internal::pfirst(internal::pload<Packet>(data1))) && "internal::pfirst");
|
|
|
|
if(PacketSize>1)
|
|
{
|
|
// apply different offsets to check that ploaddup is robust to unaligned inputs
|
|
for(int offset=0;offset<4;++offset)
|
|
{
|
|
for(int i=0;i<PacketSize/2;++i)
|
|
ref[2*i+0] = ref[2*i+1] = data1[offset+i];
|
|
internal::pstore(data2,internal::ploaddup<Packet>(data1+offset));
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "ploaddup");
|
|
}
|
|
}
|
|
|
|
if(PacketSize>2)
|
|
{
|
|
// apply different offsets to check that ploadquad is robust to unaligned inputs
|
|
for(int offset=0;offset<4;++offset)
|
|
{
|
|
for(int i=0;i<PacketSize/4;++i)
|
|
ref[4*i+0] = ref[4*i+1] = ref[4*i+2] = ref[4*i+3] = data1[offset+i];
|
|
internal::pstore(data2,internal::ploadquad<Packet>(data1+offset));
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "ploadquad");
|
|
}
|
|
}
|
|
|
|
ref[0] = Scalar(0);
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[0] += data1[i];
|
|
VERIFY(test::isApproxAbs(ref[0], internal::predux(internal::pload<Packet>(data1)), refvalue) && "internal::predux");
|
|
|
|
if(PacketSize==8 && internal::unpacket_traits<typename internal::unpacket_traits<Packet>::half>::size ==4) // so far, predux_half_downto4 is only required in such a case
|
|
{
|
|
int HalfPacketSize = PacketSize>4 ? PacketSize/2 : PacketSize;
|
|
for (int i=0; i<HalfPacketSize; ++i)
|
|
ref[i] = Scalar(0);
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[i%HalfPacketSize] += data1[i];
|
|
internal::pstore(data2, internal::predux_half_dowto4(internal::pload<Packet>(data1)));
|
|
VERIFY(test::areApprox(ref, data2, HalfPacketSize) && "internal::predux_half_dowto4");
|
|
}
|
|
|
|
ref[0] = Scalar(1);
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[0] = REF_MUL(ref[0], data1[i]);
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
|
|
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[i] = data1[PacketSize-i-1];
|
|
internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::preverse");
|
|
|
|
internal::PacketBlock<Packet> kernel;
|
|
for (int i=0; i<PacketSize; ++i) {
|
|
kernel.packet[i] = internal::pload<Packet>(data1+i*PacketSize);
|
|
}
|
|
ptranspose(kernel);
|
|
for (int i=0; i<PacketSize; ++i) {
|
|
internal::pstore(data2, kernel.packet[i]);
|
|
for (int j = 0; j < PacketSize; ++j) {
|
|
VERIFY(test::isApproxAbs(data2[j], data1[i+j*PacketSize], refvalue) && "ptranspose");
|
|
}
|
|
}
|
|
|
|
|
|
if (PacketTraits::HasBlend) {
|
|
Packet thenPacket = internal::pload<Packet>(data1);
|
|
Packet elsePacket = internal::pload<Packet>(data2);
|
|
EIGEN_ALIGN_MAX internal::Selector<PacketSize> selector;
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
selector.select[i] = i;
|
|
}
|
|
|
|
Packet blend = internal::pblend(selector, thenPacket, elsePacket);
|
|
EIGEN_ALIGN_MAX Scalar result[size];
|
|
internal::pstore(result, blend);
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
VERIFY(test::isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue));
|
|
}
|
|
}
|
|
|
|
{
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
// "if" mask
|
|
unsigned char v = internal::random<bool>() ? 0xff : 0;
|
|
char* bytes = (char*)(data1+i);
|
|
for(int k=0; k<int(sizeof(Scalar)); ++k) {
|
|
bytes[k] = v;
|
|
}
|
|
// "then" packet
|
|
data1[i+PacketSize] = internal::random<Scalar>();
|
|
// "else" packet
|
|
data1[i+2*PacketSize] = internal::random<Scalar>();
|
|
}
|
|
CHECK_CWISE3_IF(true, internal::pselect, internal::pselect);
|
|
}
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
|
|
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>();
|
|
}
|
|
CHECK_CWISE1(internal::pzero, internal::pzero);
|
|
CHECK_CWISE2_IF(true, internal::por, internal::por);
|
|
CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
|
|
CHECK_CWISE2_IF(true, internal::pand, internal::pand);
|
|
|
|
packetmath_boolean_mask_ops<Scalar, Packet>();
|
|
}
|
|
|
|
|
|
|
|
template<typename Scalar,typename Packet> void packetmath_real()
|
|
{
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
const int size = PacketSize*4;
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize*4];
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize*4];
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize*4];
|
|
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>(0,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
|
|
data2[i] = internal::random<Scalar>(0,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
|
|
}
|
|
|
|
if(internal::random<float>(0,1)<0.1f)
|
|
data1[internal::random<int>(0, PacketSize)] = 0;
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
|
|
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, Scalar(1)/std::sqrt, internal::prsqrt);
|
|
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-3,3));
|
|
data2[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-3,3));
|
|
}
|
|
CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin);
|
|
CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
|
|
CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan);
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
|
CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
|
|
CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
|
|
CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
|
|
|
// See bug 1785.
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = -1.5 + i;
|
|
data2[i] = -1.5 + i;
|
|
}
|
|
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
|
CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
|
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>(-1,1);
|
|
data2[i] = internal::random<Scalar>(-1,1);
|
|
}
|
|
CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
|
|
CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
|
|
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>(-87,88);
|
|
data2[i] = internal::random<Scalar>(-87,88);
|
|
}
|
|
CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp);
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
|
|
data2[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
|
|
}
|
|
data1[0] = 1e-20;
|
|
CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh);
|
|
if(PacketTraits::HasExp && PacketSize>=2)
|
|
{
|
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
|
data1[1] = std::numeric_limits<Scalar>::epsilon();
|
|
test::packet_helper<PacketTraits::HasExp,Packet> h;
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]);
|
|
|
|
data1[0] = -std::numeric_limits<Scalar>::epsilon();
|
|
data1[1] = 0;
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
|
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
|
|
VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]);
|
|
|
|
data1[0] = (std::numeric_limits<Scalar>::min)();
|
|
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
|
VERIFY_IS_EQUAL(std::exp((std::numeric_limits<Scalar>::min)()), data2[0]);
|
|
VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits<Scalar>::min)()), data2[1]);
|
|
|
|
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
|
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
|
VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
|
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::denorm_min()), data2[1]);
|
|
}
|
|
|
|
if (PacketTraits::HasTanh) {
|
|
// NOTE this test migh fail with GCC prior to 6.3, see MathFunctionsImpl.h for details.
|
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
|
test::packet_helper<internal::packet_traits<Scalar>::HasTanh,Packet> h;
|
|
h.store(data2, internal::ptanh(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
}
|
|
|
|
#if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L)
|
|
data1[0] = std::numeric_limits<Scalar>::infinity();
|
|
data1[1] = Scalar(-1);
|
|
CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p);
|
|
data1[0] = std::numeric_limits<Scalar>::infinity();
|
|
data1[1] = -std::numeric_limits<Scalar>::infinity();
|
|
CHECK_CWISE1_IF(PacketTraits::HasExpm1, std::expm1, internal::pexpm1);
|
|
#endif
|
|
|
|
if(PacketSize>=2)
|
|
{
|
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
|
data1[1] = std::numeric_limits<Scalar>::epsilon();
|
|
if(PacketTraits::HasLog)
|
|
{
|
|
test::packet_helper<PacketTraits::HasLog,Packet> h;
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]);
|
|
|
|
data1[0] = -std::numeric_limits<Scalar>::epsilon();
|
|
data1[1] = 0;
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]);
|
|
|
|
data1[0] = (std::numeric_limits<Scalar>::min)();
|
|
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
VERIFY_IS_EQUAL(std::log((std::numeric_limits<Scalar>::min)()), data2[0]);
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
|
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
// VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
data1[0] = Scalar(-1.0f);
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
data1[0] = std::numeric_limits<Scalar>::infinity();
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
VERIFY((numext::isinf)(data2[0]));
|
|
}
|
|
if(PacketTraits::HasLog1p) {
|
|
test::packet_helper<PacketTraits::HasLog1p,Packet> h;
|
|
data1[0] = Scalar(-2);
|
|
data1[1] = -std::numeric_limits<Scalar>::infinity();
|
|
h.store(data2, internal::plog1p(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
}
|
|
if(PacketTraits::HasSqrt)
|
|
{
|
|
test::packet_helper<PacketTraits::HasSqrt,Packet> h;
|
|
data1[0] = Scalar(-1.0f);
|
|
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
|
h.store(data2, internal::psqrt(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
}
|
|
if(PacketTraits::HasCos)
|
|
{
|
|
test::packet_helper<PacketTraits::HasCos,Packet> h;
|
|
for(Scalar k = 1; k<Scalar(10000)/std::numeric_limits<Scalar>::epsilon(); k*=2)
|
|
{
|
|
for(int k1=0;k1<=1; ++k1)
|
|
{
|
|
data1[0] = (2*k+k1 )*Scalar(EIGEN_PI)/2 * internal::random<Scalar>(0.8,1.2);
|
|
data1[1] = (2*k+2+k1)*Scalar(EIGEN_PI)/2 * internal::random<Scalar>(0.8,1.2);
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
h.store(data2+PacketSize, internal::psin(h.load(data1)));
|
|
VERIFY(data2[0]<=Scalar(1.) && data2[0]>=Scalar(-1.));
|
|
VERIFY(data2[1]<=Scalar(1.) && data2[1]>=Scalar(-1.));
|
|
VERIFY(data2[PacketSize+0]<=Scalar(1.) && data2[PacketSize+0]>=Scalar(-1.));
|
|
VERIFY(data2[PacketSize+1]<=Scalar(1.) && data2[PacketSize+1]>=Scalar(-1.));
|
|
|
|
VERIFY_IS_APPROX(numext::abs2(data2[0])+numext::abs2(data2[PacketSize+0]), Scalar(1));
|
|
VERIFY_IS_APPROX(numext::abs2(data2[1])+numext::abs2(data2[PacketSize+1]), Scalar(1));
|
|
}
|
|
}
|
|
|
|
data1[0] = std::numeric_limits<Scalar>::infinity();
|
|
data1[1] = -std::numeric_limits<Scalar>::infinity();
|
|
h.store(data2, internal::psin(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
|
h.store(data2, internal::psin(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
data1[0] = -Scalar(0.);
|
|
h.store(data2, internal::psin(h.load(data1)));
|
|
VERIFY( internal::biteq(data2[0], data1[0]) );
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
VERIFY_IS_EQUAL(data2[0], Scalar(1));
|
|
}
|
|
}
|
|
}
|
|
|
|
template<typename Scalar,typename Packet> void packetmath_notcomplex()
|
|
{
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize*4];
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize*4];
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize*4];
|
|
|
|
Array<Scalar,Dynamic,1>::Map(data1, PacketSize*4).setRandom();
|
|
|
|
if (PacketTraits::HasCast) {
|
|
test_cast<Packet, float>();
|
|
test_cast<Packet, double>();
|
|
test_cast<Packet, int8_t>();
|
|
test_cast<Packet, uint8_t>();
|
|
test_cast<Packet, int16_t>();
|
|
test_cast<Packet, uint16_t>();
|
|
test_cast<Packet, int32_t>();
|
|
test_cast<Packet, uint32_t>();
|
|
test_cast<Packet, int64_t>();
|
|
test_cast<Packet, uint64_t>();
|
|
}
|
|
|
|
ref[0] = data1[0];
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[0] = (std::min)(ref[0],data1[i]);
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload<Packet>(data1))) && "internal::predux_min");
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin);
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax);
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin);
|
|
CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax);
|
|
CHECK_CWISE1(numext::abs, internal::pabs);
|
|
CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff);
|
|
|
|
ref[0] = data1[0];
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[0] = (std::max)(ref[0],data1[i]);
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload<Packet>(data1))) && "internal::predux_max");
|
|
|
|
for (int i=0; i<PacketSize; ++i)
|
|
ref[i] = data1[0]+Scalar(i);
|
|
internal::pstore(data2, internal::plset<Packet>(data1[0]));
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::plset");
|
|
|
|
{
|
|
unsigned char* data1_bits = reinterpret_cast<unsigned char*>(data1);
|
|
// predux_all - not needed yet
|
|
// for (unsigned int i=0; i<PacketSize*sizeof(Scalar); ++i) data1_bits[i] = 0xff;
|
|
// VERIFY(internal::predux_all(internal::pload<Packet>(data1)) && "internal::predux_all(1111)");
|
|
// for(int k=0; k<PacketSize; ++k)
|
|
// {
|
|
// for (unsigned int i=0; i<sizeof(Scalar); ++i) data1_bits[k*sizeof(Scalar)+i] = 0x0;
|
|
// VERIFY( (!internal::predux_all(internal::pload<Packet>(data1))) && "internal::predux_all(0101)");
|
|
// for (unsigned int i=0; i<sizeof(Scalar); ++i) data1_bits[k*sizeof(Scalar)+i] = 0xff;
|
|
// }
|
|
|
|
// predux_any
|
|
for (unsigned int i=0; i<PacketSize*sizeof(Scalar); ++i) data1_bits[i] = 0x0;
|
|
VERIFY( (!internal::predux_any(internal::pload<Packet>(data1))) && "internal::predux_any(0000)");
|
|
for(int k=0; k<PacketSize; ++k)
|
|
{
|
|
for (unsigned int i=0; i<sizeof(Scalar); ++i) data1_bits[k*sizeof(Scalar)+i] = 0xff;
|
|
VERIFY( internal::predux_any(internal::pload<Packet>(data1)) && "internal::predux_any(0101)");
|
|
for (unsigned int i=0; i<sizeof(Scalar); ++i) data1_bits[k*sizeof(Scalar)+i] = 0x00;
|
|
}
|
|
}
|
|
}
|
|
|
|
template<typename Scalar,typename Packet,bool ConjLhs,bool ConjRhs> void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval)
|
|
{
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
internal::conj_if<ConjLhs> cj0;
|
|
internal::conj_if<ConjRhs> cj1;
|
|
internal::conj_helper<Scalar,Scalar,ConjLhs,ConjRhs> cj;
|
|
internal::conj_helper<Packet,Packet,ConjLhs,ConjRhs> pcj;
|
|
|
|
for(int i=0;i<PacketSize;++i)
|
|
{
|
|
ref[i] = cj0(data1[i]) * cj1(data2[i]);
|
|
VERIFY(internal::isApprox(ref[i], cj.pmul(data1[i],data2[i])) && "conj_helper pmul");
|
|
}
|
|
internal::pstore(pval,pcj.pmul(internal::pload<Packet>(data1),internal::pload<Packet>(data2)));
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "conj_helper pmul");
|
|
|
|
for(int i=0;i<PacketSize;++i)
|
|
{
|
|
Scalar tmp = ref[i];
|
|
ref[i] += cj0(data1[i]) * cj1(data2[i]);
|
|
VERIFY(internal::isApprox(ref[i], cj.pmadd(data1[i],data2[i],tmp)) && "conj_helper pmadd");
|
|
}
|
|
internal::pstore(pval,pcj.pmadd(internal::pload<Packet>(data1),internal::pload<Packet>(data2),internal::pload<Packet>(pval)));
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "conj_helper pmadd");
|
|
}
|
|
|
|
template<typename Scalar,typename Packet> void packetmath_complex()
|
|
{
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
const int size = PacketSize*4;
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize*4];
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize*4];
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize*4];
|
|
EIGEN_ALIGN_MAX Scalar pval[PacketSize*4];
|
|
|
|
for (int i=0; i<size; ++i)
|
|
{
|
|
data1[i] = internal::random<Scalar>() * Scalar(1e2);
|
|
data2[i] = internal::random<Scalar>() * Scalar(1e2);
|
|
}
|
|
|
|
test_conj_helper<Scalar,Packet,false,false> (data1,data2,ref,pval);
|
|
test_conj_helper<Scalar,Packet,false,true> (data1,data2,ref,pval);
|
|
test_conj_helper<Scalar,Packet,true,false> (data1,data2,ref,pval);
|
|
test_conj_helper<Scalar,Packet,true,true> (data1,data2,ref,pval);
|
|
|
|
{
|
|
for(int i=0;i<PacketSize;++i)
|
|
ref[i] = Scalar(std::imag(data1[i]),std::real(data1[i]));
|
|
internal::pstore(pval,internal::pcplxflip(internal::pload<Packet>(data1)));
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "pcplxflip");
|
|
}
|
|
}
|
|
|
|
template<typename Scalar,typename Packet> void packetmath_scatter_gather()
|
|
{
|
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize];
|
|
RealScalar refvalue = 0;
|
|
for (int i=0; i<PacketSize; ++i) {
|
|
data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
|
|
}
|
|
|
|
int stride = internal::random<int>(1,20);
|
|
|
|
EIGEN_ALIGN_MAX Scalar buffer[PacketSize*20];
|
|
memset(buffer, 0, 20*PacketSize*sizeof(Scalar));
|
|
Packet packet = internal::pload<Packet>(data1);
|
|
internal::pscatter<Scalar, Packet>(buffer, packet, stride);
|
|
|
|
for (int i = 0; i < PacketSize*20; ++i) {
|
|
if ((i%stride) == 0 && i<stride*PacketSize) {
|
|
VERIFY(
|
|
test::isApproxAbs(buffer[i], data1[i/stride], refvalue) && "pscatter");
|
|
} else {
|
|
VERIFY(
|
|
test::isApproxAbs(buffer[i], Scalar(0), refvalue) && "pscatter");
|
|
}
|
|
}
|
|
|
|
for (int i=0; i<PacketSize*7; ++i) {
|
|
buffer[i] = internal::random<Scalar>()/RealScalar(PacketSize);
|
|
}
|
|
packet = internal::pgather<Scalar, Packet>(buffer, 7);
|
|
internal::pstore(data1, packet);
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
VERIFY(test::isApproxAbs(data1[i], buffer[i*7], refvalue) && "pgather");
|
|
}
|
|
}
|
|
|
|
namespace Eigen {
|
|
namespace test {
|
|
|
|
template<typename Scalar,typename PacketType>
|
|
struct runall<Scalar,PacketType,false,false> { // i.e. float or double
|
|
static void run() {
|
|
packetmath<Scalar,PacketType>();
|
|
packetmath_scatter_gather<Scalar,PacketType>();
|
|
packetmath_notcomplex<Scalar,PacketType>();
|
|
packetmath_real<Scalar,PacketType>();
|
|
}
|
|
};
|
|
|
|
template<typename Scalar,typename PacketType>
|
|
struct runall<Scalar,PacketType,false,true> { // i.e. int
|
|
static void run() {
|
|
packetmath<Scalar,PacketType>();
|
|
packetmath_scatter_gather<Scalar,PacketType>();
|
|
packetmath_notcomplex<Scalar,PacketType>();
|
|
}
|
|
};
|
|
|
|
template<typename Scalar,typename PacketType>
|
|
struct runall<Scalar,PacketType,true,false> { // i.e. complex
|
|
static void run() {
|
|
packetmath<Scalar,PacketType>();
|
|
packetmath_scatter_gather<Scalar,PacketType>();
|
|
packetmath_complex<Scalar,PacketType>();
|
|
}
|
|
};
|
|
|
|
}
|
|
}
|
|
|
|
|
|
EIGEN_DECLARE_TEST(packetmath)
|
|
{
|
|
g_first_pass = true;
|
|
for(int i = 0; i < g_repeat; i++) {
|
|
|
|
CALL_SUBTEST_1( test::runner<float>::run() );
|
|
CALL_SUBTEST_2( test::runner<double>::run() );
|
|
CALL_SUBTEST_3( test::runner<int8_t>::run() );
|
|
CALL_SUBTEST_4( test::runner<uint8_t>::run() );
|
|
CALL_SUBTEST_5( test::runner<int16_t>::run() );
|
|
CALL_SUBTEST_6( test::runner<uint16_t>::run() );
|
|
CALL_SUBTEST_7( test::runner<int32_t>::run() );
|
|
CALL_SUBTEST_8( test::runner<uint32_t>::run() );
|
|
CALL_SUBTEST_9( test::runner<int64_t>::run() );
|
|
CALL_SUBTEST_10( test::runner<uint64_t>::run() );
|
|
CALL_SUBTEST_11( test::runner<std::complex<float> >::run() );
|
|
CALL_SUBTEST_12( test::runner<std::complex<double> >::run() );
|
|
CALL_SUBTEST_13(( packetmath<half,internal::packet_traits<half>::type>() ));
|
|
#ifdef EIGEN_PACKET_MATH_SSE_H
|
|
CALL_SUBTEST_14(( packetmath<bool,internal::packet_traits<bool>::type>() ));
|
|
#endif
|
|
g_first_pass = false;
|
|
}
|
|
}
|