// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
// Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
2008-08-21 04:08:38 +08:00
|
|
|
|
2020-01-11 18:31:21 +08:00
|
|
|
#include "packetmath_test_shared.h"
|
2020-06-26 05:31:16 +08:00
|
|
|
#include "random_without_cast_overflow.h"
|
2019-06-21 02:47:49 +08:00
|
|
|
|
2020-05-12 04:23:31 +08:00
|
|
|
// Returns a + b using T's own operator+.
// NOTE(review): the REF_ prefix suggests this is the scalar reference that
// packet additions are checked against; the call sites are outside this view.
template <typename T>
inline T REF_ADD(const T& a, const T& b) {
  return a + b;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
// Returns a - b using T's own operator-.
// NOTE(review): presumably the scalar reference for packet subtraction; the
// call sites are outside this view.
template <typename T>
inline T REF_SUB(const T& a, const T& b) {
  return a - b;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
// Returns a * b using T's own operator*.
// NOTE(review): presumably the scalar reference for packet multiplication; the
// call sites are outside this view.
template <typename T>
inline T REF_MUL(const T& a, const T& b) {
  return a * b;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
// Returns a / b using T's own operator/ (so integer types truncate).
// No check is made for b == 0; that is left to the caller.
template <typename T>
inline T REF_DIV(const T& a, const T& b) {
  return a / b;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
// Returns |a - b|. The larger operand is always the minuend, so the result
// does not wrap around for unsigned T.
template <typename T>
inline T REF_ABS_DIFF(const T& a, const T& b) {
  return a > b ? a - b : b - a;
}
|
2020-05-12 04:23:31 +08:00
|
|
|
|
2020-05-20 07:21:56 +08:00
|
|
|
// Specializations for bool.
|
2020-05-12 04:23:31 +08:00
|
|
|
template <>
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
inline bool REF_ADD(const bool& a, const bool& b) {
|
|
|
|
return a || b;
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
template <>
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
inline bool REF_SUB(const bool& a, const bool& b) {
|
|
|
|
return a ^ b;
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
template <>
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
inline bool REF_MUL(const bool& a, const bool& b) {
|
|
|
|
return a && b;
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
|
2020-05-29 01:41:28 +08:00
|
|
|
// Uses pcast to cast from one array to another.
// Primary template, declared only: the implementation is selected by partial
// specialization on the (SrcCoeffRatio, TgtCoeffRatio) pair.
template <typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
struct pcast_array;
|
|
|
|
|
|
|
|
template <typename SrcPacket, typename TgtPacket, int TgtCoeffRatio>
|
|
|
|
struct pcast_array<SrcPacket, TgtPacket, 1, TgtCoeffRatio> {
|
|
|
|
typedef typename internal::unpacket_traits<SrcPacket>::type SrcScalar;
|
|
|
|
typedef typename internal::unpacket_traits<TgtPacket>::type TgtScalar;
|
|
|
|
static void cast(const SrcScalar* src, size_t size, TgtScalar* dst) {
|
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
2020-05-29 01:41:28 +08:00
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
size_t i;
|
|
|
|
for (i = 0; i < size && i + SrcPacketSize <= size; i += TgtPacketSize) {
|
|
|
|
internal::pstoreu(dst + i, internal::pcast<SrcPacket, TgtPacket>(internal::ploadu<SrcPacket>(src + i)));
|
|
|
|
}
|
|
|
|
// Leftovers that cannot be loaded into a packet.
|
|
|
|
for (; i < size; ++i) {
|
|
|
|
dst[i] = static_cast<TgtScalar>(src[i]);
|
2020-05-29 01:41:28 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket>
|
|
|
|
struct pcast_array<SrcPacket, TgtPacket, 2, 1> {
|
|
|
|
static void cast(const typename internal::unpacket_traits<SrcPacket>::type* src, size_t size,
|
|
|
|
typename internal::unpacket_traits<TgtPacket>::type* dst) {
|
2020-05-29 01:41:28 +08:00
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (size_t i = 0; i < size; i += TgtPacketSize) {
|
|
|
|
SrcPacket a = internal::ploadu<SrcPacket>(src + i);
|
|
|
|
SrcPacket b = internal::ploadu<SrcPacket>(src + i + SrcPacketSize);
|
|
|
|
internal::pstoreu(dst + i, internal::pcast<SrcPacket, TgtPacket>(a, b));
|
2020-05-29 01:41:28 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket>
|
|
|
|
struct pcast_array<SrcPacket, TgtPacket, 4, 1> {
|
|
|
|
static void cast(const typename internal::unpacket_traits<SrcPacket>::type* src, size_t size,
|
|
|
|
typename internal::unpacket_traits<TgtPacket>::type* dst) {
|
2020-05-29 01:41:28 +08:00
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (size_t i = 0; i < size; i += TgtPacketSize) {
|
|
|
|
SrcPacket a = internal::ploadu<SrcPacket>(src + i);
|
|
|
|
SrcPacket b = internal::ploadu<SrcPacket>(src + i + SrcPacketSize);
|
|
|
|
SrcPacket c = internal::ploadu<SrcPacket>(src + i + 2 * SrcPacketSize);
|
|
|
|
SrcPacket d = internal::ploadu<SrcPacket>(src + i + 3 * SrcPacketSize);
|
|
|
|
internal::pstoreu(dst + i, internal::pcast<SrcPacket, TgtPacket>(a, b, c, d));
|
2020-05-29 01:41:28 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket>
|
|
|
|
struct pcast_array<SrcPacket, TgtPacket, 8, 1> {
|
|
|
|
static void cast(const typename internal::unpacket_traits<SrcPacket>::type* src, size_t size,
|
|
|
|
typename internal::unpacket_traits<TgtPacket>::type* dst) {
|
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
|
|
|
for (size_t i = 0; i < size; i += TgtPacketSize) {
|
|
|
|
SrcPacket a = internal::ploadu<SrcPacket>(src + i);
|
|
|
|
SrcPacket b = internal::ploadu<SrcPacket>(src + i + SrcPacketSize);
|
|
|
|
SrcPacket c = internal::ploadu<SrcPacket>(src + i + 2 * SrcPacketSize);
|
|
|
|
SrcPacket d = internal::ploadu<SrcPacket>(src + i + 3 * SrcPacketSize);
|
|
|
|
SrcPacket e = internal::ploadu<SrcPacket>(src + i + 4 * SrcPacketSize);
|
|
|
|
SrcPacket f = internal::ploadu<SrcPacket>(src + i + 5 * SrcPacketSize);
|
|
|
|
SrcPacket g = internal::ploadu<SrcPacket>(src + i + 6 * SrcPacketSize);
|
|
|
|
SrcPacket h = internal::ploadu<SrcPacket>(src + i + 7 * SrcPacketSize);
|
|
|
|
internal::pstoreu(dst + i, internal::pcast<SrcPacket, TgtPacket>(a, b, c, d, e, f, g, h));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Primary template, declared only. CanCast selects between the no-op
// specialization (false, the default) and the one that runs the cast test
// (true).
template <typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio, bool CanCast = false>
struct test_cast_helper;
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
|
2020-05-29 01:41:28 +08:00
|
|
|
struct test_cast_helper<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio, false> {
|
2020-03-27 04:18:19 +08:00
|
|
|
static void run() {}
|
|
|
|
};
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket, int SrcCoeffRatio, int TgtCoeffRatio>
|
2020-05-29 01:41:28 +08:00
|
|
|
struct test_cast_helper<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio, true> {
|
2020-03-27 04:18:19 +08:00
|
|
|
static void run() {
|
2020-05-29 01:41:28 +08:00
|
|
|
typedef typename internal::unpacket_traits<SrcPacket>::type SrcScalar;
|
|
|
|
typedef typename internal::unpacket_traits<TgtPacket>::type TgtScalar;
|
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
static const int BlockSize = SrcPacketSize * SrcCoeffRatio;
|
|
|
|
eigen_assert(BlockSize == TgtPacketSize * TgtCoeffRatio && "Packet sizes and cast ratios are mismatched.");
|
2020-05-29 01:41:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
static const int DataSize = 10 * BlockSize;
|
2020-05-29 01:41:28 +08:00
|
|
|
EIGEN_ALIGN_MAX SrcScalar data1[DataSize];
|
|
|
|
EIGEN_ALIGN_MAX TgtScalar data2[DataSize];
|
|
|
|
EIGEN_ALIGN_MAX TgtScalar ref[DataSize];
|
2020-03-27 04:18:19 +08:00
|
|
|
|
|
|
|
// Construct a packet of scalars that will not overflow when casting
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < DataSize; ++i) {
|
2020-06-26 05:31:16 +08:00
|
|
|
data1[i] = internal::random_without_cast_overflow<SrcScalar, TgtScalar>::value();
|
2020-03-27 04:18:19 +08:00
|
|
|
}
|
|
|
|
|
2020-06-26 05:31:16 +08:00
|
|
|
for (int i = 0; i < DataSize; ++i) {
|
|
|
|
ref[i] = static_cast<const TgtScalar>(data1[i]);
|
|
|
|
}
|
2020-03-27 04:18:19 +08:00
|
|
|
|
2020-05-29 01:41:28 +08:00
|
|
|
pcast_array<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio>::cast(data1, DataSize, data2);
|
|
|
|
|
|
|
|
VERIFY(test::areApprox(ref, data2, DataSize) && "internal::pcast<>");
|
2020-03-27 04:18:19 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtPacket>
|
|
|
|
struct test_cast {
|
|
|
|
static void run() {
|
|
|
|
typedef typename internal::unpacket_traits<SrcPacket>::type SrcScalar;
|
|
|
|
typedef typename internal::unpacket_traits<TgtPacket>::type TgtScalar;
|
|
|
|
typedef typename internal::type_casting_traits<SrcScalar, TgtScalar> TypeCastingTraits;
|
|
|
|
static const int SrcCoeffRatio = TypeCastingTraits::SrcCoeffRatio;
|
|
|
|
static const int TgtCoeffRatio = TypeCastingTraits::TgtCoeffRatio;
|
|
|
|
static const int SrcPacketSize = internal::unpacket_traits<SrcPacket>::size;
|
|
|
|
static const int TgtPacketSize = internal::unpacket_traits<TgtPacket>::size;
|
|
|
|
static const bool HasCast =
|
|
|
|
internal::unpacket_traits<SrcPacket>::vectorizable && internal::unpacket_traits<TgtPacket>::vectorizable &&
|
|
|
|
TypeCastingTraits::VectorizedCast && (SrcPacketSize * SrcCoeffRatio == TgtPacketSize * TgtCoeffRatio);
|
|
|
|
test_cast_helper<SrcPacket, TgtPacket, SrcCoeffRatio, TgtCoeffRatio, HasCast>::run();
|
|
|
|
}
|
|
|
|
};
|
2020-05-29 01:41:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtScalar,
|
|
|
|
typename TgtPacket = typename internal::packet_traits<TgtScalar>::type,
|
|
|
|
bool Vectorized = internal::packet_traits<TgtScalar>::Vectorizable,
|
|
|
|
bool HasHalf = !internal::is_same<typename internal::unpacket_traits<TgtPacket>::half, TgtPacket>::value>
|
|
|
|
struct test_cast_runner;
|
2020-05-29 01:41:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtScalar, typename TgtPacket>
|
|
|
|
struct test_cast_runner<SrcPacket, TgtScalar, TgtPacket, true, false> {
|
|
|
|
static void run() { test_cast<SrcPacket, TgtPacket>::run(); }
|
|
|
|
};
|
2020-05-29 01:41:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename SrcPacket, typename TgtScalar, typename TgtPacket>
|
|
|
|
struct test_cast_runner<SrcPacket, TgtScalar, TgtPacket, true, true> {
|
|
|
|
static void run() {
|
|
|
|
test_cast<SrcPacket, TgtPacket>::run();
|
|
|
|
test_cast_runner<SrcPacket, TgtScalar, typename internal::unpacket_traits<TgtPacket>::half>::run();
|
2020-05-29 01:41:28 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
template <typename SrcPacket, typename TgtScalar, typename TgtPacket>
|
|
|
|
struct test_cast_runner<SrcPacket, TgtScalar, TgtPacket, false, false> {
|
|
|
|
static void run() {}
|
|
|
|
};
|
|
|
|
|
2020-06-26 05:31:16 +08:00
|
|
|
template <typename Scalar, typename Packet, typename EnableIf = void>
|
|
|
|
struct packetmath_pcast_ops_runner {
|
|
|
|
static void run() {
|
|
|
|
test_cast_runner<Packet, float>::run();
|
|
|
|
test_cast_runner<Packet, double>::run();
|
|
|
|
test_cast_runner<Packet, int8_t>::run();
|
|
|
|
test_cast_runner<Packet, uint8_t>::run();
|
|
|
|
test_cast_runner<Packet, int16_t>::run();
|
|
|
|
test_cast_runner<Packet, uint16_t>::run();
|
|
|
|
test_cast_runner<Packet, int32_t>::run();
|
|
|
|
test_cast_runner<Packet, uint32_t>::run();
|
|
|
|
test_cast_runner<Packet, int64_t>::run();
|
|
|
|
test_cast_runner<Packet, uint64_t>::run();
|
|
|
|
test_cast_runner<Packet, bool>::run();
|
2020-07-02 02:41:59 +08:00
|
|
|
test_cast_runner<Packet, std::complex<float> >::run();
|
|
|
|
test_cast_runner<Packet, std::complex<double> >::run();
|
2020-06-26 05:31:16 +08:00
|
|
|
test_cast_runner<Packet, half>::run();
|
|
|
|
test_cast_runner<Packet, bfloat16>::run();
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
// Only some types support cast from std::complex<>.
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
2020-06-26 05:31:16 +08:00
|
|
|
struct packetmath_pcast_ops_runner<Scalar, Packet, typename internal::enable_if<NumTraits<Scalar>::IsComplex>::type> {
|
|
|
|
static void run() {
|
2020-07-02 02:41:59 +08:00
|
|
|
test_cast_runner<Packet, std::complex<float> >::run();
|
|
|
|
test_cast_runner<Packet, std::complex<double> >::run();
|
2020-06-26 05:31:16 +08:00
|
|
|
test_cast_runner<Packet, half>::run();
|
|
|
|
test_cast_runner<Packet, bfloat16>::run();
|
|
|
|
}
|
|
|
|
};
|
2020-03-27 04:18:19 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
void packetmath_boolean_mask_ops() {
|
2020-04-21 04:16:28 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
const int size = 2 * PacketSize;
|
2020-04-21 04:16:28 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-04-21 04:16:28 +08:00
|
|
|
data1[i] = internal::random<Scalar>();
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
CHECK_CWISE1(internal::ptrue, internal::ptrue);
|
|
|
|
CHECK_CWISE2_IF(true, internal::pandnot, internal::pandnot);
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
data1[i] = Scalar(i);
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
}
|
|
|
|
CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
|
2020-06-25 04:27:26 +08:00
|
|
|
|
|
|
|
//Test (-0) == (0) for signed operations
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
data1[i] = Scalar(-0.0);
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
}
|
|
|
|
CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
|
|
|
|
|
|
|
|
//Test NaN
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
data1[i] = std::numeric_limits<Scalar>::quiet_NaN();
|
|
|
|
data1[i + PacketSize] = internal::random<bool>() ? data1[i] : Scalar(0);
|
|
|
|
}
|
|
|
|
CHECK_CWISE2_IF(true, internal::pcmp_eq, internal::pcmp_eq);
|
2020-05-12 04:23:31 +08:00
|
|
|
}
|
|
|
|
|
2020-05-20 07:21:56 +08:00
|
|
|
// Packet16b representing bool does not support ptrue, pandnot or pcmp_eq, since the scalar path
|
|
|
|
// (for some compilers) compute the bitwise and with 0x1 of the results to keep the value in [0,1].
|
2020-06-26 05:31:16 +08:00
|
|
|
template<>
|
2020-07-02 02:41:59 +08:00
|
|
|
void packetmath_boolean_mask_ops<bool, internal::packet_traits<bool>::type>() {}
|
2020-04-21 04:16:28 +08:00
|
|
|
|
2020-06-25 04:27:26 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
void packetmath_minus_zero_add() {
|
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
const int size = 2 * PacketSize;
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
|
|
|
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
data1[i] = Scalar(-0.0);
|
|
|
|
data1[i + PacketSize] = Scalar(-0.0);
|
|
|
|
}
|
|
|
|
CHECK_CWISE2_IF(internal::packet_traits<Scalar>::HasAdd, REF_ADD, internal::padd);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
void packetmath() {
|
2015-10-13 15:53:46 +08:00
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2010-07-05 16:54:24 +08:00
|
|
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
2008-08-21 04:08:38 +08:00
|
|
|
|
2018-11-26 21:10:07 +08:00
|
|
|
if (g_first_pass)
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
std::cerr << "=== Testing packet of type '" << typeid(Packet).name() << "' and scalar type '"
|
|
|
|
<< typeid(Scalar).name() << "' and size '" << PacketSize << "' ===\n";
|
2018-11-26 21:10:07 +08:00
|
|
|
|
2014-01-30 03:43:05 +08:00
|
|
|
const int max_size = PacketSize > 4 ? PacketSize : 4;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
const int size = PacketSize * max_size;
|
2015-07-29 17:11:23 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[size];
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[size];
|
Adding lowlevel APIs for optimized RHS packet load in TensorFlow
SpatialConvolution
Low-level APIs are added in order to optimize packet load in gemm_pack_rhs
in TensorFlow SpatialConvolution. The optimization is for scenario when a
packet is split across 2 adjacent columns. In this case we read it as two
'partial' packets and then merge these into 1. Currently this only works for
Packet16f (AVX512) and Packet8f (AVX2). We plan to add this for other
packet types (such as Packet8d) also.
This optimization shows significant speedup in SpatialConvolution with
certain parameters. Some examples are below.
Benchmark parameters are specified as:
Batch size, Input dim, Depth, Num of filters, Filter dim
Speedup numbers are specified for number of threads 1, 2, 4, 8, 16.
AVX512:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 |2.18X, 2.13X, 1.73X, 1.64X, 1.66X
128, 24x24, 1, 64, 8x8 |2.00X, 1.98X, 1.93X, 1.91X, 1.91X
32, 24x24, 3, 64, 5x5 |2.26X, 2.14X, 2.17X, 2.22X, 2.33X
128, 24x24, 3, 64, 3x3 |1.51X, 1.45X, 1.45X, 1.67X, 1.57X
32, 14x14, 24, 64, 5x5 |1.21X, 1.19X, 1.16X, 1.70X, 1.17X
128, 128x128, 3, 96, 11x11 |2.17X, 2.18X, 2.19X, 2.20X, 2.18X
AVX2:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 | 1.66X, 1.65X, 1.61X, 1.56X, 1.49X
32, 24x24, 3, 64, 5x5 | 1.71X, 1.63X, 1.77X, 1.58X, 1.68X
128, 24x24, 1, 64, 5x5 | 1.44X, 1.40X, 1.38X, 1.37X, 1.33X
128, 24x24, 3, 64, 3x3 | 1.68X, 1.63X, 1.58X, 1.56X, 1.62X
128, 128x128, 3, 96, 11x11 | 1.36X, 1.36X, 1.37X, 1.37X, 1.37X
In the higher level benchmark cifar10, we observe a runtime improvement
of around 6% for AVX512 on Intel Skylake server (8 cores).
On lower level PackRhs micro-benchmarks specified in TensorFlow
tensorflow/core/kernels/eigen_spatial_convolutions_test.cc, we observe
the following runtime numbers:
AVX512:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 41350 | 15073 | 2.74X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 7277 | 7341 | 0.99X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 8675 | 8681 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 24155 | 16079 | 1.50X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 25052 | 17152 | 1.46X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 18269 | 18345 | 1.00X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 19468 | 19872 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 156060 | 42432 | 3.68X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 132701 | 36944 | 3.59X
AVX2:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 26233 | 12393 | 2.12X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 6091 | 6062 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 7427 | 7408 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 23453 | 20826 | 1.13X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 23167 | 22091 | 1.09X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 23422 | 23682 | 0.99X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 23165 | 23663 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 72689 | 44969 | 1.62X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 61732 | 39779 | 1.55X
All benchmarks on Intel Skylake server with 8 cores.
2019-04-20 14:46:43 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data3[size];
|
2015-07-29 17:11:23 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar ref[size];
|
2018-07-06 23:13:36 +08:00
|
|
|
RealScalar refvalue = RealScalar(0);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = internal::random<Scalar>() / RealScalar(PacketSize);
|
|
|
|
data2[i] = internal::random<Scalar>() / RealScalar(PacketSize);
|
2020-02-10 22:58:37 +08:00
|
|
|
refvalue = (std::max)(refvalue, numext::abs(data1[i]));
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
2010-10-25 22:15:22 +08:00
|
|
|
internal::pstore(data2, internal::pload<Packet>(data1));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(data1, data2, PacketSize) && "aligned load/store");
|
2008-08-21 04:08:38 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
internal::pstore(data2, internal::ploadu<Packet>(data1 + offset));
|
|
|
|
VERIFY(test::areApprox(data1 + offset, data2, PacketSize) && "internal::ploadu");
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
internal::pstoreu(data2 + offset, internal::pload<Packet>(data1));
|
|
|
|
VERIFY(test::areApprox(data1, data2 + offset, PacketSize) && "internal::pstoreu");
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (internal::unpacket_traits<Packet>::masked_load_available) {
|
2020-01-11 18:31:21 +08:00
|
|
|
test::packet_helper<internal::unpacket_traits<Packet>::masked_load_available, Packet> h;
|
Adding lowlevel APIs for optimized RHS packet load in TensorFlow
SpatialConvolution
Low-level APIs are added in order to optimize packet load in gemm_pack_rhs
in TensorFlow SpatialConvolution. The optimization is for scenario when a
packet is split across 2 adjacent columns. In this case we read it as two
'partial' packets and then merge these into 1. Currently this only works for
Packet16f (AVX512) and Packet8f (AVX2). We plan to add this for other
packet types (such as Packet8d) also.
This optimization shows significant speedup in SpatialConvolution with
certain parameters. Some examples are below.
Benchmark parameters are specified as:
Batch size, Input dim, Depth, Num of filters, Filter dim
Speedup numbers are specified for number of threads 1, 2, 4, 8, 16.
AVX512:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 |2.18X, 2.13X, 1.73X, 1.64X, 1.66X
128, 24x24, 1, 64, 8x8 |2.00X, 1.98X, 1.93X, 1.91X, 1.91X
32, 24x24, 3, 64, 5x5 |2.26X, 2.14X, 2.17X, 2.22X, 2.33X
128, 24x24, 3, 64, 3x3 |1.51X, 1.45X, 1.45X, 1.67X, 1.57X
32, 14x14, 24, 64, 5x5 |1.21X, 1.19X, 1.16X, 1.70X, 1.17X
128, 128x128, 3, 96, 11x11 |2.17X, 2.18X, 2.19X, 2.20X, 2.18X
AVX2:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 | 1.66X, 1.65X, 1.61X, 1.56X, 1.49X
32, 24x24, 3, 64, 5x5 | 1.71X, 1.63X, 1.77X, 1.58X, 1.68X
128, 24x24, 1, 64, 5x5 | 1.44X, 1.40X, 1.38X, 1.37X, 1.33X
128, 24x24, 3, 64, 3x3 | 1.68X, 1.63X, 1.58X, 1.56X, 1.62X
128, 128x128, 3, 96, 11x11 | 1.36X, 1.36X, 1.37X, 1.37X, 1.37X
In the higher level benchmark cifar10, we observe a runtime improvement
of around 6% for AVX512 on Intel Skylake server (8 cores).
On lower level PackRhs micro-benchmarks specified in TensorFlow
tensorflow/core/kernels/eigen_spatial_convolutions_test.cc, we observe
the following runtime numbers:
AVX512:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 41350 | 15073 | 2.74X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 7277 | 7341 | 0.99X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 8675 | 8681 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 24155 | 16079 | 1.50X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 25052 | 17152 | 1.46X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 18269 | 18345 | 1.00X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 19468 | 19872 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 156060 | 42432 | 3.68X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 132701 | 36944 | 3.59X
AVX2:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 26233 | 12393 | 2.12X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 6091 | 6062 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 7427 | 7408 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 23453 | 20826 | 1.13X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 23167 | 22091 | 1.09X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 23422 | 23682 | 0.99X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 23165 | 23663 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 72689 | 44969 | 1.62X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 61732 | 39779 | 1.55X
All benchmarks on Intel Skylake server with 8 cores.
2019-04-20 14:46:43 +08:00
|
|
|
unsigned long long max_umask = (0x1ull << PacketSize);
|
2019-05-03 04:14:18 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
for (unsigned long long umask = 0; umask < max_umask; ++umask) {
|
|
|
|
h.store(data2, h.load(data1 + offset, umask));
|
|
|
|
for (int k = 0; k < PacketSize; ++k) data3[k] = ((umask & (0x1ull << k)) >> k) ? data1[k + offset] : Scalar(0);
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(data3, data2, PacketSize) && "internal::ploadu masked");
|
Adding lowlevel APIs for optimized RHS packet load in TensorFlow
SpatialConvolution
Low-level APIs are added in order to optimize packet load in gemm_pack_rhs
in TensorFlow SpatialConvolution. The optimization is for scenario when a
packet is split across 2 adjacent columns. In this case we read it as two
'partial' packets and then merge these into 1. Currently this only works for
Packet16f (AVX512) and Packet8f (AVX2). We plan to add this for other
packet types (such as Packet8d) also.
This optimization shows significant speedup in SpatialConvolution with
certain parameters. Some examples are below.
Benchmark parameters are specified as:
Batch size, Input dim, Depth, Num of filters, Filter dim
Speedup numbers are specified for number of threads 1, 2, 4, 8, 16.
AVX512:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 |2.18X, 2.13X, 1.73X, 1.64X, 1.66X
128, 24x24, 1, 64, 8x8 |2.00X, 1.98X, 1.93X, 1.91X, 1.91X
32, 24x24, 3, 64, 5x5 |2.26X, 2.14X, 2.17X, 2.22X, 2.33X
128, 24x24, 3, 64, 3x3 |1.51X, 1.45X, 1.45X, 1.67X, 1.57X
32, 14x14, 24, 64, 5x5 |1.21X, 1.19X, 1.16X, 1.70X, 1.17X
128, 128x128, 3, 96, 11x11 |2.17X, 2.18X, 2.19X, 2.20X, 2.18X
AVX2:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 | 1.66X, 1.65X, 1.61X, 1.56X, 1.49X
32, 24x24, 3, 64, 5x5 | 1.71X, 1.63X, 1.77X, 1.58X, 1.68X
128, 24x24, 1, 64, 5x5 | 1.44X, 1.40X, 1.38X, 1.37X, 1.33X
128, 24x24, 3, 64, 3x3 | 1.68X, 1.63X, 1.58X, 1.56X, 1.62X
128, 128x128, 3, 96, 11x11 | 1.36X, 1.36X, 1.37X, 1.37X, 1.37X
In the higher level benchmark cifar10, we observe a runtime improvement
of around 6% for AVX512 on Intel Skylake server (8 cores).
On lower level PackRhs micro-benchmarks specified in TensorFlow
tensorflow/core/kernels/eigen_spatial_convolutions_test.cc, we observe
the following runtime numbers:
AVX512:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 41350 | 15073 | 2.74X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 7277 | 7341 | 0.99X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 8675 | 8681 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 24155 | 16079 | 1.50X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 25052 | 17152 | 1.46X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 18269 | 18345 | 1.00X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 19468 | 19872 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 156060 | 42432 | 3.68X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 132701 | 36944 | 3.59X
AVX2:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 26233 | 12393 | 2.12X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 6091 | 6062 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 7427 | 7408 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 23453 | 20826 | 1.13X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 23167 | 22091 | 1.09X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 23422 | 23682 | 0.99X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 23165 | 23663 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 72689 | 44969 | 1.62X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 61732 | 39779 | 1.55X
All benchmarks on Intel Skylake server with 8 cores.
2019-04-20 14:46:43 +08:00
|
|
|
}
|
|
|
|
}
|
2019-05-03 05:52:58 +08:00
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (internal::unpacket_traits<Packet>::masked_store_available) {
|
2020-01-11 18:31:21 +08:00
|
|
|
test::packet_helper<internal::unpacket_traits<Packet>::masked_store_available, Packet> h;
|
2019-05-03 05:52:58 +08:00
|
|
|
unsigned long long max_umask = (0x1ull << PacketSize);
|
2019-05-03 04:14:18 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < PacketSize; ++offset) {
|
|
|
|
for (unsigned long long umask = 0; umask < max_umask; ++umask) {
|
2019-05-03 04:14:18 +08:00
|
|
|
internal::pstore(data2, internal::pset1<Packet>(Scalar(0)));
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
h.store(data2, h.loadu(data1 + offset), umask);
|
|
|
|
for (int k = 0; k < PacketSize; ++k) data3[k] = ((umask & (0x1ull << k)) >> k) ? data1[k + offset] : Scalar(0);
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(data3, data2, PacketSize) && "internal::pstoreu masked");
|
2019-05-03 04:14:18 +08:00
|
|
|
}
|
|
|
|
}
|
Adding lowlevel APIs for optimized RHS packet load in TensorFlow
SpatialConvolution
Low-level APIs are added in order to optimize packet load in gemm_pack_rhs
in TensorFlow SpatialConvolution. The optimization is for the scenario when a
packet is split across 2 adjacent columns. In this case we read it as two
'partial' packets and then merge these into 1. Currently this only works for
Packet16f (AVX512) and Packet8f (AVX2). We plan to add this for other
packet types (such as Packet8d) also.
This optimization shows significant speedup in SpatialConvolution with
certain parameters. Some examples are below.
Benchmark parameters are specified as:
Batch size, Input dim, Depth, Num of filters, Filter dim
Speedup numbers are specified for number of threads 1, 2, 4, 8, 16.
AVX512:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 |2.18X, 2.13X, 1.73X, 1.64X, 1.66X
128, 24x24, 1, 64, 8x8 |2.00X, 1.98X, 1.93X, 1.91X, 1.91X
32, 24x24, 3, 64, 5x5 |2.26X, 2.14X, 2.17X, 2.22X, 2.33X
128, 24x24, 3, 64, 3x3 |1.51X, 1.45X, 1.45X, 1.67X, 1.57X
32, 14x14, 24, 64, 5x5 |1.21X, 1.19X, 1.16X, 1.70X, 1.17X
128, 128x128, 3, 96, 11x11 |2.17X, 2.18X, 2.19X, 2.20X, 2.18X
AVX2:
Parameters | Speedup (Num of threads: 1, 2, 4, 8, 16)
----------------------------|------------------------------------------
128, 24x24, 3, 64, 5x5 | 1.66X, 1.65X, 1.61X, 1.56X, 1.49X
32, 24x24, 3, 64, 5x5 | 1.71X, 1.63X, 1.77X, 1.58X, 1.68X
128, 24x24, 1, 64, 5x5 | 1.44X, 1.40X, 1.38X, 1.37X, 1.33X
128, 24x24, 3, 64, 3x3 | 1.68X, 1.63X, 1.58X, 1.56X, 1.62X
128, 128x128, 3, 96, 11x11 | 1.36X, 1.36X, 1.37X, 1.37X, 1.37X
In the higher level benchmark cifar10, we observe a runtime improvement
of around 6% for AVX512 on Intel Skylake server (8 cores).
On lower level PackRhs micro-benchmarks specified in TensorFlow
tensorflow/core/kernels/eigen_spatial_convolutions_test.cc, we observe
the following runtime numbers:
AVX512:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 41350 | 15073 | 2.74X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 7277 | 7341 | 0.99X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 8675 | 8681 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 24155 | 16079 | 1.50X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 25052 | 17152 | 1.46X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 18269 | 18345 | 1.00X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 19468 | 19872 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 156060 | 42432 | 3.68X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 132701 | 36944 | 3.59X
AVX2:
Parameters | Runtime without patch (ns) | Runtime with patch (ns) | Speedup
---------------------------------------------------------------|----------------------------|-------------------------|---------
BM_RHS_NAME(PackRhs, 128, 24, 24, 3, 64, 5, 5, 1, 1, 256, 56) | 26233 | 12393 | 2.12X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 1, 1, 256, 56) | 6091 | 6062 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 32, 64, 5, 5, 2, 2, 256, 56) | 7427 | 7408 | 1.00X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 1, 1, 256, 56) | 23453 | 20826 | 1.13X
BM_RHS_NAME(PackRhs, 32, 64, 64, 30, 64, 5, 5, 2, 2, 256, 56) | 23167 | 22091 | 1.09X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 1, 1, 256, 56) | 23422 | 23682 | 0.99X
BM_RHS_NAME(PackRhs, 32, 256, 256, 4, 16, 8, 8, 2, 4, 256, 56) | 23165 | 23663 | 0.98X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 1, 1, 36, 432) | 72689 | 44969 | 1.62X
BM_RHS_NAME(PackRhs, 32, 64, 64, 4, 16, 3, 3, 2, 2, 36, 432) | 61732 | 39779 | 1.55X
All benchmarks on Intel Skylake server with 8 cores.
2019-04-20 14:46:43 +08:00
|
|
|
}
|
|
|
|
|
2015-10-13 15:53:46 +08:00
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd);
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd);
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub);
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
|
2015-10-13 15:53:46 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasNegate) CHECK_CWISE1(internal::negate, internal::pnegate);
|
2013-06-11 05:40:56 +08:00
|
|
|
CHECK_CWISE1(numext::conj, internal::pconj);
|
2008-08-21 04:08:38 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < 3; ++offset) {
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[offset];
|
2011-02-24 02:24:26 +08:00
|
|
|
internal::pstore(data2, internal::pset1<Packet>(data1[offset]));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::pset1");
|
2011-02-24 02:24:26 +08:00
|
|
|
}
|
2016-03-28 22:58:02 +08:00
|
|
|
|
2014-04-25 17:21:18 +08:00
|
|
|
{
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize * 4; ++i) ref[i] = data1[i / PacketSize];
|
2014-04-25 17:21:18 +08:00
|
|
|
Packet A0, A1, A2, A3;
|
2014-04-25 17:46:22 +08:00
|
|
|
internal::pbroadcast4<Packet>(data1, A0, A1, A2, A3);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::pstore(data2 + 0 * PacketSize, A0);
|
|
|
|
internal::pstore(data2 + 1 * PacketSize, A1);
|
|
|
|
internal::pstore(data2 + 2 * PacketSize, A2);
|
|
|
|
internal::pstore(data2 + 3 * PacketSize, A3);
|
|
|
|
VERIFY(test::areApprox(ref, data2, 4 * PacketSize) && "internal::pbroadcast4");
|
2014-04-25 17:21:18 +08:00
|
|
|
}
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2014-04-25 17:21:18 +08:00
|
|
|
{
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize * 2; ++i) ref[i] = data1[i / PacketSize];
|
2014-05-05 21:03:29 +08:00
|
|
|
Packet A0, A1;
|
2014-04-25 17:46:22 +08:00
|
|
|
internal::pbroadcast2<Packet>(data1, A0, A1);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::pstore(data2 + 0 * PacketSize, A0);
|
|
|
|
internal::pstore(data2 + 1 * PacketSize, A1);
|
|
|
|
VERIFY(test::areApprox(ref, data2, 2 * PacketSize) && "internal::pbroadcast2");
|
2014-04-25 17:21:18 +08:00
|
|
|
}
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2010-10-25 22:15:22 +08:00
|
|
|
VERIFY(internal::isApprox(data1[0], internal::pfirst(internal::pload<Packet>(data1))) && "internal::pfirst");
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketSize > 1) {
|
2018-09-28 22:57:32 +08:00
|
|
|
// apply different offsets to check that ploaddup is robust to unaligned inputs
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < 4; ++offset) {
|
|
|
|
for (int i = 0; i < PacketSize / 2; ++i) ref[2 * i + 0] = ref[2 * i + 1] = data1[offset + i];
|
|
|
|
internal::pstore(data2, internal::ploaddup<Packet>(data1 + offset));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "ploaddup");
|
2011-02-24 05:22:10 +08:00
|
|
|
}
|
2011-02-23 23:20:55 +08:00
|
|
|
}
|
2016-03-28 06:47:49 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketSize > 2) {
|
2018-09-28 22:57:32 +08:00
|
|
|
// apply different offsets to check that ploadquad is robust to unaligned inputs
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int offset = 0; offset < 4; ++offset) {
|
|
|
|
for (int i = 0; i < PacketSize / 4; ++i)
|
|
|
|
ref[4 * i + 0] = ref[4 * i + 1] = ref[4 * i + 2] = ref[4 * i + 3] = data1[offset + i];
|
|
|
|
internal::pstore(data2, internal::ploadquad<Packet>(data1 + offset));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "ploadquad");
|
2014-04-17 22:27:22 +08:00
|
|
|
}
|
|
|
|
}
|
2008-08-21 04:08:38 +08:00
|
|
|
|
2018-07-06 23:13:36 +08:00
|
|
|
ref[0] = Scalar(0);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] += data1[i];
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::isApproxAbs(ref[0], internal::predux(internal::pload<Packet>(data1)), refvalue) && "internal::predux");
|
2016-03-28 22:58:02 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketSize == 8 && internal::unpacket_traits<typename internal::unpacket_traits<Packet>::half>::size ==
|
|
|
|
4) // so far, predux_half_downto4 is only required in such a case
|
2014-04-17 22:27:22 +08:00
|
|
|
{
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
int HalfPacketSize = PacketSize > 4 ? PacketSize / 2 : PacketSize;
|
|
|
|
for (int i = 0; i < HalfPacketSize; ++i) ref[i] = Scalar(0);
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i % HalfPacketSize] += data1[i];
|
2018-04-03 20:28:38 +08:00
|
|
|
internal::pstore(data2, internal::predux_half_dowto4(internal::pload<Packet>(data1)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, HalfPacketSize) && "internal::predux_half_dowto4");
|
2014-04-17 22:27:22 +08:00
|
|
|
}
|
2009-03-10 02:40:09 +08:00
|
|
|
|
2018-07-06 23:13:36 +08:00
|
|
|
ref[0] = Scalar(1);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = REF_MUL(ref[0], data1[i]);
|
2010-10-25 22:15:22 +08:00
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
|
2009-03-10 02:40:09 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[PacketSize - i - 1];
|
2010-10-25 22:15:22 +08:00
|
|
|
internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::preverse");
|
2014-03-27 10:03:07 +08:00
|
|
|
|
2014-04-25 16:56:18 +08:00
|
|
|
internal::PacketBlock<Packet> kernel;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
kernel.packet[i] = internal::pload<Packet>(data1 + i * PacketSize);
|
2014-03-27 10:03:07 +08:00
|
|
|
}
|
|
|
|
ptranspose(kernel);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2014-03-27 10:03:07 +08:00
|
|
|
internal::pstore(data2, kernel.packet[i]);
|
|
|
|
for (int j = 0; j < PacketSize; ++j) {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(test::isApproxAbs(data2[j], data1[i + j * PacketSize], refvalue) && "ptranspose");
|
2014-03-27 10:03:07 +08:00
|
|
|
}
|
|
|
|
}
|
2014-06-07 11:18:44 +08:00
|
|
|
|
2015-10-13 15:53:46 +08:00
|
|
|
if (PacketTraits::HasBlend) {
|
2014-06-07 11:18:44 +08:00
|
|
|
Packet thenPacket = internal::pload<Packet>(data1);
|
|
|
|
Packet elsePacket = internal::pload<Packet>(data2);
|
2015-07-29 17:11:23 +08:00
|
|
|
EIGEN_ALIGN_MAX internal::Selector<PacketSize> selector;
|
2014-06-07 11:18:44 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
selector.select[i] = i;
|
|
|
|
}
|
|
|
|
|
|
|
|
Packet blend = internal::pblend(selector, thenPacket, elsePacket);
|
2015-07-29 17:11:23 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar result[size];
|
2014-06-07 11:18:44 +08:00
|
|
|
internal::pstore(result, blend);
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue));
|
2014-06-07 11:18:44 +08:00
|
|
|
}
|
|
|
|
}
|
2016-10-25 22:48:49 +08:00
|
|
|
|
2019-06-21 02:47:49 +08:00
|
|
|
{
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
// "if" mask
|
|
|
|
unsigned char v = internal::random<bool>() ? 0xff : 0;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
char* bytes = (char*)(data1 + i);
|
|
|
|
for (int k = 0; k < int(sizeof(Scalar)); ++k) {
|
2019-06-21 02:47:49 +08:00
|
|
|
bytes[k] = v;
|
|
|
|
}
|
|
|
|
// "then" packet
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
data1[i + PacketSize] = internal::random<Scalar>();
|
2019-06-21 02:47:49 +08:00
|
|
|
// "else" packet
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
data1[i + 2 * PacketSize] = internal::random<Scalar>();
|
2019-06-21 02:47:49 +08:00
|
|
|
}
|
|
|
|
CHECK_CWISE3_IF(true, internal::pselect, internal::pselect);
|
|
|
|
}
|
2019-01-08 08:53:36 +08:00
|
|
|
|
2020-03-20 01:05:13 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasSqrt, numext::sqrt, internal::psqrt);
|
2020-04-21 04:16:28 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2020-04-21 04:16:28 +08:00
|
|
|
data1[i] = internal::random<Scalar>();
|
|
|
|
}
|
2020-05-12 04:23:31 +08:00
|
|
|
CHECK_CWISE1(internal::pzero, internal::pzero);
|
|
|
|
CHECK_CWISE2_IF(true, internal::por, internal::por);
|
|
|
|
CHECK_CWISE2_IF(true, internal::pxor, internal::pxor);
|
|
|
|
CHECK_CWISE2_IF(true, internal::pand, internal::pand);
|
2020-04-21 04:16:28 +08:00
|
|
|
|
2020-05-12 04:23:31 +08:00
|
|
|
packetmath_boolean_mask_ops<Scalar, Packet>();
|
2020-06-26 05:31:16 +08:00
|
|
|
packetmath_pcast_ops_runner<Scalar, Packet>::run();
|
2020-06-25 04:27:26 +08:00
|
|
|
packetmath_minus_zero_add<Scalar, Packet>();
|
2008-08-21 04:08:38 +08:00
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
void packetmath_real() {
|
2015-10-13 15:53:46 +08:00
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2009-03-25 20:26:13 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
const int size = PacketSize * 4;
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
|
2010-07-05 16:54:24 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = internal::random<Scalar>(0, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
|
|
|
|
data2[i] = internal::random<Scalar>(0, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
|
2020-01-11 18:31:21 +08:00
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = 0;
|
2020-01-11 18:31:21 +08:00
|
|
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasRsqrt, Scalar(1) / std::sqrt, internal::prsqrt);
|
2020-01-11 18:31:21 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-3, 3));
|
|
|
|
data2[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-3, 3));
|
2009-03-25 20:26:13 +08:00
|
|
|
}
|
2015-10-13 15:53:46 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin);
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan);
|
2015-11-05 05:15:57 +08:00
|
|
|
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
|
2019-12-17 05:00:35 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2019-12-13 03:04:56 +08:00
|
|
|
// See bug 1785.
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = -1.5 + i;
|
|
|
|
data2[i] = -1.5 + i;
|
|
|
|
}
|
2019-12-13 03:04:56 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
|
2019-12-17 05:00:35 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasRint, numext::rint, internal::print);
|
2019-12-13 03:04:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = internal::random<Scalar>(-1, 1);
|
|
|
|
data2[i] = internal::random<Scalar>(-1, 1);
|
2011-02-18 00:37:11 +08:00
|
|
|
}
|
2015-10-13 15:53:46 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
|
2010-07-05 16:54:24 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = internal::random<Scalar>(-87, 88);
|
|
|
|
data2[i] = internal::random<Scalar>(-87, 88);
|
2009-03-25 20:26:13 +08:00
|
|
|
}
|
2015-10-13 15:53:46 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
|
|
|
|
data2[i] = internal::random<Scalar>(-1, 1) * std::pow(Scalar(10), internal::random<Scalar>(-6, 6));
|
2016-02-11 09:41:47 +08:00
|
|
|
}
|
2019-12-17 05:33:42 +08:00
|
|
|
data1[0] = 1e-20;
|
2016-02-11 09:41:47 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasTanh, std::tanh, internal::ptanh);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasExp && PacketSize >= 2) {
|
2014-10-20 17:38:51 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
2015-06-24 10:12:46 +08:00
|
|
|
data1[1] = std::numeric_limits<Scalar>::epsilon();
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test::packet_helper<PacketTraits::HasExp, Packet> h;
|
2015-06-24 10:12:46 +08:00
|
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
2015-08-16 20:00:02 +08:00
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
2015-06-24 10:12:46 +08:00
|
|
|
VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]);
|
|
|
|
|
|
|
|
data1[0] = -std::numeric_limits<Scalar>::epsilon();
|
|
|
|
data1[1] = 0;
|
|
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
|
|
|
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
|
2015-12-11 09:17:42 +08:00
|
|
|
VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]);
|
2015-06-24 10:12:46 +08:00
|
|
|
|
|
|
|
data1[0] = (std::numeric_limits<Scalar>::min)();
|
|
|
|
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
|
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
|
|
|
VERIFY_IS_EQUAL(std::exp((std::numeric_limits<Scalar>::min)()), data2[0]);
|
|
|
|
VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits<Scalar>::min)()), data2[1]);
|
|
|
|
|
|
|
|
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
|
|
|
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
|
|
|
h.store(data2, internal::pexp(h.load(data1)));
|
|
|
|
VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
|
|
|
VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::denorm_min()), data2[1]);
|
2014-10-20 17:38:51 +08:00
|
|
|
}
|
2010-07-05 16:54:24 +08:00
|
|
|
|
2016-05-11 07:21:43 +08:00
|
|
|
if (PacketTraits::HasTanh) {
|
2016-09-22 17:18:52 +08:00
|
|
|
// NOTE: this test might fail with GCC prior to 6.3; see MathFunctionsImpl.h for details.
|
2016-05-11 07:21:43 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test::packet_helper<internal::packet_traits<Scalar>::HasTanh, Packet> h;
|
2016-05-11 07:21:43 +08:00
|
|
|
h.store(data2, internal::ptanh(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
}
|
|
|
|
|
2020-06-05 07:24:47 +08:00
|
|
|
if (PacketTraits::HasExp) {
|
2020-05-31 06:53:37 +08:00
|
|
|
internal::scalar_logistic_op<Scalar> logistic;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = internal::random<Scalar>(-20, 20);
|
2020-05-31 06:53:37 +08:00
|
|
|
}
|
2020-06-05 07:24:47 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test::packet_helper<PacketTraits::HasExp, Packet> h;
|
2020-06-05 07:24:47 +08:00
|
|
|
h.store(data2, logistic.packetOp(h.load(data1)));
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
VERIFY_IS_APPROX(data2[i], logistic(data1[i]));
|
2020-05-31 06:53:37 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-05-20 20:58:19 +08:00
|
|
|
#if EIGEN_HAS_C99_MATH && (__cplusplus > 199711L)
|
2019-08-29 03:20:21 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::infinity();
|
|
|
|
data1[1] = Scalar(-1);
|
2019-08-13 04:53:28 +08:00
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasLog1p, std::log1p, internal::plog1p);
|
2019-08-29 03:20:21 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::infinity();
|
|
|
|
data1[1] = -std::numeric_limits<Scalar>::infinity();
|
|
|
|
CHECK_CWISE1_IF(PacketTraits::HasExpm1, std::expm1, internal::pexpm1);
|
2015-12-08 08:38:48 +08:00
|
|
|
#endif
|
2015-12-08 07:24:49 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketSize >= 2) {
|
2014-10-20 19:13:43 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
2015-06-30 01:49:55 +08:00
|
|
|
data1[1] = std::numeric_limits<Scalar>::epsilon();
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasLog) {
|
|
|
|
test::packet_helper<PacketTraits::HasLog, Packet> h;
|
2018-04-26 16:47:39 +08:00
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]);
|
|
|
|
|
|
|
|
data1[0] = -std::numeric_limits<Scalar>::epsilon();
|
|
|
|
data1[1] = 0;
|
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]);
|
|
|
|
|
|
|
|
data1[0] = (std::numeric_limits<Scalar>::min)();
|
|
|
|
data1[1] = -(std::numeric_limits<Scalar>::min)();
|
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
VERIFY_IS_EQUAL(std::log((std::numeric_limits<Scalar>::min)()), data2[0]);
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
|
2020-06-23 23:58:34 +08:00
|
|
|
// Note: 32-bit ARM always flushes denormals to zero.
|
|
|
|
#if !EIGEN_ARCH_ARM
|
2020-06-30 02:35:46 +08:00
|
|
|
if (std::numeric_limits<Scalar>::has_denorm == std::denorm_present) {
|
2020-06-23 23:58:34 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::denorm_min();
|
|
|
|
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
// VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
}
|
|
|
|
#endif
|
2018-04-26 16:47:39 +08:00
|
|
|
|
|
|
|
data1[0] = Scalar(-1.0f);
|
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
2018-12-23 22:40:52 +08:00
|
|
|
|
|
|
|
data1[0] = std::numeric_limits<Scalar>::infinity();
|
|
|
|
h.store(data2, internal::plog(h.load(data1)));
|
|
|
|
VERIFY((numext::isinf)(data2[0]));
|
2018-04-26 16:47:39 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasLog1p) {
|
|
|
|
test::packet_helper<PacketTraits::HasLog1p, Packet> h;
|
2019-08-29 03:20:21 +08:00
|
|
|
data1[0] = Scalar(-2);
|
|
|
|
data1[1] = -std::numeric_limits<Scalar>::infinity();
|
|
|
|
h.store(data2, internal::plog1p(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasSqrt) {
|
|
|
|
test::packet_helper<PacketTraits::HasSqrt, Packet> h;
|
2018-04-26 16:47:39 +08:00
|
|
|
data1[0] = Scalar(-1.0f);
|
2018-12-27 18:20:47 +08:00
|
|
|
data1[1] = -std::numeric_limits<Scalar>::denorm_min();
|
2018-04-26 16:47:39 +08:00
|
|
|
h.store(data2, internal::psqrt(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
if (PacketTraits::HasCos) {
|
|
|
|
test::packet_helper<PacketTraits::HasCos, Packet> h;
|
|
|
|
for (Scalar k = 1; k < Scalar(10000) / std::numeric_limits<Scalar>::epsilon(); k *= 2) {
|
|
|
|
for (int k1 = 0; k1 <= 1; ++k1) {
|
|
|
|
data1[0] = (2 * k + k1) * Scalar(EIGEN_PI) / 2 * internal::random<Scalar>(0.8, 1.2);
|
|
|
|
data1[1] = (2 * k + 2 + k1) * Scalar(EIGEN_PI) / 2 * internal::random<Scalar>(0.8, 1.2);
|
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
|
|
h.store(data2 + PacketSize, internal::psin(h.load(data1)));
|
|
|
|
VERIFY(data2[0] <= Scalar(1.) && data2[0] >= Scalar(-1.));
|
|
|
|
VERIFY(data2[1] <= Scalar(1.) && data2[1] >= Scalar(-1.));
|
|
|
|
VERIFY(data2[PacketSize + 0] <= Scalar(1.) && data2[PacketSize + 0] >= Scalar(-1.));
|
|
|
|
VERIFY(data2[PacketSize + 1] <= Scalar(1.) && data2[PacketSize + 1] >= Scalar(-1.));
|
|
|
|
|
2020-06-23 23:58:34 +08:00
|
|
|
VERIFY_IS_APPROX(data2[0], std::cos(data1[0]));
|
|
|
|
VERIFY_IS_APPROX(data2[1], std::cos(data1[1]));
|
|
|
|
VERIFY_IS_APPROX(data2[PacketSize + 0], std::sin(data1[0]));
|
|
|
|
VERIFY_IS_APPROX(data2[PacketSize + 1], std::sin(data1[1]));
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY_IS_APPROX(numext::abs2(data2[0]) + numext::abs2(data2[PacketSize + 0]), Scalar(1));
|
|
|
|
VERIFY_IS_APPROX(numext::abs2(data2[1]) + numext::abs2(data2[PacketSize + 1]), Scalar(1));
|
2019-01-09 22:25:17 +08:00
|
|
|
}
|
2018-12-23 23:13:24 +08:00
|
|
|
}
|
2018-12-24 00:26:21 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::infinity();
|
2018-12-24 00:26:21 +08:00
|
|
|
data1[1] = -std::numeric_limits<Scalar>::infinity();
|
|
|
|
h.store(data2, internal::psin(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
|
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
VERIFY((numext::isnan)(data2[1]));
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
|
2018-12-24 00:26:21 +08:00
|
|
|
h.store(data2, internal::psin(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
|
|
VERIFY((numext::isnan)(data2[0]));
|
2019-01-09 22:25:17 +08:00
|
|
|
|
|
|
|
data1[0] = -Scalar(0.);
|
|
|
|
h.store(data2, internal::psin(h.load(data1)));
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(internal::biteq(data2[0], data1[0]));
|
2019-01-09 22:25:17 +08:00
|
|
|
h.store(data2, internal::pcos(h.load(data1)));
|
|
|
|
VERIFY_IS_EQUAL(data2[0], Scalar(1));
|
2018-12-23 23:13:24 +08:00
|
|
|
}
|
2014-10-20 19:13:43 +08:00
|
|
|
}
|
2013-03-21 01:28:40 +08:00
|
|
|
}
|
|
|
|
|
2020-08-22 06:52:34 +08:00
|
|
|
// Checks the unary packet operation POP against the scalar reference REFOP.
// The reference is evaluated on data1[i] converted to REFTYPE and the result
// is converted to SCALAR before being stored in ref. The whole check is
// skipped when COND is false. The expansion site must provide `Packet`,
// `PacketSize`, `data1`, `data2` and `ref`.
#define CAST_CHECK_CWISE1_IF(COND, REFOP, POP, SCALAR, REFTYPE)   \
  if (COND) {                                                     \
    test::packet_helper<COND, Packet> h;                          \
    for (int i = 0; i < PacketSize; ++i) {                        \
      ref[i] = SCALAR(REFOP(static_cast<REFTYPE>(data1[i])));     \
    }                                                             \
    h.store(data2, POP(h.load(data1)));                           \
    VERIFY(test::areApprox(ref, data2, PacketSize) && #POP);      \
  }
|
|
|
|
|
|
|
|
template <>
|
|
|
|
void packetmath_real<bfloat16, typename internal::packet_traits<bfloat16>::type>(){
|
|
|
|
typedef internal::packet_traits<bfloat16> PacketTraits;
|
|
|
|
typedef internal::packet_traits<bfloat16>::type Packet;
|
|
|
|
|
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
const int size = PacketSize * 4;
|
|
|
|
EIGEN_ALIGN_MAX bfloat16 data1[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX bfloat16 data2[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX bfloat16 ref[PacketSize * 4];
|
|
|
|
|
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = bfloat16(internal::random<float>(0, 1) * std::pow(float(10), internal::random<float>(-6, 6)));
|
|
|
|
data2[i] = bfloat16(internal::random<float>(0, 1) * std::pow(float(10), internal::random<float>(-6, 6)));
|
|
|
|
data1[i] = bfloat16(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (internal::random<float>(0, 1) < 0.1f) data1[internal::random<int>(0, PacketSize)] = bfloat16(0);
|
|
|
|
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog, bfloat16, float);
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasRsqrt, float(1) / std::sqrt, internal::prsqrt, bfloat16, float);
|
|
|
|
|
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = bfloat16(internal::random<float>(-1, 1) * std::pow(float(10), internal::random<float>(-3, 3)));
|
|
|
|
data2[i] = bfloat16(internal::random<float>(-1, 1) * std::pow(float(10), internal::random<float>(-3, 3)));
|
|
|
|
}
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin, bfloat16, float);
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos, bfloat16, float);
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan, bfloat16, float);
|
|
|
|
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround, bfloat16, float);
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil, bfloat16, float);
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor, bfloat16, float);
|
|
|
|
|
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = bfloat16(-1.5 + i);
|
|
|
|
data2[i] = bfloat16(-1.5 + i);
|
|
|
|
}
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround, bfloat16, float);
|
|
|
|
|
|
|
|
for (int i = 0; i < size; ++i) {
|
|
|
|
data1[i] = bfloat16(internal::random<float>(-87, 88));
|
|
|
|
data2[i] = bfloat16(internal::random<float>(-87, 88));
|
|
|
|
}
|
|
|
|
CAST_CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp, bfloat16, float);
|
|
|
|
|
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
void packetmath_notcomplex() {
|
2015-10-13 15:53:46 +08:00
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2013-03-21 01:28:40 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
Array<Scalar, Dynamic, 1>::Map(data1, PacketSize * 4).setRandom();
|
2010-07-05 22:18:09 +08:00
|
|
|
|
|
|
|
ref[0] = data1[0];
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = (std::min)(ref[0], data1[i]);
|
2010-10-25 22:15:22 +08:00
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload<Packet>(data1))) && "internal::predux_min");
|
2010-07-05 22:18:09 +08:00
|
|
|
|
2015-10-13 15:53:46 +08:00
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin);
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax);
|
|
|
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin);
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax);
|
2020-02-10 22:58:37 +08:00
|
|
|
CHECK_CWISE1(numext::abs, internal::pabs);
|
2020-03-20 01:45:20 +08:00
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff);
|
2010-07-05 22:18:09 +08:00
|
|
|
|
|
|
|
ref[0] = data1[0];
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = (std::max)(ref[0], data1[i]);
|
2010-10-25 22:15:22 +08:00
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload<Packet>(data1))) && "internal::predux_max");
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i] = data1[0] + Scalar(i);
|
2015-08-08 01:27:59 +08:00
|
|
|
internal::pstore(data2, internal::plset<Packet>(data1[0]));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, data2, PacketSize) && "internal::plset");
|
2019-01-09 22:25:17 +08:00
|
|
|
|
|
|
|
{
|
|
|
|
unsigned char* data1_bits = reinterpret_cast<unsigned char*>(data1);
|
|
|
|
// predux_all - not needed yet
|
|
|
|
// for (unsigned int i=0; i<PacketSize*sizeof(Scalar); ++i) data1_bits[i] = 0xff;
|
|
|
|
// VERIFY(internal::predux_all(internal::pload<Packet>(data1)) && "internal::predux_all(1111)");
|
|
|
|
// for(int k=0; k<PacketSize; ++k)
|
|
|
|
// {
|
|
|
|
// for (unsigned int i=0; i<sizeof(Scalar); ++i) data1_bits[k*sizeof(Scalar)+i] = 0x0;
|
|
|
|
// VERIFY( (!internal::predux_all(internal::pload<Packet>(data1))) && "internal::predux_all(0101)");
|
|
|
|
// for (unsigned int i=0; i<sizeof(Scalar); ++i) data1_bits[k*sizeof(Scalar)+i] = 0xff;
|
|
|
|
// }
|
|
|
|
|
|
|
|
// predux_any
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (unsigned int i = 0; i < PacketSize * sizeof(Scalar); ++i) data1_bits[i] = 0x0;
|
|
|
|
VERIFY((!internal::predux_any(internal::pload<Packet>(data1))) && "internal::predux_any(0000)");
|
|
|
|
for (int k = 0; k < PacketSize; ++k) {
|
|
|
|
for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0xff;
|
|
|
|
VERIFY(internal::predux_any(internal::pload<Packet>(data1)) && "internal::predux_any(0101)");
|
|
|
|
for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0x00;
|
2019-01-09 22:25:17 +08:00
|
|
|
}
|
|
|
|
}
|
2009-03-25 20:26:13 +08:00
|
|
|
}
|
|
|
|
|
2020-08-22 06:52:34 +08:00
|
|
|
template <>
|
|
|
|
void packetmath_notcomplex<bfloat16, typename internal::packet_traits<bfloat16>::type>(){
|
|
|
|
typedef bfloat16 Scalar;
|
|
|
|
typedef internal::packet_traits<bfloat16>::type Packet;
|
|
|
|
typedef internal::packet_traits<Scalar> PacketTraits;
|
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
|
|
|
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
|
|
|
|
Array<Scalar, Dynamic, 1>::Map(data1, PacketSize * 4).setRandom();
|
|
|
|
|
|
|
|
ref[0] = data1[0];
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = (std::min)(ref[0], data1[i]);
|
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload<Packet>(data1))) && "internal::predux_min");
|
|
|
|
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin);
|
|
|
|
VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax);
|
|
|
|
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin);
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax);
|
|
|
|
CHECK_CWISE1(numext::abs, internal::pabs);
|
|
|
|
CHECK_CWISE2_IF(PacketTraits::HasAbsDiff, REF_ABS_DIFF, internal::pabsdiff);
|
|
|
|
|
|
|
|
ref[0] = data1[0];
|
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[0] = (std::max)(ref[0], data1[i]);
|
|
|
|
VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload<Packet>(data1))) && "internal::predux_max");
|
|
|
|
|
|
|
|
{
|
|
|
|
unsigned char* data1_bits = reinterpret_cast<unsigned char*>(data1);
|
|
|
|
// predux_any
|
|
|
|
for (unsigned int i = 0; i < PacketSize * sizeof(Scalar); ++i) data1_bits[i] = 0x0;
|
|
|
|
VERIFY((!internal::predux_any(internal::pload<Packet>(data1))) && "internal::predux_any(0000)");
|
|
|
|
for (int k = 0; k < PacketSize; ++k) {
|
|
|
|
for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0xff;
|
|
|
|
VERIFY(internal::predux_any(internal::pload<Packet>(data1)) && "internal::predux_any(0101)");
|
|
|
|
for (unsigned int i = 0; i < sizeof(Scalar); ++i) data1_bits[k * sizeof(Scalar) + i] = 0x00;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet, bool ConjLhs, bool ConjRhs>
|
|
|
|
void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval) {
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2011-02-24 02:24:26 +08:00
|
|
|
internal::conj_if<ConjLhs> cj0;
|
|
|
|
internal::conj_if<ConjRhs> cj1;
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::conj_helper<Scalar, Scalar, ConjLhs, ConjRhs> cj;
|
|
|
|
internal::conj_helper<Packet, Packet, ConjLhs, ConjRhs> pcj;
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2011-02-24 02:24:26 +08:00
|
|
|
ref[i] = cj0(data1[i]) * cj1(data2[i]);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(internal::isApprox(ref[i], cj.pmul(data1[i], data2[i])) && "conj_helper pmul");
|
2011-02-24 02:24:26 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::pstore(pval, pcj.pmul(internal::pload<Packet>(data1), internal::pload<Packet>(data2)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "conj_helper pmul");
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
2011-02-24 02:24:26 +08:00
|
|
|
Scalar tmp = ref[i];
|
|
|
|
ref[i] += cj0(data1[i]) * cj1(data2[i]);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(internal::isApprox(ref[i], cj.pmadd(data1[i], data2[i], tmp)) && "conj_helper pmadd");
|
2011-02-24 02:24:26 +08:00
|
|
|
}
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
internal::pstore(
|
|
|
|
pval, pcj.pmadd(internal::pload<Packet>(data1), internal::pload<Packet>(data2), internal::pload<Packet>(pval)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "conj_helper pmadd");
|
2011-02-24 02:24:26 +08:00
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
void packetmath_complex() {
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2010-07-07 02:54:14 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
const int size = PacketSize * 4;
|
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar data2[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar ref[PacketSize * 4];
|
|
|
|
EIGEN_ALIGN_MAX Scalar pval[PacketSize * 4];
|
2010-07-07 02:54:14 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < size; ++i) {
|
2010-10-25 22:15:22 +08:00
|
|
|
data1[i] = internal::random<Scalar>() * Scalar(1e2);
|
|
|
|
data2[i] = internal::random<Scalar>() * Scalar(1e2);
|
2010-07-07 02:54:14 +08:00
|
|
|
}
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
test_conj_helper<Scalar, Packet, false, false>(data1, data2, ref, pval);
|
|
|
|
test_conj_helper<Scalar, Packet, false, true>(data1, data2, ref, pval);
|
|
|
|
test_conj_helper<Scalar, Packet, true, false>(data1, data2, ref, pval);
|
|
|
|
test_conj_helper<Scalar, Packet, true, true>(data1, data2, ref, pval);
|
2016-10-05 05:22:56 +08:00
|
|
|
|
2011-02-23 21:20:33 +08:00
|
|
|
{
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) ref[i] = Scalar(std::imag(data1[i]), std::real(data1[i]));
|
|
|
|
internal::pstore(pval, internal::pcplxflip(internal::pload<Packet>(data1)));
|
2020-01-11 18:31:21 +08:00
|
|
|
VERIFY(test::areApprox(ref, pval, PacketSize) && "pcplxflip");
|
2011-02-23 21:20:33 +08:00
|
|
|
}
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename Packet>
|
|
|
|
void packetmath_scatter_gather() {
|
2014-03-28 07:03:03 +08:00
|
|
|
typedef typename NumTraits<Scalar>::Real RealScalar;
|
2018-11-26 21:10:07 +08:00
|
|
|
const int PacketSize = internal::unpacket_traits<Packet>::size;
|
2015-07-29 17:11:23 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar data1[PacketSize];
|
2020-08-22 06:52:34 +08:00
|
|
|
RealScalar refvalue = RealScalar(0);
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
|
|
|
data1[i] = internal::random<Scalar>() / RealScalar(PacketSize);
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
int stride = internal::random<int>(1, 20);
|
2016-10-05 05:22:56 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
EIGEN_ALIGN_MAX Scalar buffer[PacketSize * 20];
|
|
|
|
memset(buffer, 0, 20 * PacketSize * sizeof(Scalar));
|
2014-03-28 07:03:03 +08:00
|
|
|
Packet packet = internal::pload<Packet>(data1);
|
2014-07-09 22:01:24 +08:00
|
|
|
internal::pscatter<Scalar, Packet>(buffer, packet, stride);
|
2014-03-28 07:03:03 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize * 20; ++i) {
|
|
|
|
if ((i % stride) == 0 && i < stride * PacketSize) {
|
|
|
|
VERIFY(test::isApproxAbs(buffer[i], data1[i / stride], refvalue) && "pscatter");
|
2014-03-28 07:03:03 +08:00
|
|
|
} else {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(test::isApproxAbs(buffer[i], Scalar(0), refvalue) && "pscatter");
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
for (int i = 0; i < PacketSize * 7; ++i) {
|
|
|
|
buffer[i] = internal::random<Scalar>() / RealScalar(PacketSize);
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
|
|
|
packet = internal::pgather<Scalar, Packet>(buffer, 7);
|
|
|
|
internal::pstore(data1, packet);
|
|
|
|
for (int i = 0; i < PacketSize; ++i) {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
VERIFY(test::isApproxAbs(data1[i], buffer[i * 7], refvalue) && "pgather");
|
2014-03-28 07:03:03 +08:00
|
|
|
}
|
2010-07-07 02:54:14 +08:00
|
|
|
}
|
|
|
|
|
2020-01-11 18:31:21 +08:00
|
|
|
namespace Eigen {
|
|
|
|
namespace test {
|
2018-11-26 21:10:07 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename PacketType>
|
|
|
|
struct runall<Scalar, PacketType, false, false> { // i.e. float or double
|
2018-11-26 21:10:07 +08:00
|
|
|
static void run() {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
packetmath<Scalar, PacketType>();
|
|
|
|
packetmath_scatter_gather<Scalar, PacketType>();
|
|
|
|
packetmath_notcomplex<Scalar, PacketType>();
|
|
|
|
packetmath_real<Scalar, PacketType>();
|
2018-11-26 21:10:07 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename PacketType>
|
|
|
|
struct runall<Scalar, PacketType, false, true> { // i.e. int
|
2018-11-26 21:10:07 +08:00
|
|
|
static void run() {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
packetmath<Scalar, PacketType>();
|
|
|
|
packetmath_scatter_gather<Scalar, PacketType>();
|
|
|
|
packetmath_notcomplex<Scalar, PacketType>();
|
2018-11-26 21:10:07 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
template <typename Scalar, typename PacketType>
|
|
|
|
struct runall<Scalar, PacketType, true, false> { // i.e. complex
|
2018-11-26 21:10:07 +08:00
|
|
|
static void run() {
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
packetmath<Scalar, PacketType>();
|
|
|
|
packetmath_scatter_gather<Scalar, PacketType>();
|
|
|
|
packetmath_complex<Scalar, PacketType>();
|
2018-11-26 21:10:07 +08:00
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
} // namespace test
|
|
|
|
} // namespace Eigen
|
2018-11-26 21:10:07 +08:00
|
|
|
|
Added missing NEON pcasts, update packetmath tests.
The NEON `pcast` operators are all implemented and tested for existing
packets. This requires adding a `pcast(a,b,c,d,e,f,g,h)` for casting
between `int64_t` and `int8_t` in `GenericPacketMath.h`.
Removed incorrect `HasHalfPacket` definition for NEON's
`Packet2l`/`Packet2ul`.
Adjustments were also made to the `packetmath` tests. These include
- minor bug fixes for cast tests (i.e. 4:1 casts, only casting for
packets that are vectorizable)
- added 8:1 cast tests
- random number generation
- original had uninteresting 0 to 0 casts for many casts between
floating-point and integers, and exhibited signed overflow
undefined behavior
Tested:
```
$ aarch64-linux-gnu-g++ -static -I./ '-DEIGEN_TEST_PART_ALL=1' test/packetmath.cpp -o packetmath
$ adb push packetmath /data/local/tmp/
$ adb shell "/data/local/tmp/packetmath"
```
2020-06-20 07:07:05 +08:00
|
|
|
// Test entry point: runs every packet-math subtest g_repeat times.
//
// g_first_pass is set before the loop and cleared at the end of each
// iteration, so it is true only while the first iteration is running.
EIGEN_DECLARE_TEST(packetmath) {
  g_first_pass = true;

  for (int pass = 0; pass < g_repeat; ++pass) {
    // Real floating-point scalars.
    CALL_SUBTEST_1(test::runner<float>::run());
    CALL_SUBTEST_2(test::runner<double>::run());

    // Signed and unsigned integer scalars, 8 to 64 bits.
    CALL_SUBTEST_3(test::runner<int8_t>::run());
    CALL_SUBTEST_4(test::runner<uint8_t>::run());
    CALL_SUBTEST_5(test::runner<int16_t>::run());
    CALL_SUBTEST_6(test::runner<uint16_t>::run());
    CALL_SUBTEST_7(test::runner<int32_t>::run());
    CALL_SUBTEST_8(test::runner<uint32_t>::run());
    CALL_SUBTEST_9(test::runner<int64_t>::run());
    CALL_SUBTEST_10(test::runner<uint64_t>::run());

    // Complex scalars.
    CALL_SUBTEST_11(test::runner<std::complex<float> >::run());
    CALL_SUBTEST_12(test::runner<std::complex<double> >::run());

    // half and bool call the generic packetmath suite directly on the packet
    // type from packet_traits instead of going through test::runner.
    CALL_SUBTEST_13((packetmath<half, internal::packet_traits<half>::type>()));
    CALL_SUBTEST_14((packetmath<bool, internal::packet_traits<bool>::type>()));

    CALL_SUBTEST_15(test::runner<bfloat16>::run());

    g_first_pass = false;
  }
}
|