Implemented the ptranspose function on half floats

This commit is contained in:
Benoit Steiner 2016-02-21 12:44:53 -08:00
parent e644f60907
commit 584832cb3c

View File

@ -226,10 +226,12 @@ template<> EIGEN_DEVICE_FUNC inline half2 pabs<half2>(const half2& a) {
// Transposes, in place, the 2x2 block of half-precision values stored in the
// two half2 packets of `kernel`.
//
// Treating each packet as a (low, high) pair, the block
//     packet[0] = (a1, a2)
//     packet[1] = (b1, b2)
// becomes
//     packet[0] = (a1, b1)
//     packet[1] = (a2, b2)
//
// The lanes are extracted and recombined through the CUDA half intrinsics
// rather than by touching the members of half2 directly.
EIGEN_DEVICE_FUNC inline void
ptranspose(PacketBlock<half2,2>& kernel) {
  // Read all four lanes before writing anything back, since both output
  // packets depend on both input packets.
  const half a1 = __low2half(kernel.packet[0]);
  const half a2 = __high2half(kernel.packet[0]);
  const half b1 = __low2half(kernel.packet[1]);
  const half b2 = __high2half(kernel.packet[1]);

  // Low lanes form the first row of the result, high lanes the second.
  kernel.packet[0] = __halves2half2(a1, b1);
  kernel.packet[1] = __halves2half2(a2, b2);
}
} // end namespace internal