Break loop dependence in TensorGenerator block access

This commit is contained in:
Eugene Zhulenev 2019-11-11 10:32:57 -08:00
parent ebf04fb3e8
commit c952b8dfda

View File

@ -269,20 +269,35 @@ struct TensorEvaluator<const TensorGeneratorOp<Generator, ArgType>, Device>
CoeffReturnType* block_buffer = block_storage.data(); CoeffReturnType* block_buffer = block_storage.data();
static const int packet_size = PacketType<CoeffReturnType, Device>::size;
static const int inner_dim = is_col_major ? 0 : NumDims - 1;
const Index inner_dim_size = it[0].size;
const Index inner_dim_vectorized = inner_dim_size - packet_size;
while (it[NumDims - 1].count < it[NumDims - 1].size) { while (it[NumDims - 1].count < it[NumDims - 1].size) {
// Generate data for the inner-most dimension. Index i = 0;
for (Index i = 0; i < it[0].size; ++i) { // Generate data for the vectorized part of the inner-most dimension.
*(block_buffer + offset + i) = m_generator(coords); for (; i <= inner_dim_vectorized; i += packet_size) {
coords[is_col_major ? 0 : NumDims - 1]++; for (Index j = 0; j < packet_size; ++j) {
array<Index, NumDims> j_coords = coords; // Break loop dependence.
j_coords[inner_dim] += j;
*(block_buffer + offset + i + j) = m_generator(j_coords);
}
coords[inner_dim] += packet_size;
} }
coords[is_col_major ? 0 : NumDims - 1] = // Finalize non-vectorized part of the inner-most dimension.
initial_coords[is_col_major ? 0 : NumDims - 1]; for (; i < inner_dim_size; ++i) {
*(block_buffer + offset + i) = m_generator(coords);
coords[inner_dim]++;
}
coords[inner_dim] = initial_coords[inner_dim];
// For the 1d tensor we need to generate only one inner-most dimension. // For the 1d tensor we need to generate only one inner-most dimension.
if (NumDims == 1) break; if (NumDims == 1) break;
// Update offset. // Update offset.
for (Index i = 1; i < NumDims; ++i) { for (i = 1; i < NumDims; ++i) {
if (++it[i].count < it[i].size) { if (++it[i].count < it[i].size) {
offset += it[i].stride; offset += it[i].stride;
coords[is_col_major ? i : NumDims - 1 - i]++; coords[is_col_major ? i : NumDims - 1 - i]++;