Fixed a boundary condition bug in the outer reduction kernel

This commit is contained in:
Benoit Steiner 2016-01-14 09:29:48 -08:00
parent 9f013a9d86
commit 8fe2532e70

View File

@@ -241,7 +241,7 @@ __global__ void OuterReductionKernel(Reducer reducer, const Self input, Index nu
}
// Do the reduction.
const Index max_iter = divup<Index>(num_coeffs_to_reduce, NumPerThread) * num_preserved_coeffs;
const Index max_iter = num_preserved_coeffs * numext::maxi<Index>(1, (num_coeffs_to_reduce - NumPerThread + 1));
for (Index i = thread_id; i < max_iter; i += num_threads) {
const Index input_col = i % num_preserved_coeffs;
const Index input_row = (i / num_preserved_coeffs) * NumPerThread;