Fix shorten-64-to-32 warning. Use regular memcpy if num_threads==0.

This commit is contained in:
Rasmus Munk Larsen 2018-12-12 14:45:31 -08:00
parent f582ea3579
commit dd6d65898a

View File

@@ -87,13 +87,13 @@ struct ThreadPoolDevice {
const size_t kMinBlockSize = 32768;
typedef TensorCostModel<ThreadPoolDevice> CostModel;
const size_t num_threads = CostModel::numThreads(n, TensorOpCost(1.0, 1.0, 0), 4);
if (n <= kMinBlockSize || num_threads == 1) {
if (n <= kMinBlockSize || num_threads < 2) {
::memcpy(dst, src, n);
} else {
const char* src_ptr = static_cast<const char*>(src);
char* dst_ptr = static_cast<char*>(dst);
const size_t blocksize = (n + (num_threads - 1)) / num_threads;
Barrier barrier(num_threads - 1);
Barrier barrier(static_cast<int>(num_threads - 1));
// Launch all but the first block (num_threads - 1 blocks, at most 3) on worker threads.
for (size_t i = 1; i < num_threads; ++i) {
enqueue_with_barrier(&barrier, [n, i, src_ptr, dst_ptr, blocksize] {