Merged in yuefengz/eigen (pull request PR-370)

Use the device's allocate/deallocate functions instead of internal::aligned_malloc/internal::aligned_free for the packed memory buffer.
This commit is contained in:
Benoit Steiner 2018-07-31 22:38:28 +00:00
commit edf46bd7a2

View File

@ -317,7 +317,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align; divup<size_t>(bm_ * bk_ * sizeof(LhsScalar), align) * align;
size_t rhs_size = size_t rhs_size =
divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align; divup<size_t>(bn_ * bk_ * sizeof(RhsScalar), align) * align;
packed_mem_ = static_cast<char*>(internal::aligned_malloc( packed_mem_ = static_cast<char*>(device_.allocate(
(nm0_ * lhs_size + nn0_ * rhs_size) * std::min<size_t>(nk_, P - 1))); (nm0_ * lhs_size + nn0_ * rhs_size) * std::min<size_t>(nk_, P - 1)));
char* mem = static_cast<char*>(packed_mem_); char* mem = static_cast<char*>(packed_mem_);
for (Index x = 0; x < numext::mini<Index>(nk_, P - 1); x++) { for (Index x = 0; x < numext::mini<Index>(nk_, P - 1); x++) {
@ -339,7 +339,7 @@ struct TensorEvaluator<const TensorContractionOp<Indices, LeftArgType, RightArgT
for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m]; for (Index m = 0; m < nm_; m++) delete[] state_kernel_[x][m];
delete[] state_kernel_[x]; delete[] state_kernel_[x];
} }
internal::aligned_free(packed_mem_); device_.deallocate(packed_mem_);
} }
void run() { void run() {