Converting fixed float type into template type for TensorContraction.

This commit is contained in:
Mehdi Goli 2017-02-24 18:13:30 +00:00
parent 89dfd51fae
commit 0b7875f137

View File

@@ -230,13 +230,13 @@ typename HostExpr::Index LocalThreadSizeM, typename HostExpr::Index LocalThreadS
const Index nGroupId = itemID.get_group(1); // Work-group ID localCol const Index nGroupId = itemID.get_group(1); // Work-group ID localCol
const Index linearLocalThreadId = nLocalThreadId*LocalThreadSizeM + mLocalThreadId; // linear local thread ID const Index linearLocalThreadId = nLocalThreadId*LocalThreadSizeM + mLocalThreadId; // linear local thread ID
// Allocate register space // Allocate register space
float privateLhs; LhsScalar privateLhs;
float privateRhs[WorkLoadPerThreadN]; RhsScalar privateRhs[WorkLoadPerThreadN];
float privateRes[WorkLoadPerThreadM][WorkLoadPerThreadN]; OutScalar privateRes[WorkLoadPerThreadM][WorkLoadPerThreadN];
// Initialise the privateRes accumulation registers // Initialise the privateRes accumulation registers
for (Index wLPTM=0; wLPTM<WorkLoadPerThreadM; wLPTM++) { for (Index wLPTM=0; wLPTM<WorkLoadPerThreadM; wLPTM++) {
for (Index wLPTN=0; wLPTN<WorkLoadPerThreadN; wLPTN++) { for (Index wLPTN=0; wLPTN<WorkLoadPerThreadN; wLPTN++) {
privateRes[wLPTM][wLPTN] = 0.0f; privateRes[wLPTM][wLPTN] = static_cast<OutScalar>(0);
} }
} }