mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-15 07:10:37 +08:00
add a 2D parallelizer
This commit is contained in:
parent
022e2f5ef4
commit
a1e1103328
@ -109,6 +109,8 @@ static void run(int rows, int cols, int depth,
|
||||
// Everything is packed, we can now call the block * panel kernel:
|
||||
ei_gebp_kernel<Scalar, Blocking::mr, Blocking::nr, ei_conj_helper<ConjugateLhs,ConjugateRhs> >()
|
||||
(res+i2, resStride, blockA, blockB, actual_mc, actual_kc, cols);
|
||||
|
||||
// sgemm_kernel(actual_mc, cols, actual_kc, alpha, blockA, allocatedBlockB, res+i2, resStride);
|
||||
}
|
||||
}
|
||||
|
||||
@ -137,12 +139,14 @@ struct ei_gemm_functor
|
||||
: m_lhs(lhs), m_rhs(rhs), m_dest(dest), m_actualAlpha(actualAlpha)
|
||||
{}
|
||||
|
||||
void operator() (int start, int size) const
|
||||
void operator() (int col, int cols, int row=0, int rows=-1) const
|
||||
{
|
||||
Gemm::run(m_lhs.rows(), size, m_lhs.cols(),
|
||||
(const Scalar*)&(m_lhs.const_cast_derived().coeffRef(0,0)), m_lhs.stride(),
|
||||
(const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,start)), m_rhs.stride(),
|
||||
(Scalar*)&(m_dest.coeffRef(0,start)), m_dest.stride(),
|
||||
if(rows==-1)
|
||||
rows = m_lhs.rows();
|
||||
Gemm::run(rows, cols, m_lhs.cols(),
|
||||
(const Scalar*)&(m_lhs.const_cast_derived().coeffRef(row,0)), m_lhs.stride(),
|
||||
(const Scalar*)&(m_rhs.const_cast_derived().coeffRef(0,col)), m_rhs.stride(),
|
||||
(Scalar*)&(m_dest.coeffRef(row,col)), m_dest.stride(),
|
||||
m_actualAlpha);
|
||||
}
|
||||
|
||||
@ -187,7 +191,7 @@ class GeneralProduct<Lhs, Rhs, GemmProduct>
|
||||
_ActualRhsType,
|
||||
Dest> Functor;
|
||||
|
||||
ei_run_parallel_1d<true>(Functor(lhs, rhs, dst, actualAlpha), this->cols());
|
||||
ei_run_parallel_2d<true>(Functor(lhs, rhs, dst, actualAlpha), this->cols(), this->rows());
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -47,4 +47,44 @@ void ei_run_parallel_1d(const Functor& func, int size)
|
||||
#endif
|
||||
}
|
||||
|
||||
// Splits a 2D iteration space of size1 x size2 into one rectangular tile per
// OpenMP thread and invokes func(start1, length1, start2, length2) on each.
// The divide1/divide2 tables factor the thread count T into a grid with
// divide1[T] * divide2[T] == T tiles. Without OpenMP (or when Parallelize is
// false) the whole space is processed by a single call.
template<bool Parallelize,typename Functor>
void ei_run_parallel_2d(const Functor& func, int size1, int size2)
{
#ifndef EIGEN_HAS_OPENMP
  func(0,size1, 0,size2);
#else
  if(!Parallelize)
    return func(0,size1, 0,size2);

  // 2D factorization of each thread count T in [0,16]:
  // divide1[T] * divide2[T] == T for every entry.
  //                               0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
  static const int divide1[17] = { 0, 1, 2, 3, 2, 5, 3, 7, 4, 3,  5, 11,  4, 13,  7,  5,  4};
  static const int divide2[17] = { 0, 1, 1, 1, 2, 1, 2, 1, 2, 3,  2,  1,  3,  1,  2,  3,  4};

  int threads = omp_get_num_procs();
  ei_assert(threads<=16 && "too many threads !");
  // ei_assert compiles away in release builds: clamp so that the table
  // lookups below stay in bounds even on machines with >16 procs.
  if(threads>16)
    threads = 16;
  int blockSize1 = size1 / divide1[threads];
  int blockSize2 = size2 / divide2[threads];

  // Precompute one (start1, len1, start2, len2) column per thread.
  Matrix<int,4,Dynamic> ranges(4,threads);
  int k = 0;
  for(int i1=0; i1<divide1[threads]; ++i1)
  {
    int blockStart1 = i1*blockSize1;
    // The last tile in each dimension absorbs the integer-division remainder;
    // using min(blockSize1, size1-blockStart1) here would silently drop the
    // trailing size1 % divide1[threads] entries (the remaining extent is
    // always >= blockSize1 for non-last tiles, so min never shrank anything).
    int actualBlockSize1 = (i1+1==divide1[threads]) ? size1 - blockStart1 : blockSize1;
    for(int i2=0; i2<divide2[threads]; ++i2)
    {
      int blockStart2 = i2*blockSize2;
      int actualBlockSize2 = (i2+1==divide2[threads]) ? size2 - blockStart2 : blockSize2;
      ranges.col(k++) << blockStart1, actualBlockSize1, blockStart2, actualBlockSize2;
    }
  }

  // One tile per thread; static,1 pins iteration i to thread i.
  #pragma omp parallel for schedule(static,1)
  for(int i=0; i<threads; ++i)
  {
    func(ranges.col(i)[0],ranges.col(i)[1],ranges.col(i)[2],ranges.col(i)[3]);
  }
#endif
}
|
||||
|
||||
#endif // EIGEN_GENERAL_MATRIX_MATRIX_H
|
||||
|
Loading…
Reference in New Issue
Block a user