mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-27 07:29:52 +08:00
bugfix
This commit is contained in:
parent
c4e47630b1
commit
270c4e1ecd
@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
|
||||
@ -46,3 +47,4 @@ void StartBenchmarkTiming();
|
||||
// Declares a file-local pointer named _benchmark_<f> and initialises it with a
// heap-allocated ::testing::Benchmark built from the stringified name of `f`
// and from `f` itself. The pointer is tagged unused because nothing refers to
// it by name afterwards.
//
// The expansion carries no trailing semicolon, so a call site can chain a
// member call onto the new object before ending the statement, e.g.
// `BENCHMARK(f)->Range(lo, hi);`.
#define BENCHMARK(f)                                                    \
  static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \
      (new ::testing::Benchmark(#f, f))
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include <regex.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <string>
|
||||
#include <inttypes.h>
|
||||
#include <time.h>
|
||||
@ -27,8 +28,14 @@ static int64_t g_benchmark_total_time_ns;
|
||||
static int64_t g_benchmark_start_time_ns;
|
||||
typedef std::map<std::string, ::testing::Benchmark*> BenchmarkMap;
|
||||
typedef BenchmarkMap::iterator BenchmarkMapIt;
|
||||
|
||||
// Returns the name -> benchmark registry shared by Benchmark::Register() and
// main().
//
// The map is a function-local static, so it is constructed the first time this
// accessor is called. Code that runs during static initialisation and calls
// this accessor therefore always gets a constructed map, regardless of the
// order in which translation units are initialised.
BenchmarkMap& gBenchmarks() {
  static BenchmarkMap registry;
  return registry;
}
|
||||
|
||||
static int g_name_column_width = 20;
|
||||
|
||||
static int Round(int n) {
|
||||
int base = 1;
|
||||
while (base*10 < n) {
|
||||
@ -101,7 +108,7 @@ void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int
|
||||
fprintf(stderr, "%s: missing function\n", name_);
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
g_benchmarks.insert(std::make_pair(name, this));
|
||||
gBenchmarks().insert(std::make_pair(name, this));
|
||||
}
|
||||
void Benchmark::Run() {
|
||||
if (fn_ != NULL) {
|
||||
@ -183,16 +190,16 @@ void StartBenchmarkTiming() {
|
||||
}
|
||||
}
|
||||
int main(int argc, char* argv[]) {
|
||||
if (g_benchmarks.empty()) {
|
||||
if (gBenchmarks().empty()) {
|
||||
fprintf(stderr, "No benchmarks registered!\n");
|
||||
exit(EXIT_FAILURE);
|
||||
}
|
||||
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) {
|
||||
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
|
||||
int name_width = static_cast<int>(strlen(it->second->Name()));
|
||||
g_name_column_width = std::max(g_name_column_width, name_width);
|
||||
}
|
||||
bool need_header = true;
|
||||
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) {
|
||||
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
|
||||
::testing::Benchmark* b = it->second;
|
||||
if (b->ShouldRun(argc, argv)) {
|
||||
if (need_header) {
|
||||
@ -206,7 +213,7 @@ int main(int argc, char* argv[]) {
|
||||
if (need_header) {
|
||||
fprintf(stderr, "No matching benchmarks!\n");
|
||||
fprintf(stderr, "Available benchmarks:\n");
|
||||
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) {
|
||||
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
|
||||
fprintf(stderr, " %s\n", it->second->Name());
|
||||
}
|
||||
exit(EXIT_FAILURE);
|
||||
|
@ -10,13 +10,6 @@ typedef int TensorIndex;
|
||||
// Registers `bench` via BENCHMARK and chains a Range(lo, hi) call onto the
// result. Like BENCHMARK, the expansion has no trailing semicolon; the call
// site supplies it.
#define BENCHMARK_RANGE(bench, lo, hi) BENCHMARK(bench)->Range(lo, hi)
|
||||
|
||||
// Builds a std::string from an arbitrary list of arguments.
//
// The arguments are forwarded, together with a scratch std::stringstream, to
// StrCatRecursive (declared elsewhere); whatever that helper wrote into the
// stream is returned as the result.
// NOTE(review): presumably StrCatRecursive streams each argument in order with
// operator<< -- confirm against its definition.
template <typename... Args>
std::string StrCat(const Args... args) {
  std::stringstream buffer;
  StrCatRecursive(buffer, args...);
  return buffer.str();
}
|
||||
|
||||
using Eigen::Tensor;
|
||||
using Eigen::TensorMap;
|
||||
|
||||
@ -305,9 +298,9 @@ template <typename Device> class BenchmarkSuite {
|
||||
}
|
||||
|
||||
|
||||
size_t m_;
|
||||
size_t k_;
|
||||
size_t n_;
|
||||
TensorIndex m_;
|
||||
TensorIndex k_;
|
||||
TensorIndex n_;
|
||||
float* a_;
|
||||
float* b_;
|
||||
float* c_;
|
||||
|
@ -10,13 +10,11 @@
|
||||
// Defines and registers a GPU benchmark for BenchmarkSuite<Eigen::GpuDevice>::FUNC.
//
// The generated function BM_<FUNC>(iters, N):
//   1. calls StopBenchmarkTiming() before doing any setup,
//   2. creates an Eigen::CudaStreamDevice and an Eigen::GpuDevice that points
//      at it,
//   3. builds a suite from that device and N,
//   4. calls cudaDeviceSynchronize() and then runs suite.FUNC(iters).
// The function is registered with BENCHMARK_RANGE over the range 10..5000.
//
// Exactly one object named `stream` is declared: the Eigen::CudaStreamDevice.
// No raw cudaStream_t is created, so there is no cudaStreamCreate /
// cudaStreamDestroy pair here.
// NOTE(review): presumably suite.FUNC restarts the benchmark timer itself --
// confirm in BenchmarkSuite.
#define BM_FuncGPU(FUNC)                                 \
  static void BM_##FUNC(int iters, int N) {              \
    StopBenchmarkTiming();                               \
    Eigen::CudaStreamDevice stream;                      \
    Eigen::GpuDevice device(&stream);                    \
    BenchmarkSuite<Eigen::GpuDevice> suite(device, N);   \
    cudaDeviceSynchronize();                             \
    suite.FUNC(iters);                                   \
  }                                                      \
  BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
|
||||
|
||||
@ -35,13 +33,11 @@ BM_FuncGPU(reduction);
|
||||
// Defines and registers a GPU benchmark for BenchmarkSuite<Eigen::GpuDevice>::FUNC
// with explicit input dimensions.
//
// D1, D2 and D3 are pasted textually into both the generated function name
// (BM_<FUNC>_<D1>x<D2>x<D3>) and the BenchmarkSuite constructor call. Passing
// the token `N` for a dimension therefore refers to the generated function's
// own `N` parameter, while a literal such as 64 fixes that dimension.
//
// The generated function calls StopBenchmarkTiming(), creates an
// Eigen::CudaStreamDevice plus an Eigen::GpuDevice pointing at it, builds the
// suite, calls cudaDeviceSynchronize(), and runs suite.FUNC(iters). It is
// registered with BENCHMARK_RANGE over the range 10..5000.
//
// Exactly one object named `stream` is declared: the Eigen::CudaStreamDevice.
// No raw cudaStream_t is created, so there is no cudaStreamCreate /
// cudaStreamDestroy pair here.
#define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3)                     \
  static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) {      \
    StopBenchmarkTiming();                                            \
    Eigen::CudaStreamDevice stream;                                   \
    Eigen::GpuDevice device(&stream);                                 \
    BenchmarkSuite<Eigen::GpuDevice> suite(device, D1, D2, D3);       \
    cudaDeviceSynchronize();                                          \
    suite.FUNC(iters);                                                \
  }                                                                   \
  BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000);
|
||||
|
||||
@ -55,13 +51,11 @@ BM_FuncWithInputDimsGPU(contraction, N, 64, N);
|
||||
// Defines and registers a GPU benchmark for BenchmarkSuite<Eigen::GpuDevice>::FUNC
// that takes two extra dimension arguments.
//
// DIM1 and DIM2 are pasted into the generated function name
// (BM_<FUNC>_<DIM1>x<DIM2>) and passed to suite.FUNC after `iters`; the suite
// itself is sized by the benchmark's `N` argument.
//
// The generated function calls StopBenchmarkTiming(), creates an
// Eigen::CudaStreamDevice plus an Eigen::GpuDevice pointing at it, builds the
// suite, calls cudaDeviceSynchronize(), and runs suite.FUNC(iters, DIM1, DIM2).
// It is registered with BENCHMARK_RANGE over the range 128..5000.
//
// Exactly one object named `stream` is declared: the Eigen::CudaStreamDevice.
// No raw cudaStream_t is created, so there is no cudaStreamCreate /
// cudaStreamDestroy pair here.
#define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2)               \
  static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) {    \
    StopBenchmarkTiming();                                       \
    Eigen::CudaStreamDevice stream;                              \
    Eigen::GpuDevice device(&stream);                            \
    BenchmarkSuite<Eigen::GpuDevice> suite(device, N);           \
    cudaDeviceSynchronize();                                     \
    suite.FUNC(iters, DIM1, DIM2);                               \
  }                                                              \
  BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000);
|
||||
|
Loading…
Reference in New Issue
Block a user