This commit is contained in:
Yangqing Jia 2016-01-28 11:11:45 -08:00
parent c4e47630b1
commit 270c4e1ecd
4 changed files with 23 additions and 27 deletions

View File

@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <stddef.h>
#include <stdint.h>
#include <vector>
@ -45,4 +46,5 @@ void StopBenchmarkTiming();
void StartBenchmarkTiming();
// Declares a file-static pointer named _benchmark_<f> and initializes it with
// a heap-allocated ::testing::Benchmark built from the stringized name of `f`
// and `f` itself. The pointer is tagged __attribute__((unused)) because
// nothing reads it afterwards. The expansion deliberately ends in the
// parenthesized `new` expression without a semicolon, so the caller supplies
// the terminator and may append a member call (BENCHMARK(f)->...).
#define BENCHMARK(f)                                                     \
  static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \
      (new ::testing::Benchmark(#f, f))

View File

@ -17,6 +17,7 @@
#include <regex.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <string>
#include <inttypes.h>
#include <time.h>
@ -27,8 +28,14 @@ static int64_t g_benchmark_total_time_ns;
static int64_t g_benchmark_start_time_ns;
typedef std::map<std::string, ::testing::Benchmark*> BenchmarkMap;
typedef BenchmarkMap::iterator BenchmarkMapIt;
static BenchmarkMap g_benchmarks;
// Returns a reference to the single benchmark registry (name -> Benchmark*).
// The map is a function-local static, so it is constructed the first time
// this accessor is called rather than at an unspecified point during static
// initialization.
// NOTE(review): presumably this exists so that benchmarks registered from
// static initializers (see the BENCHMARK macro) always find a constructed
// map -- confirm against the Benchmark constructor.
BenchmarkMap& gBenchmarks() {
  static BenchmarkMap registry;
  return registry;
}
static int g_name_column_width = 20;
static int Round(int n) {
int base = 1;
while (base*10 < n) {
@ -101,7 +108,7 @@ void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int
fprintf(stderr, "%s: missing function\n", name_);
exit(EXIT_FAILURE);
}
g_benchmarks.insert(std::make_pair(name, this));
gBenchmarks().insert(std::make_pair(name, this));
}
void Benchmark::Run() {
if (fn_ != NULL) {
@ -183,16 +190,16 @@ void StartBenchmarkTiming() {
}
}
int main(int argc, char* argv[]) {
if (g_benchmarks.empty()) {
if (gBenchmarks().empty()) {
fprintf(stderr, "No benchmarks registered!\n");
exit(EXIT_FAILURE);
}
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) {
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
int name_width = static_cast<int>(strlen(it->second->Name()));
g_name_column_width = std::max(g_name_column_width, name_width);
}
bool need_header = true;
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) {
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
::testing::Benchmark* b = it->second;
if (b->ShouldRun(argc, argv)) {
if (need_header) {
@ -206,10 +213,10 @@ int main(int argc, char* argv[]) {
if (need_header) {
fprintf(stderr, "No matching benchmarks!\n");
fprintf(stderr, "Available benchmarks:\n");
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) {
for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
fprintf(stderr, " %s\n", it->second->Name());
}
exit(EXIT_FAILURE);
}
return 0;
}
}

View File

@ -10,13 +10,6 @@ typedef int TensorIndex;
// Expands to BENCHMARK(bench) followed by ->Range(lo, hi), i.e. it declares
// the benchmark and calls Range(lo, hi) on the object BENCHMARK produces.
#define BENCHMARK_RANGE(bench, lo, hi) BENCHMARK(bench)->Range(lo, hi)
// Builds a std::string from an arbitrary list of arguments: a
// std::stringstream and all arguments are handed to StrCatRecursive, and the
// stream's accumulated contents are returned.
// NOTE(review): StrCatRecursive is defined outside this view; presumably it
// streams each argument in order -- confirm.
template <typename... Args>
std::string StrCat(const Args... args) {
  std::stringstream stream;
  StrCatRecursive(stream, args...);
  std::string joined = stream.str();
  return joined;
}
using Eigen::Tensor;
using Eigen::TensorMap;
@ -305,9 +298,9 @@ template <typename Device> class BenchmarkSuite {
}
size_t m_;
size_t k_;
size_t n_;
TensorIndex m_;
TensorIndex k_;
TensorIndex n_;
float* a_;
float* b_;
float* c_;

View File

@ -10,13 +10,11 @@
// Defines and registers a GPU benchmark for BenchmarkSuite<Eigen::GpuDevice>::FUNC.
//
// Expands to a static function BM_<FUNC>(int iters, int N) that
//   1. calls StopBenchmarkTiming() before doing any setup,
//   2. builds an Eigen::GpuDevice on top of an Eigen::CudaStreamDevice,
//   3. constructs the suite from (device, N),
//   4. calls cudaDeviceSynchronize() and then suite.FUNC(iters),
// and then registers that function with BENCHMARK_RANGE over 10..5000.
//
// `stream` is declared exactly once, as an Eigen::CudaStreamDevice; no raw
// cudaStream_t is created or destroyed here, because a second declaration of
// the same name in this scope would not compile.
#define BM_FuncGPU(FUNC)                                 \
  static void BM_##FUNC(int iters, int N) {              \
    StopBenchmarkTiming();                               \
    Eigen::CudaStreamDevice stream;                      \
    Eigen::GpuDevice device(&stream);                    \
    BenchmarkSuite<Eigen::GpuDevice> suite(device, N);   \
    cudaDeviceSynchronize();                             \
    suite.FUNC(iters);                                   \
  }                                                      \
  BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
@ -35,13 +33,11 @@ BM_FuncGPU(reduction);
// Defines and registers a GPU benchmark whose suite is built from three
// explicit dimensions.
//
// Expands to a static function BM_<FUNC>_<D1>x<D2>x<D3>(int iters, int N)
// that stops benchmark timing, builds an Eigen::GpuDevice on top of an
// Eigen::CudaStreamDevice, constructs the suite from (device, D1, D2, D3),
// calls cudaDeviceSynchronize() and then suite.FUNC(iters). The function is
// registered with BENCHMARK_RANGE over 10..5000.
//
// D1/D2/D3 are pasted both into the function name and into the constructor
// call, so a dimension may be a literal or the token N, in which case it
// refers to the generated function's N parameter.
//
// `stream` is declared exactly once, as an Eigen::CudaStreamDevice; no raw
// cudaStream_t is created or destroyed here.
#define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3)                  \
  static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) {   \
    StopBenchmarkTiming();                                         \
    Eigen::CudaStreamDevice stream;                                \
    Eigen::GpuDevice device(&stream);                              \
    BenchmarkSuite<Eigen::GpuDevice> suite(device, D1, D2, D3);    \
    cudaDeviceSynchronize();                                       \
    suite.FUNC(iters);                                             \
  }                                                                \
  BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000);
@ -55,13 +51,11 @@ BM_FuncWithInputDimsGPU(contraction, N, 64, N);
// Defines and registers a GPU benchmark whose suite method takes two extra
// dimension arguments.
//
// Expands to a static function BM_<FUNC>_<DIM1>x<DIM2>(int iters, int N)
// that stops benchmark timing, builds an Eigen::GpuDevice on top of an
// Eigen::CudaStreamDevice, constructs the suite from (device, N), calls
// cudaDeviceSynchronize() and then suite.FUNC(iters, DIM1, DIM2). The
// function is registered with BENCHMARK_RANGE over 128..5000.
//
// `stream` is declared exactly once, as an Eigen::CudaStreamDevice; no raw
// cudaStream_t is created or destroyed here.
#define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2)               \
  static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) {    \
    StopBenchmarkTiming();                                       \
    Eigen::CudaStreamDevice stream;                              \
    Eigen::GpuDevice device(&stream);                            \
    BenchmarkSuite<Eigen::GpuDevice> suite(device, N);           \
    cudaDeviceSynchronize();                                     \
    suite.FUNC(iters, DIM1, DIM2);                               \
  }                                                              \
  BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000);