This commit is contained in:
Yangqing Jia 2016-01-28 11:11:45 -08:00
parent c4e47630b1
commit 270c4e1ecd
4 changed files with 23 additions and 27 deletions

View File

@ -13,6 +13,7 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <vector> #include <vector>
@ -45,4 +46,5 @@ void StopBenchmarkTiming();
void StartBenchmarkTiming(); void StartBenchmarkTiming();
#define BENCHMARK(f) \ #define BENCHMARK(f) \
static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \ static ::testing::Benchmark* _benchmark_##f __attribute__((unused)) = \
(new ::testing::Benchmark(#f, f)) (new ::testing::Benchmark(#f, f))

View File

@ -17,6 +17,7 @@
#include <regex.h> #include <regex.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h>
#include <string> #include <string>
#include <inttypes.h> #include <inttypes.h>
#include <time.h> #include <time.h>
@ -27,8 +28,14 @@ static int64_t g_benchmark_total_time_ns;
static int64_t g_benchmark_start_time_ns; static int64_t g_benchmark_start_time_ns;
typedef std::map<std::string, ::testing::Benchmark*> BenchmarkMap; typedef std::map<std::string, ::testing::Benchmark*> BenchmarkMap;
typedef BenchmarkMap::iterator BenchmarkMapIt; typedef BenchmarkMap::iterator BenchmarkMapIt;
static BenchmarkMap g_benchmarks;
// Returns the benchmark registry shared by Benchmark::Register() and main().
// The map is a function-local static, so it is constructed the first time
// this accessor is called rather than at namespace-scope initialization.
// NOTE(review): presumably this is so BENCHMARK() registrations that run
// during static initialization always find a constructed map -- confirm.
BenchmarkMap& gBenchmarks() {
  static BenchmarkMap registry;
  return registry;
}
static int g_name_column_width = 20; static int g_name_column_width = 20;
static int Round(int n) { static int Round(int n) {
int base = 1; int base = 1;
while (base*10 < n) { while (base*10 < n) {
@ -101,7 +108,7 @@ void Benchmark::Register(const char* name, void (*fn)(int), void (*fn_range)(int
fprintf(stderr, "%s: missing function\n", name_); fprintf(stderr, "%s: missing function\n", name_);
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
g_benchmarks.insert(std::make_pair(name, this)); gBenchmarks().insert(std::make_pair(name, this));
} }
void Benchmark::Run() { void Benchmark::Run() {
if (fn_ != NULL) { if (fn_ != NULL) {
@ -183,16 +190,16 @@ void StartBenchmarkTiming() {
} }
} }
int main(int argc, char* argv[]) { int main(int argc, char* argv[]) {
if (g_benchmarks.empty()) { if (gBenchmarks().empty()) {
fprintf(stderr, "No benchmarks registered!\n"); fprintf(stderr, "No benchmarks registered!\n");
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) { for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
int name_width = static_cast<int>(strlen(it->second->Name())); int name_width = static_cast<int>(strlen(it->second->Name()));
g_name_column_width = std::max(g_name_column_width, name_width); g_name_column_width = std::max(g_name_column_width, name_width);
} }
bool need_header = true; bool need_header = true;
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) { for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
::testing::Benchmark* b = it->second; ::testing::Benchmark* b = it->second;
if (b->ShouldRun(argc, argv)) { if (b->ShouldRun(argc, argv)) {
if (need_header) { if (need_header) {
@ -206,10 +213,10 @@ int main(int argc, char* argv[]) {
if (need_header) { if (need_header) {
fprintf(stderr, "No matching benchmarks!\n"); fprintf(stderr, "No matching benchmarks!\n");
fprintf(stderr, "Available benchmarks:\n"); fprintf(stderr, "Available benchmarks:\n");
for (BenchmarkMapIt it = g_benchmarks.begin(); it != g_benchmarks.end(); ++it) { for (BenchmarkMapIt it = gBenchmarks().begin(); it != gBenchmarks().end(); ++it) {
fprintf(stderr, " %s\n", it->second->Name()); fprintf(stderr, " %s\n", it->second->Name());
} }
exit(EXIT_FAILURE); exit(EXIT_FAILURE);
} }
return 0; return 0;
} }

View File

@ -10,13 +10,6 @@ typedef int TensorIndex;
#define BENCHMARK_RANGE(bench, lo, hi) \ #define BENCHMARK_RANGE(bench, lo, hi) \
BENCHMARK(bench)->Range(lo, hi) BENCHMARK(bench)->Range(lo, hi)
template <typename... Args>
std::string StrCat(const Args... args) {
std::stringstream ss;
StrCatRecursive(ss, args...);
return ss.str();
}
using Eigen::Tensor; using Eigen::Tensor;
using Eigen::TensorMap; using Eigen::TensorMap;
@ -305,9 +298,9 @@ template <typename Device> class BenchmarkSuite {
} }
size_t m_; TensorIndex m_;
size_t k_; TensorIndex k_;
size_t n_; TensorIndex n_;
float* a_; float* a_;
float* b_; float* b_;
float* c_; float* c_;

View File

@ -10,13 +10,11 @@
#define BM_FuncGPU(FUNC) \ #define BM_FuncGPU(FUNC) \
static void BM_##FUNC(int iters, int N) { \ static void BM_##FUNC(int iters, int N) { \
StopBenchmarkTiming(); \ StopBenchmarkTiming(); \
cudaStream_t stream; \ Eigen::CudaStreamDevice stream; \
cudaStreamCreate(&stream); \
Eigen::GpuDevice device(&stream); \ Eigen::GpuDevice device(&stream); \
BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \ BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \
cudaDeviceSynchronize(); \ cudaDeviceSynchronize(); \
suite.FUNC(iters); \ suite.FUNC(iters); \
cudaStreamDestroy(stream); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC, 10, 5000); BENCHMARK_RANGE(BM_##FUNC, 10, 5000);
@ -35,13 +33,11 @@ BM_FuncGPU(reduction);
#define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3) \ #define BM_FuncWithInputDimsGPU(FUNC, D1, D2, D3) \
static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) { \ static void BM_##FUNC##_##D1##x##D2##x##D3(int iters, int N) { \
StopBenchmarkTiming(); \ StopBenchmarkTiming(); \
cudaStream_t stream; \ Eigen::CudaStreamDevice stream; \
cudaStreamCreate(&stream); \
Eigen::GpuDevice device(&stream); \ Eigen::GpuDevice device(&stream); \
BenchmarkSuite<Eigen::GpuDevice> suite(device, D1, D2, D3); \ BenchmarkSuite<Eigen::GpuDevice> suite(device, D1, D2, D3); \
cudaDeviceSynchronize(); \ cudaDeviceSynchronize(); \
suite.FUNC(iters); \ suite.FUNC(iters); \
cudaStreamDestroy(stream); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000); BENCHMARK_RANGE(BM_##FUNC##_##D1##x##D2##x##D3, 10, 5000);
@ -55,13 +51,11 @@ BM_FuncWithInputDimsGPU(contraction, N, 64, N);
#define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \ #define BM_FuncWithKernelDimsGPU(FUNC, DIM1, DIM2) \
static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) { \ static void BM_##FUNC##_##DIM1##x##DIM2(int iters, int N) { \
StopBenchmarkTiming(); \ StopBenchmarkTiming(); \
cudaStream_t stream; \ Eigen::CudaStreamDevice stream; \
cudaStreamCreate(&stream); \
Eigen::GpuDevice device(&stream); \ Eigen::GpuDevice device(&stream); \
BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \ BenchmarkSuite<Eigen::GpuDevice> suite(device, N); \
cudaDeviceSynchronize(); \ cudaDeviceSynchronize(); \
suite.FUNC(iters, DIM1, DIM2); \ suite.FUNC(iters, DIM1, DIM2); \
cudaStreamDestroy(stream); \
} \ } \
BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000); BENCHMARK_RANGE(BM_##FUNC##_##DIM1##x##DIM2, 128, 5000);