Add a performance regression benchmark for lazyProduct

This commit is contained in:
Gael Guennebaud 2015-10-07 15:51:06 +02:00
parent c6eb17cbe9
commit 247259f805
5 changed files with 346 additions and 0 deletions

View File

@ -0,0 +1,45 @@
#3.0.1
#3.1.1
#3.2.0
3.2.4
#5745:37f59e65eb6c
5891:d8652709345d # introduce AVX
#5893:24b4dc92c6d3 # merge
5895:997c2ef9fc8b # introduce FMA
#5904:e1eafd14eaa1 # complex and AVX
5908:f8ee3c721251 # improve packing with ptranspose
#5921:ca808bb456b0 # merge
#5927:8b1001f9e3ac
5937:5a4ca1ad8c53 # New gebp kernel handling up to 3 packets x 4 register-level blocks
#5949:f3488f4e45b2 # merge
#5969:e09031dccfd9 # Disable 3pX4 kernel on Altivec
#5992:4a429f5e0483 # merge
before-evaluators
#6334:f6a45e5b8b7c # Implement evaluator for sparse outer products
#6639:c9121c60b5c7
#6655:06f163b5221f # Properly detect FMA support on ARM
#6677:700e023044e7 # FMA has been wrongly disabled
#6681:11d31dafb0e3
#6699:5e6e8e10aad1 # merge default to tensors
#6726:ff2d2388e7b9 # merge default to tensors
#6742:0cbd6195e829 # merge default to tensors
#6747:853d2bafeb8f # Generalized the gebp apis
6765:71584fd55762 # Made the blocking computation aware of the l3 cache; Also optimized the blocking parameters to take into account the number of threads used for a computation
#6781:9cc5a931b2c6 # generalized gemv
#6792:f6e1daab600a # ensured that contractions that can be reduced to a matrix vector product
#6844:039efd86b75c # merge tensor
6845:7333ed40c6ef # change prefetching in gebp
#6856:b5be5e10eb7f # merge index conversion
#6893:c3a64aba7c70 # clean blocking size computation
#6898:6fb31ebe6492 # rotating kernel for ARM
6899:877facace746 # rotating kernel for ARM only
#6904:c250623ae9fa # result_of
6921:915f1b1fc158 # fix prefetching change for ARM
6923:9ff25f6dacc6 # prefetching
6933:52572e60b5d3 # blocking size strategy
6937:c8c042f286b2 # avoid redundant pack_rhs
6981:7e5d6f78da59 # dynamic loop swapping
6984:45f26866c091 # rm dynamic loop swapping, adjust lhs's micro panel height to fully exploit L1 cache
6986:a675d05b6f8f # blocking heuristic: block on the rhs in L1 if the lhs fit in L1.
7013:f875e75f07e5 # organize a little our default cache sizes, and use a saner default L1 outside of x86 (10% faster on Nexus 5)

View File

@ -0,0 +1,97 @@
#include <iostream>
#include <fstream>
#include <vector>
#include <Eigen/Core>
#include "../../BenchTimer.h"
using namespace Eigen;
// The scalar type under test has to be supplied on the compiler command line
// (e.g. -DSCALAR=float); refuse to build without it.
#ifndef SCALAR
#error SCALAR must be defined
#endif
// Element type shared by every matrix in this benchmark.
typedef SCALAR Scalar;
// Benchmarked kernel: accumulates the lazy (coefficient-based) product A*B into C.
//
// escape() is not defined in this file (presumably it comes from BenchTimer.h and
// acts as an optimization barrier on the given pointer -- TODO confirm there).
// It is called on both operands before the product and on the destination after
// it, so the statement order below must not be changed.
template<typename MatA, typename MatB, typename MatC>
inline void lazy_gemm(const MatA &A, const MatB &B, MatC &C)
{
escape((void*)A.data());
escape((void*)B.data());
// noalias(): C is distinct from A and B, so no temporary is needed.
C.noalias() += A.lazyProduct(B);
escape((void*)C.data());
}
// Times lazy_gemm on fixed-size operands (m x k) * (k x n) -> (m x n), where the
// left-hand side uses storage option TA, and returns the best observed rate as
// 1e-9 * flops / BenchTimer::best() (GFLOPS if best() is in seconds -- presumably).
template<int m, int n, int k, int TA>
EIGEN_DONT_INLINE
double bench()
{
  typedef Matrix<Scalar,m,k,TA> Lhs;
  typedef Matrix<Scalar,k,n>    Rhs;
  typedef Matrix<Scalar,m,n>    Dst;

  Lhs lhs(m,k);
  Rhs rhs(k,n);
  Dst dst(m,n);
  lhs.setRandom();
  rhs.setRandom();
  dst.setZero();

  BenchTimer timer;

  // Work budget per timed run, scaled down for larger scalar types.
  const double budget = 1e7*4/sizeof(Scalar);
  const double min_tries = 10, max_tries = 20;
  // One product costs a multiply and an add per (i,j,l) triple.
  const double flops_per_call = 2. * m * n * k;
  const double calls_in_budget = budget/flops_per_call;

  // Repetitions per timed run are clamped to [10, 10000], timed runs to [10, 20].
  long rep   = std::max(10., std::min(10000., calls_in_budget) );
  long tries = std::max(min_tries, std::min(max_tries, calls_in_budget) );

  BENCH(timer, tries, rep, lazy_gemm(lhs,rhs,dst));

  return 1e-9 * rep * flops_per_call / timer.best();
}
// Runtime-to-compile-time dispatch on the storage order of the left-hand side:
// a non-zero t selects RowMajor, zero selects storage option 0.
template<int m, int n, int k>
double bench_t(int t)
{
  return t ? bench<m,n,k,RowMajor>()
           : bench<m,n,k,0>();
}
// Maps a runtime problem size onto one of the instantiated fixed-size benchmarks.
//
// Only cubic problems (m == n == k) with sizes 1..12 are instantiated.
// m, n, k: product dimensions; t: non-zero to use a row-major left-hand side.
// Returns the rate reported by bench_t, or 0 when the size is not supported.
//
// The sizes are compared directly instead of being packed into a single
// m*10000 + n*100 + k key: that key is ambiguous (e.g. (1,0,101) and (0,101,1)
// both map to 10101) and could silently run the wrong kernel.
EIGEN_DONT_INLINE
double bench_mnk(int m, int n, int k, int t)
{
  if(m != n || n != k)
    return 0;

  switch(m) {
    case  1 : return bench_t< 1, 1, 1>(t);
    case  2 : return bench_t< 2, 2, 2>(t);
    case  3 : return bench_t< 3, 3, 3>(t);
    case  4 : return bench_t< 4, 4, 4>(t);
    case  5 : return bench_t< 5, 5, 5>(t);
    case  6 : return bench_t< 6, 6, 6>(t);
    case  7 : return bench_t< 7, 7, 7>(t);
    case  8 : return bench_t< 8, 8, 8>(t);
    case  9 : return bench_t< 9, 9, 9>(t);
    case 10 : return bench_t<10,10,10>(t);
    case 11 : return bench_t<11,11,11>(t);
    case 12 : return bench_t<12,12,12>(t);
  }
  return 0;
}
// Reads one "m n k t" configuration per entry from settings.txt in the current
// directory, benchmarks each of them, and prints all results as a single row on
// standard output (no trailing newline).
// Returns 0 on success and 1 when settings.txt cannot be opened.
int main(int argc, char **argv)
{
  // Command-line arguments are not used.
  (void)argc;
  (void)argv;

  std::ifstream settings("settings.txt");
  if(!settings)
  {
    // Without this check a missing file would silently produce an empty result row.
    std::cerr << "Cannot open settings.txt" << std::endl;
    return 1;
  }

  std::vector<double> results;
  long m, n, k, t;
  while(settings >> m >> n >> k >> t)
  {
    //std::cerr << " Testing " << m << " " << n << " " << k << std::endl;
    results.push_back( bench_mnk(int(m), int(n), int(k), int(t)) );
  }

  std::cout << RowVectorXd::Map(results.data(), results.size());
  return 0;
}

View File

@ -0,0 +1,37 @@
#!/bin/bash
# base name of the bench
# it reads $1.out
# and generates $1.pdf
WHAT=$1

# Build the column-title row: "rev" followed by one quoted title per non-empty
# line of settings.txt. The number of titles is counted in the same loop so that
# the plot range below always agrees with the header.
header="rev "
col=0
while read -r line
do
  # NOTE: the variable must be double-quoted; with single quotes the test is
  # performed on the literal string '$line' and is always true.
  if [ -n "$line" ]; then
    header="$header \"$line\""
    col=$((col+1))
  fi
done < settings.txt

# $header is deliberately left unquoted so that runs of blanks are collapsed.
echo $header > "$WHAT.out.header"
cat "$WHAT.out" >> "$WHAT.out.header"

# Generate the gnuplot script: one curve per settings line, revisions on the x axis.
{
  echo "set title '$WHAT'"
  echo "set key autotitle columnhead outside "
  echo "set xtics rotate 1"
  echo "set term pdf color rounded enhanced fontscale 0.35 size 7in,5in"
  echo "set output '$WHAT.pdf'"
  # Column 1 holds the revision label, data columns start at 2.
  echo "plot for [col=2:$col+1] '$WHAT.out.header' using 0:col:xticlabels(1) with lines"
  echo " "
} > "$WHAT.gnuplot"

gnuplot -persist < "$WHAT.gnuplot"

# generate a png file
# convert -background white -density 120 -rotate 90 -resize 800 +dither -colors 256 -quality 0 $WHAT.ps -background white -flatten .$WHAT.png

# clean
rm "$WHAT.out.header" "$WHAT.gnuplot"

View File

@ -0,0 +1,152 @@
#!/bin/bash
# Examples of environment variables to be set:
# PREFIX="haswell-fma-"
# CXX_FLAGS="-mfma"
# CXX="clang++"   (defaults to g++ when unset or empty)
# Options:
# -up : enforce the recomputation of existing data, and keep best results as a merging strategy
# -s : recompute selected changesets only and keep bests

# Option detection is a plain substring search over the whole argument list.
if echo "$*" | grep '\-up' > /dev/null; then
  update=true
else
  update=false
fi
if echo "$*" | grep '\-s' > /dev/null; then
  selected=true
else
  selected=false
fi

# Kept so that test_current can look for changeset ids among the arguments.
global_args="$*"

if [ $selected == true ]; then
  echo "Recompute selected changesets only and keep bests"
elif [ $update == true ]; then
  echo "(Re-)Compute all changesets and keep bests"
else
  echo "Skip previously computed changesets"
fi

# Get (or refresh) the Eigen checkout that is switched between changesets.
if [ ! -d "eigen_src" ]; then
  hg clone https://bitbucket.org/eigen/eigen eigen_src
else
  # Subshell: the working directory of the script is never changed.
  ( cd eigen_src && hg pull -u )
fi

# Default compiler. The variable must be double-quoted here: testing the
# single-quoted literal '$CXX' is always true and would discard the user's choice.
if [ -z "$CXX" ]; then
  CXX=g++
fi
# make_backup BASENAME
# Renames BASENAME.out to BASENAME.backup when the former exists as a regular file.
function make_backup
{
  local base="$1"
  if [[ -f "$base.out" ]]; then
    mv "$base.out" "$base.backup"
  fi
}
function merge
{
count1=`echo $1 | wc -w`
count2=`echo $2 | wc -w`
if [ $count1 == $count2 ]; then
a=( $1 ); b=( $2 )
res=""
for (( i=0 ; i<$count1 ; i++ )); do
ai=${a[$i]}; bi=${b[$i]}
tmp=`echo "if ($ai > $bi) $ai else $bi " | bc -l`
res="$res $tmp"
done
echo $res
else
echo $1
fi
}
# test_current REV SCALAR NAME
# Benchmarks lazy_gemm.cpp for changeset REV with the given scalar type and
# appends a "REV results..." line to NAME.out. Results stored in NAME.backup are
# reused, or merged with the new measurements through merge().
function test_current
{
  rev=$1
  scalar=$2
  name=$3

  # Results recorded for this changeset during a previous run, if any
  # (the first 13 characters of a stored line are skipped).
  prev=""
  if [ -e "$name.backup" ]; then
    prev=$(grep $rev "$name.backup" | cut -c 14-)
  fi
  res=$prev

  count_rev=$(echo $prev | wc -w)
  count_ref=$(cat "settings.txt" | wc -l)

  # Was this changeset named on the command line?
  rev_found=false
  if echo "$global_args" | grep "$rev" > /dev/null; then
    rev_found=true
  fi

  # echo $update et $selected et $rev_found because $rev et "$global_args"
  # echo $count_rev et $count_ref

  # Nothing to recompute: not in update mode, previous results are complete,
  # and the changeset was not explicitly selected.
  if ! [ $update == true ] && ! [ $count_rev != $count_ref ] && ! { [ $selected == true ] && [ $rev_found == true ]; }; then
    echo "Skip existing results for $rev / $name"
    echo "$rev $res" >> $name.out
    return
  fi

  if ! $CXX -O2 -DNDEBUG -march=native $CXX_FLAGS -I eigen_src lazy_gemm.cpp -DSCALAR=$scalar -o $name; then
    echo "Compilation failed, skip rev $rev"
    return
  fi

  curr=$(./$name)
  if [ $count_rev == $count_ref ]; then
    echo "merge previous $prev"
    echo "with new $curr"
  else
    echo "got $curr"
  fi
  res=$(merge "$curr" "$prev")
  # echo $res
  echo "$rev $res" >> $name.out
}
# Keep the results of the previous run so that test_current can reuse/merge them.
make_backup "${PREFIX}slazy_gemm"
make_backup "${PREFIX}dlazy_gemm"
make_backup "${PREFIX}clazy_gemm"

# Walk through changesets.txt, ignoring everything after '#' and lines that are
# left without any alphanumeric character.
cut -f1 -d"#" < changesets.txt | grep -E '[[:alnum:]]' | while read -r rev
do
  # The variable must be double-quoted: testing the single-quoted literal '$rev'
  # is always true.
  if [ -n "$rev" ]; then
    echo "Testing rev $rev"
    cd eigen_src
    hg up -C "$rev" > /dev/null
    # Results are keyed by the id reported by hg, not by the name used in changesets.txt.
    actual_rev=`hg identify | cut -f1 -d' '`
    cd ..

    test_current "$actual_rev" float                  "${PREFIX}slazy_gemm"
    test_current "$actual_rev" double                 "${PREFIX}dlazy_gemm"
    test_current "$actual_rev" "std::complex<double>" "${PREFIX}clazy_gemm"
  fi
done

echo "Float:"
cat "${PREFIX}slazy_gemm.out"
echo ""

echo "Double:"
cat "${PREFIX}dlazy_gemm.out"
echo ""

echo "Complex:"
cat "${PREFIX}clazy_gemm.out"
echo ""

./make_plot.sh "${PREFIX}slazy_gemm"
./make_plot.sh "${PREFIX}dlazy_gemm"
./make_plot.sh "${PREFIX}clazy_gemm"

View File

@ -0,0 +1,15 @@
1 1 1 0
2 2 2 0
3 3 3 0
4 4 4 0
4 4 4 1
5 5 5 0
6 6 6 0
7 7 7 0
7 7 7 1
8 8 8 0
9 9 9 0
10 10 10 0
11 11 11 0
12 12 12 0
12 12 12 1