resurrected tvmet, added mtl4, Intel's MKL and hand-coded vectorized backends

in the benchmark suite
This commit is contained in:
Gael Guennebaud 2008-07-10 18:28:50 +00:00
parent 2b53fd4d53
commit 6f71ef8277
37 changed files with 599 additions and 671 deletions

View File

@ -28,56 +28,47 @@ include_directories(
MACRO(BTL_ADD_BENCH targetname)
ADD_EXECUTABLE(${ARGV})
ADD_TEST(${targetname} "${targetname}")
foreach(_current_var ${ARGN})
set(_last_var ${_current_var})
endforeach(_current_var)
set(_sources ${ARGN})
list(LENGTH _sources _argn_length)
list(REMOVE_ITEM _sources ON OFF TRUE FALSE)
list(LENGTH _sources _src_length)
if (${_argn_length} EQUAL ${_src_length})
set(_last_var ON)
endif (${_argn_length} EQUAL ${_src_length})
OPTION(BUILD_${targetname} "Build benchmark ${targetname}" ${_last_var})
message(STATUS ${targetname} " : " ${ARGN} " => " ${_sources} " => " ${_last_var})
IF(BUILD_${targetname})
ADD_EXECUTABLE(${targetname} ${_sources})
ADD_TEST(${targetname} "${targetname}")
ENDIF(BUILD_${targetname})
ENDMACRO(BTL_ADD_BENCH)
ENABLE_TESTING()
# Eigen2
find_package(Eigen2)
if (EIGEN2_FOUND)
macro_optional_add_subdirectory(libs/eigen2 ON)
macro_optional_add_subdirectory(libs/tiny_eigen2 OFF)
endif (EIGEN2_FOUND)
# GMM++
find_package(GMM)
if (GMM_FOUND)
macro_optional_add_subdirectory(libs/gmm ON)
endif (GMM_FOUND)
# Boost
find_package(Boost)
if (Boost_FOUND)
include_directories(${Boost_INCLUDE_DIRS})
macro_optional_add_subdirectory(libs/ublas ON)
endif (Boost_FOUND)
# blitz
find_package(Blitz)
if (BLITZ_FOUND)
macro_optional_add_subdirectory(libs/blitz ON)
macro_optional_add_subdirectory(libs/tiny_blitz OFF)
endif (BLITZ_FOUND)
# tvmet
find_package(Tvmet)
if (TVMET_FOUND)
macro_optional_add_subdirectory(libs/tvmet OFF)
endif (TVMET_FOUND)
# cblas
find_package(CBLAS)
if (CBLAS_FOUND)
macro_optional_add_subdirectory(libs/C_BLAS ON)
endif (CBLAS_FOUND)
macro_optional_add_subdirectory(libs/f77 ON)
macro_optional_add_subdirectory(libs/C ON)
macro_optional_add_subdirectory(libs/STL ON)
macro_optional_add_subdirectory(libs/STL_algo ON)
add_subdirectory(libs/eigen2)
add_subdirectory(libs/hand_vec)
add_subdirectory(libs/gmm)
add_subdirectory(libs/mtl4)
add_subdirectory(libs/ublas)
add_subdirectory(libs/blitz)
add_subdirectory(libs/tvmet)
add_subdirectory(libs/C_BLAS)
add_subdirectory(libs/f77)
add_subdirectory(libs/C)
add_subdirectory(libs/STL)
add_subdirectory(libs/STL_algo)
add_subdirectory(data)

View File

@ -0,0 +1,53 @@
# - Try to find Intel's MKL (the directory holding cblas.h plus the libraries to link)
# Once done this will define
#
# MKL_FOUND - both the header directory and the core library were located
# MKL_INCLUDES - directory containing cblas.h
# MKL_LIBRARIES - full path to mkl_core followed by the extra libraries to link
#
# Search hints: the MKLDIR and MKLLIB environment variables, the default
# /opt/intel/mkl/<version>/lib/<arch> layout, and INCLUDE_INSTALL_DIR /
# LIB_INSTALL_DIR.

# Results already known: stay quiet on subsequent configure runs.
# (This must be the MKL_ variable; the CBLAS_ one belongs to another module.)
if(MKL_INCLUDES AND MKL_LIBRARIES)
  set(MKL_FIND_QUIETLY TRUE)
endif()

find_path(MKL_INCLUDES
  NAMES
  cblas.h
  PATHS
  $ENV{MKLDIR}/include
  ${INCLUDE_INSTALL_DIR}
)

# Select the per-architecture sub-directory and interface library.
# The expansion is quoted so that an empty or odd processor string cannot
# break the if() expression.
if("${CMAKE_HOST_SYSTEM_PROCESSOR}" STREQUAL "x86_64")
  set(_mkl_arch_label "64 bits")
  set(_mkl_arch_dir "em64t")
  set(_mkl_interface_lib "mkl_intel_lp64")
else()
  set(_mkl_arch_label "32 bits")
  set(_mkl_arch_dir "32")
  set(_mkl_interface_lib "mkl_intel")
endif()

if(NOT MKL_FIND_QUIETLY)
  message(STATUS "${CMAKE_HOST_SYSTEM_PROCESSOR}")
  message(STATUS "${_mkl_arch_label}")
endif()

# find_library() does not expand wildcards in PATHS, so resolve the
# versioned install directories explicitly.
file(GLOB _mkl_default_lib_dirs "/opt/intel/mkl/*/lib/${_mkl_arch_dir}")

# Locate mkl_core only. Listing the companion libraries as extra names would
# make them *alternatives*, so a lone pthread could satisfy the search.
find_library(MKL_LIBRARIES
  mkl_core
  PATHS
  $ENV{MKLLIB}
  ${_mkl_default_lib_dirs}
  ${LIB_INSTALL_DIR}
)

# The companion libraries are appended by name for both architectures.
# This sets a normal variable that shadows the cache entry, so the cached
# value stays the bare mkl_core path and nothing accumulates across runs.
if(MKL_LIBRARIES)
  set(MKL_LIBRARIES ${MKL_LIBRARIES} ${_mkl_interface_lib} mkl_sequential guide pthread)
endif()

if(NOT MKL_FIND_QUIETLY)
  message(STATUS "${MKL_LIBRARIES}")
endif()

unset(_mkl_arch_label)
unset(_mkl_arch_dir)
unset(_mkl_interface_lib)
unset(_mkl_default_lib_dirs)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(MKL DEFAULT_MSG
  MKL_INCLUDES MKL_LIBRARIES)

mark_as_advanced(MKL_INCLUDES MKL_LIBRARIES)

View File

@ -0,0 +1,31 @@
# - Try to find MTL4 headers
# Once done this will define
#
# MTL4_FOUND - system has the MTL4 lib
# MTL4_INCLUDE_DIR - the MTL4 include directory
#
# Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
# Adapted from FindEigen.cmake:
# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
if(MTL4_INCLUDE_DIR)

  # in cache already
  set(MTL4_FOUND TRUE)

else()

  # Search first: the standard-args check below can only succeed once
  # MTL4_INCLUDE_DIR has been filled in by find_path().
  find_path(MTL4_INCLUDE_DIR NAMES boost/numeric/mtl/mtl.hpp
    PATHS
    ${INCLUDE_INSTALL_DIR}
  )

  # Sets MTL4_FOUND and prints the usual found / not-found message.
  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(MTL4 DEFAULT_MSG MTL4_INCLUDE_DIR)

  mark_as_advanced(MTL4_INCLUDE_DIR)

endif()

View File

@ -0,0 +1,32 @@
# - Try to find tvmet headers
# Once done this will define
#
# TVMET_FOUND - system has tvmet lib
# TVMET_INCLUDE_DIR - the tvmet include directory
#
# Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
# Adapted from FindEigen.cmake:
# Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
# Redistribution and use is allowed according to the terms of the BSD license.
# For details see the accompanying COPYING-CMAKE-SCRIPTS file.
if(TVMET_INCLUDE_DIR)

  # in cache already
  set(TVMET_FOUND TRUE)

else()

  # Search first: the standard-args check below can only succeed once
  # TVMET_INCLUDE_DIR has been filled in by find_path().
  find_path(TVMET_INCLUDE_DIR NAMES tvmet/tvmet.h
    PATHS
    ${TVMETDIR}/
    ${INCLUDE_INSTALL_DIR}
  )

  # Reports the result and sets the package's *_FOUND variable.
  include(FindPackageHandleStandardArgs)
  find_package_handle_standard_args(Tvmet DEFAULT_MSG TVMET_INCLUDE_DIR)

  mark_as_advanced(TVMET_INCLUDE_DIR)

endif()

View File

@ -15,24 +15,26 @@ done
for FILE in $DATA_FILE
do
if [ $FILE != $LAST ]
then
echo "'"$FILE"'" ",\\" >> $WHAT.gnuplot
fi
if [ $FILE != $LAST ]
then
echo "'"$FILE"'" ",\\" >> $WHAT.gnuplot
fi
done
echo "'"$LAST"'" >> $WHAT.gnuplot
echo set term postscript color >> $WHAT.gnuplot
echo set output "'"../${DIR}/$WHAT.ps"'" >> $WHAT.gnuplot
#echo set term pbm color >> $WHAT.gnuplot
#echo set output "'"../${DIR}/$WHAT.ppm"'" >> $WHAT.gnuplot
# echo set term pdf color >> $WHAT.gnuplot
# echo set output "'"../${DIR}/$WHAT.pdf"'" >> $WHAT.gnuplot
# echo set term png truecolor size 1024,768 >> $WHAT.gnuplot
# echo set output "'"../${DIR}/$WHAT.png"'" >> $WHAT.gnuplot
echo plot \\ >> $WHAT.gnuplot
for FILE in $DATA_FILE
do
if [ $FILE != $LAST ]
then
echo "'"$FILE"'" ",\\" >> $WHAT.gnuplot
fi
if [ $FILE != $LAST ]
then
echo "'"$FILE"'" ",\\" >> $WHAT.gnuplot
fi
done
echo "'"$LAST"'" >> $WHAT.gnuplot

View File

@ -18,8 +18,10 @@
//
#ifndef BTL_HH
#define BTL_HH
#include "bench_parameter.hh"
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include "utilities.h"

View File

@ -3,12 +3,12 @@
// Author : L. Plagne <laurent.plagne@edf.fr)> from boost lib
// Copyright (C) EDF R&D, lun sep 30 14:23:17 CEST 2002
//=====================================================
//
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ -16,18 +16,22 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
//
// simple_time extracted from the boost library
//
//
#ifndef _PORTABLE_TIMER_HH
#define _PORTABLE_TIMER_HH
#include <ctime>
#include <cstdlib>
#include <time.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <unistd.h>
#include <sys/times.h>
#define USEC_IN_SEC 1000000
@ -45,37 +49,37 @@ class Portable_Timer
_utime_usec_stop(-1)
{
}
void start()
{
void start()
{
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
_start_time = std::clock();
_start_time = std::clock();
_utime_sec_start = resourcesUsage.ru_utime.tv_sec ;
_utime_usec_start = resourcesUsage.ru_utime.tv_usec ;
}
void stop()
{
void stop()
{
int status=getrusage(RUSAGE_SELF, &resourcesUsage) ;
_stop_time = std::clock();
_stop_time = std::clock();
_utime_sec_stop = resourcesUsage.ru_utime.tv_sec ;
_utime_usec_stop = resourcesUsage.ru_utime.tv_usec ;
}
double elapsed()
{
return double(_stop_time - _start_time) / CLOCKS_PER_SEC;
return double(_stop_time - _start_time) / CLOCKS_PER_SEC;
}
double user_time()
{
long tot_utime_sec=_utime_sec_stop-_utime_sec_start;
@ -83,17 +87,17 @@ class Portable_Timer
return double(tot_utime_sec)+ double(tot_utime_usec)/double(USEC_IN_SEC) ;
}
private:
struct rusage resourcesUsage ;
long _utime_sec_start ;
long _utime_usec_start ;
long _utime_sec_stop ;
long _utime_usec_stop ;
std::clock_t _start_time;
std::clock_t _stop_time;

View File

@ -1,118 +0,0 @@
//=====================================================
// File : ATLAS_LU_solve_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:22 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ATLAS_LU_solve_interface_HH
#define ATLAS_LU_solve_interface_HH
#include "ATLAS_interface.hh"
extern "C"
{
#include <atlas_level1.h>
#include <atlas_level2.h>
#include <atlas_level3.h>
#include "cblas.h"
#include <atlas_lapack.h>
}
// LU factorization / solve backend that forwards to ATLAS' ATL_dgetrf / ATL_dgetrs.
// NOTE(review): this primary template always calls the d-prefixed routines,
// whatever `real` is; presumably it is only instantiated with double (float has
// its own specialization in this file) -- confirm.
template<class real>
class ATLAS_LU_solve_interface : public ATLAS_interface<real>
{
public :

  typedef typename ATLAS_interface<real>::gene_matrix gene_matrix;
  typedef typename ATLAS_interface<real>::gene_vector gene_vector;

  // Pivot indices are stored in a raw, heap-allocated int array.
  typedef int * Pivot_Vector;

  // Allocates an array of N pivot indices; release it with free_Pivot_Vector().
  inline static void new_Pivot_Vector(Pivot_Vector & pivot, int N)
  {
    pivot = new int[N];
  }

  // Releases an array obtained from new_Pivot_Vector().
  inline static void free_Pivot_Vector(Pivot_Vector & pivot)
  {
    // The storage comes from new[], so it must be released with delete[].
    delete[] pivot;
  }

  // Runs ATL_dgetrf on the N x N column-major matrix LU (leading dimension N),
  // passing `pivot` to receive the pivot indices.
  inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
  {
    int error=ATL_dgetrf(CblasColMajor,N,N,LU,N,pivot);
    (void)error; // status is deliberately not checked here
  }

  // Calls copy_vector(B,X,N), then ATL_dgetrs with one right-hand side (X) using
  // the factors in LU and the pivots produced by LU_factor().
  inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, const gene_vector &B, gene_vector X, int N)
  {
    copy_vector(B,X,N);
    ATL_dgetrs(CblasColMajor,CblasNoTrans,N,1,LU,N,pivot,X,N);
  }

};
// Single-precision specialization: same interface as the primary template, but
// forwarding to ATL_sgetrf / ATL_sgetrs.
template<>
class ATLAS_LU_solve_interface<float> : public ATLAS_interface<float>
{
public :

  // Pivot indices are stored in a raw, heap-allocated int array.
  typedef int * Pivot_Vector;

  // Allocates an array of N pivot indices; release it with free_Pivot_Vector().
  inline static void new_Pivot_Vector(Pivot_Vector & pivot, int N)
  {
    pivot = new int[N];
  }

  // Releases an array obtained from new_Pivot_Vector().
  inline static void free_Pivot_Vector(Pivot_Vector & pivot)
  {
    // The storage comes from new[], so it must be released with delete[].
    delete[] pivot;
  }

  // Runs ATL_sgetrf on the N x N column-major matrix LU (leading dimension N),
  // passing `pivot` to receive the pivot indices.
  inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
  {
    int error=ATL_sgetrf(CblasColMajor,N,N,LU,N,pivot);
    (void)error; // status is deliberately not checked here
  }

  // Calls copy_vector(B,X,N), then ATL_sgetrs with one right-hand side (X) using
  // the factors in LU and the pivots produced by LU_factor().
  inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, const gene_vector &B, gene_vector X, int N)
  {
    copy_vector(B,X,N);
    ATL_sgetrs(CblasColMajor,CblasNoTrans,N,1,LU,N,pivot,X,N);
  }

};
#endif

View File

@ -1,120 +0,0 @@
//=====================================================
// File : ATLAS_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:21 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef ATLAS_PRODUIT_MATRICE_VECTEUR_HH
#define ATLAS_PRODUIT_MATRICE_VECTEUR_HH
#include "f77_interface_base.hh"
#include <string>
extern "C"
{
#include <atlas_level1.h>
#include <atlas_level2.h>
#include <atlas_level3.h>
#include "cblas.h"
}
// Benchmark backend that forwards each operation to an ATLAS ATL_d* routine.
// NOTE(review): this primary template always calls the d-prefixed routines,
// whatever `real` is; presumably it is only instantiated with double (float has
// its own specialization in this file) -- confirm.
template<class real>
class ATLAS_interface : public f77_interface_base<real>
{
public :
// Re-export the storage types declared by the base interface.
typedef typename f77_interface_base<real>::gene_matrix gene_matrix;
typedef typename f77_interface_base<real>::gene_vector gene_vector;
// Label identifying this backend.
static inline std::string name( void )
{
return "ATLAS";
}
// Calls ATL_dgemv on the N x N matrix A (no transpose, leading dimension N)
// with scalars 1.0 and 0.0 and unit strides for B and X.
// Presumably X = A*B -- confirm against the ATLAS documentation.
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{
ATL_dgemv(CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
// Calls ATL_dgemm on N x N operands A and B (neither transposed) with scalars
// 1.0 and 0.0, writing into X. Presumably X = A*B -- confirm.
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
{
ATL_dgemm(CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
// Same ATL_dgemm call with A passed as both operands and the first one flagged
// CblasTrans. Presumably X = A^T * A -- confirm.
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
{
ATL_dgemm(CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
// Same ATL_dgemm call with A passed as both operands and the second one flagged
// CblasTrans. Presumably X = A * A^T -- confirm.
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
{
ATL_dgemm(CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
// Calls ATL_daxpy over N elements with unit strides.
// Presumably Y += coef * X -- confirm.
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
{
ATL_daxpy(N,coef,X,1,Y,1);
}
};
// Single-precision specialization: same operations as the primary template,
// forwarded to the ATL_s* routines instead of the ATL_d* ones.
template<>
class ATLAS_interface<float> : public f77_interface_base<float>
{
public :
// Label identifying this backend (same string as the primary template).
static inline std::string name( void )
{
return "ATLAS";
}
// Calls ATL_sgemv on the N x N matrix A (no transpose, leading dimension N)
// with scalars 1.0 and 0.0 and unit strides for B and X.
// Presumably X = A*B -- confirm against the ATLAS documentation.
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{
ATL_sgemv(CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
// Calls ATL_sgemm on N x N operands A and B (neither transposed) with scalars
// 1.0 and 0.0, writing into X. Presumably X = A*B -- confirm.
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
{
ATL_sgemm(CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
// Same ATL_sgemm call with A passed as both operands and the first one flagged
// CblasTrans. Presumably X = A^T * A -- confirm.
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
{
ATL_sgemm(CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
// Same ATL_sgemm call with A passed as both operands and the second one flagged
// CblasTrans. Presumably X = A * A^T -- confirm.
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
{
ATL_sgemm(CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
// Calls ATL_saxpy over N elements with unit strides.
// Presumably Y += coef * X -- confirm.
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N)
{
ATL_saxpy(N,coef,X,1,Y,1);
}
};
#endif

View File

@ -1,4 +0,0 @@
include_directories(${BLITZ_INCLUDES})
add_executable(btl_blitz main.cpp)
target_link_libraries(btl_blitz ${BLITZ_LIBRARIES})

View File

@ -1,33 +0,0 @@
Bonjour à tous,
Une dizaine de candidats Neptune se sont déjà déclarés pour la formation C++
(sur la base de 2 jours/semaine pendant 1 mois).
Il faut faire une proposition de date pour la formation. Les vacances scolaires zone C (Paris)
se terminent le 22 avril (au matin) nous pourrions commencer ce jour.
A priori il me semble que deux jours consécutifs soient préférables. Je propose les mardi et mercredi
de chaque semaine. Sachant qu'il y a 2 jeudi (1er et 8 mai) consécutifs qui sont fériés.
Dans cette hypothèse les dates de formation seraient :
Mardi 22 avril (Vincent/Marc)
Mercredi 23 avril (Marc)
Mardi 29 avril (Marc/Antoine)
Mercredi 30 avril (Antoine)
Mardi 6 mai (Antoine)
Mercredi 7 mai (Antoine)
Mardi 13 mai (Laurent)
Mercredi 14 mai (Laurent)
J'ai mis entre parenthèse les intervenants principaux (on doit choisir le deuxième formateur pour chaque session).
Qu'en pensez-vous ?
Je dois toujours présenter un programme, pouvez-vous me donner vos programmes respectifs...?
Laurent

View File

@ -34,11 +34,32 @@ public :
static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
{
// for (int i=0;i<N;i++)
// {
// real somme = 0.0;
// for (int j=0;j<N;j++)
// somme += A[j*N+i] * B[j];
// X[i] = somme;
// }
for (int i=0;i<N;i++)
X[i] = 0;
for (int i=0;i<N;i++)
{
real tmp = B[i];
int iN = i*N;
for (int j=0;j<N;j++)
X[j] += tmp * A[j+iN];
}
}
static inline void atv_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
{
for (int i=0;i<N;i++)
{
int iN = i*N;
real somme = 0.0;
for (int j=0;j<N;j++)
somme += A[j*N+i] * B[j];
somme += A[iN+j] * B[j];
X[i] = somme;
}
}

View File

@ -21,6 +21,7 @@
#include "bench.hh"
#include "C_interface.hh"
#include "action_matrix_vector_product.hh"
#include "action_atv_product.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
#include "action_ata_product.hh"
@ -34,6 +35,7 @@ int main()
{
bench<Action_matrix_vector_product<C_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<C_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<C_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<C_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<C_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -1,4 +1,20 @@
include_directories(${CBLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
btl_add_bench(btl_cblas main.cpp)
target_link_libraries(btl_cblas ${CBLAS_LIBRARIES})
find_package(CBLAS)
if (CBLAS_FOUND)
include_directories(${CBLAS_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
btl_add_bench(btl_cblas main.cpp)
if(BUILD_btl_cblas)
target_link_libraries(btl_cblas ${CBLAS_LIBRARIES})
set_target_properties(btl_cblas PROPERTIES COMPILE_FLAGS "-DCBLASNAME=ATLAS")
endif(BUILD_btl_cblas)
endif (CBLAS_FOUND)
find_package(MKL)
if (MKL_FOUND)
include_directories(${MKL_INCLUDES} ${PROJECT_SOURCE_DIR}/libs/f77)
btl_add_bench(btl_mkl main.cpp)
if(BUILD_btl_mkl)
target_link_libraries(btl_mkl ${MKL_LIBRARIES})
set_target_properties(btl_mkl PROPERTIES COMPILE_FLAGS "-DCBLASNAME=INTEL_MKL")
endif(BUILD_btl_mkl)
endif (MKL_FOUND)

View File

@ -27,6 +27,9 @@ extern "C"
#include "cblas.h"
}
#define MAKE_STRING2(S) #S
#define MAKE_STRING(S) MAKE_STRING2(S)
template<class real>
class C_BLAS_interface : public f77_interface_base<real>
{
@ -37,7 +40,7 @@ public :
static inline std::string name( void )
{
return "C_BLAS";
return MAKE_STRING(CBLASNAME);
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
@ -84,7 +87,7 @@ public :
static inline std::string name( void )
{
return "C_BLAS";
return MAKE_STRING(CBLASNAME);
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)

View File

@ -22,6 +22,7 @@
#include "bench.hh"
#include "action_matrix_vector_product.hh"
#include "action_matrix_matrix_product.hh"
#include "action_atv_product.hh"
#include "action_axpy.hh"
#include "action_lu_solve.hh"
#include "action_ata_product.hh"
@ -36,6 +37,8 @@ int main()
bench<Action_matrix_vector_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<C_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -1,127 +0,0 @@
//=====================================================
// File : INTEL_BLAS_LU_solve_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:29 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef INTEL_BLAS_LU_solve_interface_HH
#define INTEL_BLAS_LU_solve_interface_HH
#include "INTEL_BLAS_interface.hh"
extern "C"
{
// void dgetrf_(int *M, int *N, double *A, int *LDA, int *IPIV, int *INFO);
// void dgetrs_(char *TRANS, int *N, int *NRHS, double *A, int *LDA, int *IPIV, double *B, int *LDB, int *INFO);
// void sgetrf_(int *M, int *N, float *A, int *LDA, int *IPIV, int *INFO);
// void sgetrs_(char *TRANS, int *N, int *NRHS, float *A, int *LDA, int *IPIV, double *B, int *LDB, int *INFO);
#include "mkl_lapack.h"
}
// LU factorization / solve backend that forwards to the DGETRF / DGETRS
// routines declared in mkl_lapack.h.
// NOTE(review): this primary template always calls the D-prefixed routines,
// whatever `real` is; presumably it is only instantiated with double (float has
// its own specialization in this file) -- confirm.
template<class real>
class INTEL_BLAS_LU_solve_interface : public INTEL_BLAS_interface<real>
{
public :

  typedef typename INTEL_BLAS_interface<real>::gene_matrix gene_matrix;
  typedef typename INTEL_BLAS_interface<real>::gene_vector gene_vector;

  // Pivot indices are stored in a raw, heap-allocated int array.
  typedef int * Pivot_Vector;

  // Allocates an array of N pivot indices; release it with free_Pivot_Vector().
  inline static void new_Pivot_Vector(Pivot_Vector & pivot, int N)
  {
    pivot = new int[N];
  }

  // Releases an array obtained from new_Pivot_Vector().
  inline static void free_Pivot_Vector(Pivot_Vector & pivot)
  {
    // The storage comes from new[], so it must be released with delete[].
    delete[] pivot;
  }

  // Runs DGETRF on the N x N matrix LU (leading dimension N), passing `pivot`
  // to receive the pivot indices. The routine takes every argument by pointer.
  inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
  {
    int info; // status reported by the routine; deliberately not checked here
    DGETRF(&N,&N,LU,&N,pivot,&info);
  }

  // Calls copy_vector(B,X,N), then DGETRS ("N": no transpose) with one
  // right-hand side (X) using the factors in LU and the pivots from LU_factor().
  inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, const gene_vector &B, gene_vector X, int N)
  {
    int info; // status reported by the routine; deliberately not checked here
    int one=1;
    // Writable buffer: binding a string literal to a non-const char* is
    // deprecated (and ill-formed since C++11).
    char transpose[]="N";
    copy_vector(B,X,N);
    DGETRS(transpose,&N,&one,LU,&N,pivot,X,&N,&info);
  }

};
// Single-precision specialization: same interface as the primary template, but
// forwarding to SGETRF / SGETRS.
template<>
class INTEL_BLAS_LU_solve_interface<float> : public INTEL_BLAS_interface<float>
{
public :

  // Pivot indices are stored in a raw, heap-allocated int array.
  typedef int * Pivot_Vector;

  // Allocates an array of N pivot indices; release it with free_Pivot_Vector().
  inline static void new_Pivot_Vector(Pivot_Vector & pivot, int N)
  {
    pivot = new int[N];
  }

  // Releases an array obtained from new_Pivot_Vector().
  inline static void free_Pivot_Vector(Pivot_Vector & pivot)
  {
    // The storage comes from new[], so it must be released with delete[].
    delete[] pivot;
  }

  // Runs SGETRF on the N x N matrix LU (leading dimension N), passing `pivot`
  // to receive the pivot indices. The routine takes every argument by pointer.
  inline static void LU_factor(gene_matrix & LU, Pivot_Vector & pivot, int N)
  {
    int info; // status reported by the routine; deliberately not checked here
    SGETRF(&N,&N,LU,&N,pivot,&info);
  }

  // Calls copy_vector(B,X,N), then SGETRS ("N": no transpose) with one
  // right-hand side (X) using the factors in LU and the pivots from LU_factor().
  inline static void LU_solve(const gene_matrix & LU, const Pivot_Vector pivot, const gene_vector &B, gene_vector X, int N)
  {
    // Writable buffer: binding a string literal to a non-const char* is
    // deprecated (and ill-formed since C++11).
    char transpose[]="N";
    int info; // status reported by the routine; deliberately not checked here
    int one=1;
    copy_vector(B,X,N);
    SGETRS(transpose,&N,&one,LU,&N,pivot,X,&N,&info);
  }

};
#endif

View File

@ -1,95 +0,0 @@
//=====================================================
// File : INTEL_BLAS_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:29 CEST 2002
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef INTEL_BLAS_PRODUIT_MATRICE_VECTEUR_HH
#define INTEL_BLAS_PRODUIT_MATRICE_VECTEUR_HH
#include "f77_interface.hh"
extern "C"
{
#include "mkl_cblas.h"
}
// Benchmark backend that forwards each operation to a cblas_d* routine from
// mkl_cblas.h, always using column-major storage (CblasColMajor).
// NOTE(review): this primary template always calls the d-prefixed routines,
// whatever `real` is; presumably it is only instantiated with double (float has
// its own specialization in this file) -- confirm.
template<class real>
class INTEL_BLAS_interface : public f77_interface_base<real>
{
public :
// Re-export the storage types declared by the base interface.
typedef typename f77_interface_base<real>::gene_matrix gene_matrix;
typedef typename f77_interface_base<real>::gene_vector gene_vector;
// Label identifying this backend.
static inline std::string name( void ) { return "INTEL_BLAS"; }
// cblas_dgemv on the N x N matrix A (no transpose, leading dimension N) with
// scalars 1.0 and 0.0 and unit strides. Presumably X = A*B -- confirm.
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) {
cblas_dgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
// cblas_dgemm on N x N operands A and B (neither transposed) with scalars 1.0
// and 0.0, writing into X. Presumably X = A*B -- confirm.
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) {
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
// Same call with A as both operands, the first flagged CblasTrans.
// Presumably X = A^T * A -- confirm.
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N) {
cblas_dgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
// Same call with A as both operands, the second flagged CblasTrans.
// Presumably X = A * A^T -- confirm.
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N) {
cblas_dgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
// cblas_daxpy over N elements with unit strides. Presumably Y += coef * X -- confirm.
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N) {
cblas_daxpy(N,coef,X,1,Y,1);
}
};
// Single-precision specialization: same operations as the primary template,
// forwarded to the cblas_s* routines, still with column-major storage.
template<>
class INTEL_BLAS_interface<float> : public f77_interface_base<float>
{
public :
// Label identifying this backend (same string as the primary template).
static inline std::string name() { return "INTEL_BLAS"; }
// cblas_sgemv on the N x N matrix A (no transpose, leading dimension N) with
// scalars 1.0 and 0.0 and unit strides. Presumably X = A*B -- confirm.
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N) {
// Earlier form of the call, without the storage-order argument:
// cblas_sgemv(CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
cblas_sgemv(CblasColMajor,CblasNoTrans,N,N,1.0,A,N,B,1,0.0,X,1);
}
// cblas_sgemm on N x N operands A and B (neither transposed) with scalars 1.0
// and 0.0, writing into X. Presumably X = A*B -- confirm.
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N) {
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasNoTrans,N,N,N,1.0,A,N,B,N,0.0,X,N);
}
// Same call with A as both operands, the first flagged CblasTrans.
// Presumably X = A^T * A -- confirm.
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N) {
cblas_sgemm(CblasColMajor,CblasTrans,CblasNoTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
// Same call with A as both operands, the second flagged CblasTrans.
// Presumably X = A * A^T -- confirm.
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N) {
cblas_sgemm(CblasColMajor,CblasNoTrans,CblasTrans,N,N,N,1.0,A,N,A,N,0.0,X,N);
}
// cblas_saxpy over N elements with unit strides. Presumably Y += coef * X -- confirm.
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N) {
cblas_saxpy(N,coef,X,1,Y,1);
}
};
#endif

View File

@ -1,2 +0,0 @@
#! /bin/bash
export LD_LIBRARY_PATH=/opt/intel/mkl/lib/32:${LD_LIBRARY_PATH}

View File

@ -1,4 +1,17 @@
include_directories(${BLITZ_INCLUDES})
btl_add_bench(btl_blitz main.cpp)
target_link_libraries(btl_blitz ${BLITZ_LIBRARIES})
find_package(Blitz)
if (BLITZ_FOUND)
include_directories(${BLITZ_INCLUDES})
btl_add_bench(btl_blitz btl_blitz.cpp)
if (BUILD_btl_blitz)
target_link_libraries(btl_blitz ${BLITZ_LIBRARIES})
endif (BUILD_btl_blitz)
btl_add_bench(btl_tiny_blitz btl_tiny_blitz.cpp OFF)
if (BUILD_btl_tiny_blitz)
target_link_libraries(btl_tiny_blitz ${BLITZ_LIBRARIES})
endif (BUILD_btl_tiny_blitz)
endif (BLITZ_FOUND)

View File

@ -1,8 +1,24 @@
include_directories(${EIGEN2_INCLUDE_DIR})
btl_add_bench(btl_eigen2 main.cpp)
find_package(Eigen2)
if (EIGEN2_FOUND)
IF(NOT BTL_NOVEC)
btl_add_bench(btl_eigen2_novec main.cpp)
set_target_properties(btl_eigen2_novec PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
ENDIF(NOT BTL_NOVEC)
include_directories(${EIGEN2_INCLUDE_DIR})
btl_add_bench(btl_eigen2 main.cpp)
IF(NOT BTL_NOVEC)
btl_add_bench(btl_eigen2_novec main.cpp)
if(BUILD_btl_eigen2_novec)
set_target_properties(btl_eigen2_novec PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
endif(BUILD_btl_eigen2_novec)
ENDIF(NOT BTL_NOVEC)
btl_add_bench(btl_tiny_eigen2 btl_tiny_eigen2.cpp OFF)
IF(NOT BTL_NOVEC)
btl_add_bench(btl_tiny_eigen2_novec btl_tiny_eigen2.cpp OFF)
if(BUILD_btl_tiny_eigen2_novec)
set_target_properties(btl_tiny_eigen2_novec PROPERTIES COMPILE_FLAGS "-DEIGEN_DONT_VECTORIZE")
endif(BUILD_btl_tiny_eigen2_novec)
ENDIF(NOT BTL_NOVEC)
endif (EIGEN2_FOUND)

View File

@ -1,14 +1,12 @@
//=====================================================
// File : main.cpp
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:21 CEST 2002
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ -16,32 +14,27 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
//
#include "utilities.h"
#include "ATLAS_interface.hh"
#include "ATLAS_LU_solve_interface.hh"
#include "bench.hh"
#include "eigen2_interface.hh"
#include "static/bench_static.hh"
#include "action_matrix_vector_product.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
#include "action_lu_solve.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
#include "action_atv_product.hh"
BTL_MAIN;
int main()
{
bench<Action_axpy<ATLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<ATLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<ATLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<ATLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<ATLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_lu_solve<ATLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
bench_static<Action_axpy,eigen2_interface>();
bench_static<Action_matrix_matrix_product,eigen2_interface>();
bench_static<Action_matrix_vector_product,eigen2_interface>();
bench_static<Action_atv_product,eigen2_interface>();
return 0;
}

View File

@ -33,7 +33,7 @@ int main()
bench<Action_matrix_vector_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<eigen2_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
// bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_axpy<eigen2_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
// bench<Action_matrix_matrix_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<eigen2_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);

View File

@ -1,14 +1,14 @@
//=====================================================
// File : f77_interface.hh
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:24 CEST 2002
//=====================================================
//
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ -16,7 +16,7 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
//
#ifndef F77_INTERFACE_HH
#define F77_INTERFACE_HH
#include "f77_interface_base.hh"
@ -55,39 +55,31 @@ public :
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{
dmxv_(A,&N,B,&N,X);
}
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
{
dmxm_(A,&N,B,&N,X,&N);
}
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
{
data_(A,X,&N);
}
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
{
daat_(A,X,&N);
}
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N)
{
int one=1;
daxpy_(&N,&coef,X,Y);
}
};
@ -104,34 +96,26 @@ public :
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N)
{
smxv_(A,&N,B,&N,X);
}
static inline void matrix_matrix_product(gene_matrix & A, gene_matrix & B, gene_matrix & X, int N)
{
smxm_(A,&N,B,&N,X,&N);
}
static inline void ata_product(gene_matrix & A, gene_matrix & X, int N)
{
sata_(A,X,&N);
}
static inline void aat_product(gene_matrix & A, gene_matrix & X, int N)
{
saat_(A,X,&N);
}
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N)
static inline void axpy(float coef, const gene_vector & X, gene_vector & Y, int N)
{
saxpy_(&N,&coef,X,Y);
}

View File

@ -1,3 +1,6 @@
include_directories(${GMM_INCLUDES})
btl_add_bench(btl_gmm main.cpp)
# GMM++ backend: only register the benchmark when the library is located.
find_package(GMM)
if(GMM_FOUND)
  # Expose the GMM++ headers to the benchmark sources of this directory.
  include_directories(${GMM_INCLUDES})
  btl_add_bench(btl_gmm main.cpp)
endif()

View File

@ -0,0 +1,12 @@
# Hand-vectorized backend. The kernels rely on Eigen2's packet primitives, so
# both benchmarks are only registered when Eigen2 is available.
find_package(Eigen2)
if(EIGEN2_FOUND)
  # hand_vec_interface.hh includes "f77_interface.hh", hence the extra
  # include path pointing at libs/f77.
  include_directories(${EIGEN2_INCLUDE_DIR} ${PROJECT_SOURCE_DIR}/libs/f77)

  # Same source, two executables: the plain vectorized kernels ...
  btl_add_bench(btl_hand_vec main.cpp)
  # ... and the variant compiled with PEELING defined (8x unrolled loops).
  btl_add_bench(btl_hand_peeling main.cpp)

  # btl_add_bench only creates the target when its BUILD_<name> option is ON,
  # so the property must be guarded by the same option.
  if(BUILD_btl_hand_peeling)
    # Use COMPILE_DEFINITIONS (appended) rather than overwriting COMPILE_FLAGS
    # with a raw "-D" switch: it is compiler-agnostic and does not clobber
    # flags that may have been set on the target elsewhere.
    set_property(TARGET btl_hand_peeling APPEND PROPERTY COMPILE_DEFINITIONS PEELING)
  endif()
endif()

View File

@ -0,0 +1,238 @@
//=====================================================
// File : hand_vec_interface.hh
// Copyright (C) 2008 Gael Guennebaud <g.gael@free.fr>
//=====================================================
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
#ifndef HAND_VEC_INTERFACE_HH
#define HAND_VEC_INTERFACE_HH
#include <Eigen/Core>
#include "f77_interface.hh"
using namespace Eigen;
// Benchmark backend whose kernels are written by hand on top of Eigen's
// packet (SIMD) primitives: ei_pload / ei_ploadu / ei_pstore / ei_padd /
// ei_pmul / ei_pset1 / ei_predux.
//
// Storage: matrices and vectors are raw buffers obtained from
// ei_aligned_malloc. A matrix built by matrix_from_stl is laid out so that
// A[i + N*j] == A_stl[j][i].
//
// When PEELING is defined, the packet loops are unrolled by hand eight
// packets at a time, followed by a one-packet-at-a-time loop for the rest.
//
// Alignment: vector operands are always accessed with the aligned primitives
// (ei_pload / ei_pstore), which presumes they were allocated through
// vector_from_stl -- NOTE(review): confirm callers never pass other buffers.
// For the matrix operand, the kernels pick ei_pload when the offset of the
// current run of N entries is a multiple of PacketSize and ei_ploadu otherwise.
template<class real>
class hand_vec_interface : public f77_interface_base<real> {

public :

  // SIMD packet type associated with the scalar type, and its number of lanes.
  typedef typename ei_packet_traits<real>::type Packet;
  static const int PacketSize = ei_packet_traits<real>::size;

  typedef typename f77_interface_base<real>::stl_matrix stl_matrix;
  typedef typename f77_interface_base<real>::stl_vector stl_vector;
  typedef typename f77_interface_base<real>::gene_matrix gene_matrix;
  typedef typename f77_interface_base<real>::gene_vector gene_vector;

  // Releases a matrix allocated by matrix_from_stl. N is unused here; it is
  // kept so the signature stays unchanged for existing callers.
  static void free_matrix(gene_matrix & A, int N){
    ei_aligned_free(A);
  }

  // Releases a vector allocated by vector_from_stl.
  static void free_vector(gene_vector & B){
    ei_aligned_free(B);
  }

  // Allocates an aligned N*N buffer (N = A_stl.size()) and copies A_stl into
  // it such that A[i + N*j] == A_stl[j][i].
  static inline void matrix_from_stl(gene_matrix & A, stl_matrix & A_stl){
    int N = A_stl.size();
    A = ei_aligned_malloc<real>(N*N);
    for (int j=0;j<N;j++)
      for (int i=0;i<N;i++)
        A[i+N*j] = A_stl[j][i];
  }

  // Allocates an aligned buffer of B_stl.size() scalars and copies B_stl into it.
  static inline void vector_from_stl(gene_vector & B, stl_vector & B_stl){
    int N = B_stl.size();
    B = ei_aligned_malloc<real>(N);
    for (int i=0;i<N;i++)
      B[i] = B_stl[i];
  }

  // Name reported for this backend; depends on whether PEELING is defined.
  static inline std::string name() {
    #ifdef PEELING
    return "hand_vectorized_peeling";
    #else
    return "hand_vectorized";
    #endif
  }

  // Computes X[j] = sum over i of B[i] * A[j + i*N], for j in [0, N).
  //
  // X is zeroed first, then for each i the i-th run of N consecutive entries
  // of A, scaled by B[i], is accumulated into X. The first AN entries
  // (largest multiple of PacketSize not exceeding N) are processed with
  // packets, the remaining ones with scalar code.
  static inline void matrix_vector_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
  {
    int AN = (N/PacketSize)*PacketSize;
    for (int i=0;i<N;i++)
      X[i] = 0;

    for (int i=0;i<N;i++)
    {
      real tmp = B[i];
      Packet ptmp = ei_pset1(tmp);
      int iN = i*N;
      if (AN>0)
      {
        // The i-th run of A starts on a packet boundary only when its offset
        // is a multiple of PacketSize; otherwise unaligned loads are required.
        bool aligned = (iN % PacketSize) == 0;
        if (aligned)
        {
          #ifdef PEELING
          // ANP: part of the range that can be processed 8 packets at a time.
          int ANP = (AN/(8*PacketSize))*8*PacketSize;
          for (int j = 0;j<ANP;j+=PacketSize*8)
          {
            ei_pstore(&X[j], ei_padd(ei_pload(&X[j]), ei_pmul(ptmp,ei_pload(&A[j+iN]))));
            ei_pstore(&X[j+PacketSize], ei_padd(ei_pload(&X[j+PacketSize]), ei_pmul(ptmp,ei_pload(&A[j+PacketSize+iN]))));
            ei_pstore(&X[j+2*PacketSize], ei_padd(ei_pload(&X[j+2*PacketSize]), ei_pmul(ptmp,ei_pload(&A[j+2*PacketSize+iN]))));
            ei_pstore(&X[j+3*PacketSize], ei_padd(ei_pload(&X[j+3*PacketSize]), ei_pmul(ptmp,ei_pload(&A[j+3*PacketSize+iN]))));
            ei_pstore(&X[j+4*PacketSize], ei_padd(ei_pload(&X[j+4*PacketSize]), ei_pmul(ptmp,ei_pload(&A[j+4*PacketSize+iN]))));
            ei_pstore(&X[j+5*PacketSize], ei_padd(ei_pload(&X[j+5*PacketSize]), ei_pmul(ptmp,ei_pload(&A[j+5*PacketSize+iN]))));
            ei_pstore(&X[j+6*PacketSize], ei_padd(ei_pload(&X[j+6*PacketSize]), ei_pmul(ptmp,ei_pload(&A[j+6*PacketSize+iN]))));
            ei_pstore(&X[j+7*PacketSize], ei_padd(ei_pload(&X[j+7*PacketSize]), ei_pmul(ptmp,ei_pload(&A[j+7*PacketSize+iN]))));
          }
          // packets left over after the unrolled loop
          for (int j = ANP;j<AN;j+=PacketSize)
            ei_pstore(&X[j], ei_padd(ei_pload(&X[j]), ei_pmul(ptmp,ei_pload(&A[j+iN]))));
          #else
          for (int j = 0;j<AN;j+=PacketSize)
            ei_pstore(&X[j], ei_padd(ei_pload(&X[j]), ei_pmul(ptmp,ei_pload(&A[j+iN]))));
          #endif
        }
        else
        {
          // Same as above, but A is read with unaligned loads.
          #ifdef PEELING
          int ANP = (AN/(8*PacketSize))*8*PacketSize;
          for (int j = 0;j<ANP;j+=PacketSize*8)
          {
            ei_pstore(&X[j], ei_padd(ei_pload(&X[j]), ei_pmul(ptmp,ei_ploadu(&A[j+iN]))));
            ei_pstore(&X[j+PacketSize], ei_padd(ei_pload(&X[j+PacketSize]), ei_pmul(ptmp,ei_ploadu(&A[j+PacketSize+iN]))));
            ei_pstore(&X[j+2*PacketSize], ei_padd(ei_pload(&X[j+2*PacketSize]), ei_pmul(ptmp,ei_ploadu(&A[j+2*PacketSize+iN]))));
            ei_pstore(&X[j+3*PacketSize], ei_padd(ei_pload(&X[j+3*PacketSize]), ei_pmul(ptmp,ei_ploadu(&A[j+3*PacketSize+iN]))));
            ei_pstore(&X[j+4*PacketSize], ei_padd(ei_pload(&X[j+4*PacketSize]), ei_pmul(ptmp,ei_ploadu(&A[j+4*PacketSize+iN]))));
            ei_pstore(&X[j+5*PacketSize], ei_padd(ei_pload(&X[j+5*PacketSize]), ei_pmul(ptmp,ei_ploadu(&A[j+5*PacketSize+iN]))));
            ei_pstore(&X[j+6*PacketSize], ei_padd(ei_pload(&X[j+6*PacketSize]), ei_pmul(ptmp,ei_ploadu(&A[j+6*PacketSize+iN]))));
            ei_pstore(&X[j+7*PacketSize], ei_padd(ei_pload(&X[j+7*PacketSize]), ei_pmul(ptmp,ei_ploadu(&A[j+7*PacketSize+iN]))));
          }
          for (int j = ANP;j<AN;j+=PacketSize)
            ei_pstore(&X[j], ei_padd(ei_pload(&X[j]), ei_pmul(ptmp,ei_ploadu(&A[j+iN]))));
          #else
          for (int j = 0;j<AN;j+=PacketSize)
            ei_pstore(&X[j], ei_padd(ei_pload(&X[j]), ei_pmul(ptmp,ei_ploadu(&A[j+iN]))));
          #endif
        }
      }
      // process remaining scalars
      for (int j=AN;j<N;j++)
        X[j] += tmp * A[j+iN];
    }
  }

  // Computes X[i] = sum over j of B[j] * A[j + i*N], for i in [0, N), i.e.
  // the dot product of B with the i-th run of N consecutive entries of A.
  //
  // Every X[i] is assigned exactly once at the end of its iteration, so X
  // does not need to be cleared beforehand.
  static inline void atv_product(const gene_matrix & A, const gene_vector & B, gene_vector & X, int N)
  {
    int AN = (N/PacketSize)*PacketSize;

    for (int i=0;i<N;i++)
    {
      real tmp = 0;
      // Packet accumulator; its lanes are summed by ei_predux after the loops.
      Packet ptmp = ei_pset1(real(0));
      int iN = i*N;
      if (AN>0)
      {
        // See matrix_vector_product: selects aligned vs unaligned loads of A.
        bool aligned = (iN % PacketSize) == 0;
        if (aligned)
        {
          #ifdef PEELING
          int ANP = (AN/(8*PacketSize))*8*PacketSize;
          for (int j = 0;j<ANP;j+=PacketSize*8)
          {
            ptmp =
              ei_padd(ei_pmul(ei_pload(&B[j]), ei_pload(&A[j+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+PacketSize]), ei_pload(&A[j+PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+2*PacketSize]), ei_pload(&A[j+2*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+3*PacketSize]), ei_pload(&A[j+3*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+4*PacketSize]), ei_pload(&A[j+4*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+5*PacketSize]), ei_pload(&A[j+5*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+6*PacketSize]), ei_pload(&A[j+6*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+7*PacketSize]), ei_pload(&A[j+7*PacketSize+iN])),
                      ptmp))))))));
          }
          for (int j = ANP;j<AN;j+=PacketSize)
            ptmp = ei_padd(ptmp, ei_pmul(ei_pload(&B[j]), ei_pload(&A[j+iN])));
          #else
          for (int j = 0;j<AN;j+=PacketSize)
            ptmp = ei_padd(ptmp, ei_pmul(ei_pload(&B[j]), ei_pload(&A[j+iN])));
          #endif
        }
        else
        {
          #ifdef PEELING
          int ANP = (AN/(8*PacketSize))*8*PacketSize;
          for (int j = 0;j<ANP;j+=PacketSize*8)
          {
            ptmp =
              ei_padd(ei_pmul(ei_pload(&B[j]), ei_ploadu(&A[j+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+PacketSize]), ei_ploadu(&A[j+PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+2*PacketSize]), ei_ploadu(&A[j+2*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+3*PacketSize]), ei_ploadu(&A[j+3*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+4*PacketSize]), ei_ploadu(&A[j+4*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+5*PacketSize]), ei_ploadu(&A[j+5*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+6*PacketSize]), ei_ploadu(&A[j+6*PacketSize+iN])),
              ei_padd(ei_pmul(ei_pload(&B[j+7*PacketSize]), ei_ploadu(&A[j+7*PacketSize+iN])),
                      ptmp))))))));
          }
          for (int j = ANP;j<AN;j+=PacketSize)
            ptmp = ei_padd(ptmp, ei_pmul(ei_pload(&B[j]), ei_ploadu(&A[j+iN])));
          #else
          for (int j = 0;j<AN;j+=PacketSize)
            ptmp = ei_padd(ptmp, ei_pmul(ei_pload(&B[j]), ei_ploadu(&A[j+iN])));
          #endif
        }
        // horizontal sum of the packet accumulator
        tmp = ei_predux(ptmp);
      }
      // process remaining scalars
      for (int j=AN;j<N;j++)
        tmp += B[j] * A[j+iN];
      X[i] = tmp;
    }
  }

  // Computes Y[i] += coef * X[i] for i in [0, N).
  //
  // The first AN entries (largest multiple of PacketSize not exceeding N) are
  // processed with aligned packet operations, the rest with scalar code.
  static inline void axpy(real coef, const gene_vector & X, gene_vector & Y, int N){
    int AN = (N/PacketSize)*PacketSize;
    if (AN>0)
    {
      Packet pcoef = ei_pset1(coef);
      #ifdef PEELING
      int ANP = (AN/(8*PacketSize))*8*PacketSize;
      for (int j = 0;j<ANP;j+=PacketSize*8)
      {
        ei_pstore(&Y[j             ], ei_padd(ei_pload(&Y[j             ]), ei_pmul(pcoef,ei_pload(&X[j             ]))));
        ei_pstore(&Y[j+  PacketSize], ei_padd(ei_pload(&Y[j+  PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+  PacketSize]))));
        ei_pstore(&Y[j+2*PacketSize], ei_padd(ei_pload(&Y[j+2*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+2*PacketSize]))));
        ei_pstore(&Y[j+3*PacketSize], ei_padd(ei_pload(&Y[j+3*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+3*PacketSize]))));
        ei_pstore(&Y[j+4*PacketSize], ei_padd(ei_pload(&Y[j+4*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+4*PacketSize]))));
        ei_pstore(&Y[j+5*PacketSize], ei_padd(ei_pload(&Y[j+5*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+5*PacketSize]))));
        ei_pstore(&Y[j+6*PacketSize], ei_padd(ei_pload(&Y[j+6*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+6*PacketSize]))));
        ei_pstore(&Y[j+7*PacketSize], ei_padd(ei_pload(&Y[j+7*PacketSize]), ei_pmul(pcoef,ei_pload(&X[j+7*PacketSize]))));
      }
      for (int j = ANP;j<AN;j+=PacketSize)
        ei_pstore(&Y[j], ei_padd(ei_pload(&Y[j]), ei_pmul(pcoef,ei_pload(&X[j]))));
      #else
      for (int j = 0;j<AN;j+=PacketSize)
        ei_pstore(&Y[j], ei_padd(ei_pload(&Y[j]), ei_pmul(pcoef,ei_pload(&X[j]))));
      #endif
    }

    // process remaining scalars
    for (int i=AN;i<N;i++)
      Y[i] += coef * X[i];
  }

};
#endif

View File

@ -1,14 +1,14 @@
//=====================================================
// File : main.cpp
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:29 CEST 2002
// Author : L. Plagne <laurent.plagne@edf.fr)>
// Copyright (C) EDF R&D, lun sep 30 14:23:23 CEST 2002
//=====================================================
//
//
// This program is free software; you can redistribute it and/or
// modify it under the terms of the GNU General Public License
// as published by the Free Software Foundation; either version 2
// of the License, or (at your option) any later version.
//
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ -16,32 +16,31 @@
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
//
//
#include "utilities.h"
#include "INTEL_BLAS_interface.hh"
#include "INTEL_BLAS_LU_solve_interface.hh"
#include "bench.hh"
#include "hand_vec_interface.hh"
#include "action_matrix_vector_product.hh"
#include "action_atv_product.hh"
#include "action_matrix_matrix_product.hh"
#include "action_axpy.hh"
#include "action_lu_solve.hh"
#include "action_ata_product.hh"
#include "action_aat_product.hh"
//#include "action_lu_solve.hh"
#include "timers/mixed_perf_analyzer.hh"
BTL_MAIN;
int main()
{
bench<Action_axpy<INTEL_BLAS_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<hand_vec_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<hand_vec_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
// bench<Action_matrix_matrix_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<hand_vec_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_axpy<hand_vec_interface<REAL_TYPE> > >(MIN_AXPY,MAX_AXPY,NB_POINT);
bench<Action_matrix_vector_product<INTEL_BLAS_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<INTEL_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<INTEL_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<INTEL_BLAS_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_lu_solve<INTEL_BLAS_LU_solve_interface<REAL_TYPE> > >(MIN_LU,MAX_LU,NB_POINT);
return 0;
}

View File

@ -0,0 +1,6 @@
# MTL4 backend: only register the benchmark when the library is located.
find_package(MTL4)
if(MTL4_FOUND)
  # Expose the MTL4 headers to the benchmark sources of this directory.
  include_directories(${MTL4_INCLUDE_DIR})
  btl_add_bench(btl_mtl4 main.cpp)
endif()

View File

@ -35,8 +35,8 @@ int main()
bench<Action_matrix_vector_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_atv_product<mtl4_interface<REAL_TYPE> > >(MIN_MV,MAX_MV,NB_POINT);
bench<Action_matrix_matrix_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_ata_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
// bench<Action_aat_product<mtl4_interface<REAL_TYPE> > >(MIN_MM,MAX_MM,NB_POINT);
return 0;
}

View File

@ -88,11 +88,11 @@ public :
}
static inline void ata_product(const gene_matrix & A, gene_matrix & X, int N){
X = (trans(A)*A);
// X = (trans(A)*A);
}
static inline void aat_product(const gene_matrix & A, gene_matrix & X, int N){
X = (A*trans(A));
// X = (A*trans(A));
}
static inline void matrix_vector_product(gene_matrix & A, gene_vector & B, gene_vector & X, int N){

View File

@ -1,4 +0,0 @@
include_directories(${BLITZ_INCLUDES})
btl_add_bench(btl_tiny_blitz main.cpp)
target_link_libraries(btl_tiny_blitz ${BLITZ_LIBRARIES})

View File

@ -1,3 +1,6 @@
include_directories(${TVMET_INCLUDE_DIR})
add_executable(btl_tvmet main.cpp)
# tvmet backend: only register the benchmark when the library is located.
find_package(Tvmet)
if(TVMET_FOUND)
  include_directories(${TVMET_INCLUDE_DIR})
  # The trailing OFF is consumed by btl_add_bench as the default value of the
  # BUILD_btl_tvmet option, so this benchmark is not built unless enabled.
  btl_add_bench(btl_tvmet main.cpp OFF)
endif()

View File

@ -1,3 +1,7 @@
include_directories(${Boost_INCLUDES})
btl_add_bench(btl_ublas main.cpp)
# Boost.uBLAS backend: only register the benchmark when Boost is located.
find_package(Boost)
if(Boost_FOUND)
  # Both variable spellings are passed on, in the same order as before.
  # NOTE(review): Boost_INCLUDES is presumably a leftover from an older find
  # module and may expand to nothing -- confirm before dropping it.
  include_directories(${Boost_INCLUDE_DIRS} ${Boost_INCLUDES})
  btl_add_bench(btl_ublas main.cpp)
endif()