mirror of
https://gitlab.com/libeigen/eigen.git
synced 2024-12-21 07:19:46 +08:00
00f32752f7
* Unifying all loadLocalTile from lhs and rhs to an extract_block function. * Adding get_tensor operation which was missing in TensorContractionMapper. * Adding the -D method missing from cmake for Disable_Skinny Contraction operation. * Wrapping all the indices in TensorScanSycl into Scan parameter struct. * Fixing typo in Device SYCL * Unifying load to private register for tall/skinny no shared * Unifying load to vector tile for tensor-vector/vector-tensor operation * Removing all the LHS/RHS class for extracting data from global * Removing Outputfunction from TensorContractionSkinnyNoshared. * Combining the local memory version of tall/skinny and normal tensor contraction into one kernel. * Combining the no-local memory version of tall/skinny and normal tensor contraction into one kernel. * Combining General Tensor-Vector and VectorTensor contraction into one kernel. * Making double buffering optional for Tensor contraction when the local memory version is used. * Modifying benchmark to accept custom Reduction Sizes * Disabling AVX optimization for SYCL backend on the host to allow SSE optimization to the host * Adding Test for SYCL * Modifying SYCL CMake
425 lines
15 KiB
CMake
425 lines
15 KiB
CMake
# split_test_helper.h used to be generated into this build directory on the
# first configure run. It is now included in test/, so remove any stale copy
# that an earlier configuration may have left behind.
if(EXISTS "${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h")
  file(REMOVE "${CMAKE_CURRENT_BINARY_DIR}/split_test_helper.h")
endif()

# Record "Unsupported" as the current subproject in a global property.
set_property(GLOBAL PROPERTY EIGEN_CURRENT_SUBPROJECT "Unsupported")

# Command-less umbrella target.
# NOTE(review): presumably ei_add_test attaches the test targets to it - confirm.
add_custom_target(BuildUnsupported)

# Header search paths shared by every test registered in this directory.
include_directories(
  ../../test
  ../../unsupported
  ../../Eigen
  ${CMAKE_CURRENT_BINARY_DIR}/../../test
)

# Provides CMAKE_THREAD_LIBS_INIT, used by the thread-based tests further down.
find_package(Threads)
|
|
|
|
# Optional GoogleHash backend: when it is found, enable the support macro and
# add its headers; either way, record the outcome in the backend summary.
find_package(GoogleHash)
if(NOT GOOGLEHASH_FOUND)
  ei_add_property(EIGEN_MISSING_BACKENDS "GoogleHash, ")
else()
  add_definitions("-DEIGEN_GOOGLEHASH_SUPPORT")
  include_directories(${GOOGLEHASH_INCLUDES})
  ei_add_property(EIGEN_TESTED_BACKENDS "GoogleHash, ")
endif()
|
|
|
|
|
|
# Optional Adolc backend. The forward_adolc test is only registered when the
# test suite is configured for C++11 (EIGEN_TEST_CXX11).
find_package(Adolc)
if(NOT ADOLC_FOUND)
  ei_add_property(EIGEN_MISSING_BACKENDS "Adolc, ")
else()
  include_directories(${ADOLC_INCLUDES})
  ei_add_property(EIGEN_TESTED_BACKENDS "Adolc, ")
  if(NOT EIGEN_TEST_CXX11)
    message(STATUS "Adolc found, but tests require C++11 mode")
  else()
    ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
  endif()
endif()
|
|
|
|
# NonLinearOptimization seems to never have been successful on x87, so it is
# considered to contain a floating-point related bug.
# See thread: "non-linear optimization test summary"
ei_add_test(NonLinearOptimization)

# Tests that need no extra flags or libraries; registered in this exact order.
foreach(eigen_unsupported_test IN ITEMS
    NumericalDiff
    autodiff_scalar
    autodiff
    BVH
    matrix_exponential
    matrix_function
    matrix_power
    matrix_square_root
    alignedvector3
    FFT
    EulerAngles)
  ei_add_test(${eigen_unsupported_test})
endforeach()
|
|
|
|
# Optional MPFR C++ backend (needs MPFR >= 2.3.0 and a C++11-capable compiler).
find_package(MPFR 2.3.0)
find_package(GMP)
if(NOT (MPFR_FOUND AND EIGEN_COMPILER_SUPPORT_CPP11))
  ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
else()
  include_directories(${MPFR_INCLUDES} ./mpreal)
  ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
  # NOTE(review): the result of find_package(GMP) is not checked before
  # GMP_LIBRARIES is used here - confirm that is intended.
  set(EIGEN_MPFR_TEST_LIBRARIES ${MPFR_LIBRARIES} ${GMP_LIBRARIES})
  ei_add_test(mpreal_support "-std=c++11" "${EIGEN_MPFR_TEST_LIBRARIES}")
endif()
|
|
|
|
ei_add_test(sparse_extra "" "")

# Optional FFTW backend. The FFTW test always gets -DEIGEN_FFTW_DEFAULT and
# additionally -DEIGEN_HAS_FFTWL when FFTWL_LIB is set.
find_package(FFTW)
if(NOT FFTW_FOUND)
  ei_add_property(EIGEN_MISSING_BACKENDS "fftw, ")
else()
  ei_add_property(EIGEN_TESTED_BACKENDS "fftw, ")
  include_directories(${FFTW_INCLUDES})

  set(eigen_fftw_test_flags "-DEIGEN_FFTW_DEFAULT")
  if(FFTWL_LIB)
    set(eigen_fftw_test_flags "${eigen_fftw_test_flags} -DEIGEN_HAS_FFTWL")
  endif()
  ei_add_test(FFTW "${eigen_fftw_test_flags}" "${FFTW_LIBRARIES}")
  unset(eigen_fftw_test_flags)
endif()
|
|
|
|
# Optional OpenGL backend. It is tested only when the user has not disabled it
# and OpenGL, GLUT and GLEW are all found; otherwise it is reported as missing.
option(EIGEN_TEST_NO_OPENGL "Disable OpenGL support in unit tests" OFF)

set(eigen_opengl_usable OFF)
if(NOT EIGEN_TEST_NO_OPENGL)
  find_package(OpenGL)
  find_package(GLUT)
  find_package(GLEW)
  if(OPENGL_FOUND AND GLUT_FOUND AND GLEW_FOUND)
    set(eigen_opengl_usable ON)
  endif()
endif()

if(eigen_opengl_usable)
  include_directories(${OPENGL_INCLUDE_DIR} ${GLUT_INCLUDE_DIR} ${GLEW_INCLUDE_DIRS})
  ei_add_property(EIGEN_TESTED_BACKENDS "OpenGL, ")
  set(EIGEN_GL_LIB ${GLUT_LIBRARIES} ${GLEW_LIBRARIES} ${OPENGL_LIBRARIES})
  ei_add_test(openglsupport "" "${EIGEN_GL_LIB}")
else()
  ei_add_property(EIGEN_MISSING_BACKENDS "OpenGL, ")
endif()
unset(eigen_opengl_usable)
|
|
|
|
# Tests that need no extra flags or libraries; registered in this exact order.
foreach(eigen_unsupported_test IN ITEMS
    polynomialsolver
    polynomialutils
    splines
    gmres
    dgmres
    minres
    levenberg_marquardt
    kronecker_product
    bessel_functions
    special_functions)
  ei_add_test(${eigen_unsupported_test})
endforeach()
|
|
|
|
# Tests that are only registered when the suite is configured for C++11.
if(EIGEN_TEST_CXX11)
  if(EIGEN_TEST_SYCL)
    # EIGEN_SYCL stays ON only while the SYCL tests below are registered.
    set(EIGEN_SYCL ON)

    # Forward CMake options as preprocessor definitions: every option in this
    # list that evaluates to true is passed on as -D<NAME>=<value>.
    foreach(eigen_sycl_option IN ITEMS
        EIGEN_SYCL_USE_DEFAULT_SELECTOR
        EIGEN_SYCL_NO_LOCAL_MEM
        EIGEN_SYCL_LOCAL_MEM
        EIGEN_SYCL_MAX_GLOBAL_RANGE
        EIGEN_SYCL_LOCAL_THREAD_DIM0
        EIGEN_SYCL_LOCAL_THREAD_DIM1
        EIGEN_SYCL_REG_M
        EIGEN_SYCL_REG_N
        EIGEN_SYCL_USE_PROGRAM_CLASS
        EIGEN_SYCL_ASYNC_EXECUTION
        EIGEN_SYCL_DISABLE_SKINNY
        EIGEN_SYCL_DISABLE_DOUBLE_BUFFER
        EIGEN_SYCL_DISABLE_RANK1
        EIGEN_SYCL_DISABLE_SCALAR
        EIGEN_SYCL_DISABLE_GEMV
        EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION)
      # ${eigen_sycl_option} expands to the option name, which if() then
      # dereferences - the same truthiness test as writing the name directly.
      if(${eigen_sycl_option})
        add_definitions(-D${eigen_sycl_option}=${${eigen_sycl_option}})
      endif()
    endforeach()

    if(EIGEN_SYCL_TRISYCL)
      # NOTE(review): CMAKE_CXX_STANDARD is 14 while STD_CXX_FLAG requests
      # c++1z - confirm this mismatch is intended.
      set(CMAKE_CXX_STANDARD 14)
      set(STD_CXX_FLAG "-std=c++1z")
    else()
      if(MSVC)
        # Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11
        # can cause issues with the ComputeCpp device compiler parsing Visual Studio Headers.
        set(CMAKE_CXX_STANDARD 14)
        list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
      else()
        set(CMAKE_CXX_STANDARD 11)
        list(APPEND COMPUTECPP_USER_FLAGS -Wall)
      endif()

      # The following flags are not supported by Clang and can cause warnings
      # if used with -Werror so they are removed here.
      if(COMPUTECPP_USE_COMPILER_DRIVER)
        set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
        # The input is quoted so that an empty CMAKE_CXX_FLAGS still counts as
        # an argument; unquoted, string(REPLACE) would receive too few
        # arguments and abort the configuration.
        string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
        string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
        string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
      endif()

      list(APPEND COMPUTECPP_USER_FLAGS
          -DEIGEN_NO_ASSERTION_CHECKING=1
          -no-serial-memop
          -Xclang
          -cl-mad-enable)
    endif()

    # SYCL tests, registered in this exact order. STD_CXX_FLAG is only set in
    # the triSYCL branch above, so it may expand to nothing here.
    foreach(eigen_sycl_test IN ITEMS
        cxx11_tensor_sycl
        cxx11_tensor_image_op_sycl
        cxx11_tensor_math_sycl
        cxx11_tensor_forced_eval_sycl
        cxx11_tensor_broadcast_sycl
        cxx11_tensor_device_sycl
        cxx11_tensor_reduction_sycl
        cxx11_tensor_morphing_sycl
        cxx11_tensor_shuffling_sycl
        cxx11_tensor_padding_sycl
        cxx11_tensor_builtins_sycl
        cxx11_tensor_contract_sycl
        cxx11_tensor_concatenation_sycl
        cxx11_tensor_reverse_sycl
        cxx11_tensor_convolution_sycl
        cxx11_tensor_striding_sycl
        cxx11_tensor_chipping_sycl
        cxx11_tensor_layout_swap_sycl
        cxx11_tensor_inflation_sycl
        cxx11_tensor_random_sycl
        cxx11_tensor_generator_sycl
        cxx11_tensor_patch_sycl
        cxx11_tensor_image_patch_sycl
        cxx11_tensor_volume_patch_sycl
        cxx11_tensor_argmax_sycl
        cxx11_tensor_custom_op_sycl
        cxx11_tensor_scan_sycl)
      ei_add_test(${eigen_sycl_test} ${STD_CXX_FLAG})
    endforeach()

    set(EIGEN_SYCL OFF)
  endif()

  # Thread-based tests: built with -pthread and linked against the libraries
  # reported by find_package(Threads).
  ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
  ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
  ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")

  ei_add_test(cxx11_meta)
  ei_add_test(cxx11_maxsizevector)
  ei_add_test(cxx11_tensor_argmax)
  ei_add_test(cxx11_tensor_assign)
  ei_add_test(cxx11_tensor_block_access)
  ei_add_test(cxx11_tensor_block_eval)
  ei_add_test(cxx11_tensor_block_io)
  ei_add_test(cxx11_tensor_broadcasting)
  ei_add_test(cxx11_tensor_casts)
  ei_add_test(cxx11_tensor_chipping)
  ei_add_test(cxx11_tensor_comparisons)
  ei_add_test(cxx11_tensor_concatenation)
  ei_add_test(cxx11_tensor_const)
  ei_add_test(cxx11_tensor_contraction)
  ei_add_test(cxx11_tensor_convolution)
  ei_add_test(cxx11_tensor_custom_index)
  ei_add_test(cxx11_tensor_custom_op)
  ei_add_test(cxx11_tensor_dimension)
  ei_add_test(cxx11_tensor_empty)
  ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
  ei_add_test(cxx11_tensor_expr)
  ei_add_test(cxx11_tensor_fft)
  ei_add_test(cxx11_tensor_fixed_size)
  ei_add_test(cxx11_tensor_forced_eval)
  ei_add_test(cxx11_tensor_generator)
  ei_add_test(cxx11_tensor_ifft)
  ei_add_test(cxx11_tensor_image_patch)
  ei_add_test(cxx11_tensor_index_list)
  ei_add_test(cxx11_tensor_inflation)
  ei_add_test(cxx11_tensor_intdiv)
  ei_add_test(cxx11_tensor_io)
  ei_add_test(cxx11_tensor_layout_swap)
  ei_add_test(cxx11_tensor_lvalue)
  ei_add_test(cxx11_tensor_map)
  ei_add_test(cxx11_tensor_math)
  ei_add_test(cxx11_tensor_mixed_indices)
  ei_add_test(cxx11_tensor_morphing)
  ei_add_test(cxx11_tensor_move)
  ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
  ei_add_test(cxx11_tensor_of_complex)
  ei_add_test(cxx11_tensor_of_const_values)
  ei_add_test(cxx11_tensor_of_strings)
  ei_add_test(cxx11_tensor_padding)
  ei_add_test(cxx11_tensor_patch)
  ei_add_test(cxx11_tensor_random)
  ei_add_test(cxx11_tensor_reduction)
  ei_add_test(cxx11_tensor_ref)
  ei_add_test(cxx11_tensor_roundings)
  ei_add_test(cxx11_tensor_scan)
  ei_add_test(cxx11_tensor_shuffling)
  ei_add_test(cxx11_tensor_simple)
  ei_add_test(cxx11_tensor_striding)
  ei_add_test(cxx11_tensor_sugar)
  ei_add_test(cxx11_tensor_thread_local "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
  ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
  ei_add_test(cxx11_tensor_trace)
  ei_add_test(cxx11_tensor_volume_patch)
  # ei_add_test(cxx11_tensor_symmetry)

  if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
    # This test requires __uint128_t which is only available on 64bit systems
    ei_add_test(cxx11_tensor_uint128)
  endif()
endif()
|
|
|
|
# These tests need nvcc. They are registered only when CUDA >= 7.0 is found
# and EIGEN_TEST_CUDA is enabled.
find_package(CUDA 7.0)
if(CUDA_FOUND AND EIGEN_TEST_CUDA)
  # Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
  # and -fno-check-new flags since they trigger thousands of compilation warnings
  # in the CUDA runtime.
  # Also remove -ansi that is incompatible with std=c++11.
  foreach(eigen_cuda_unwanted_flag IN ITEMS
      "-pedantic"
      "-Wundef"
      "-Wnon-virtual-dtor"
      "-fno-check-new"
      "-ansi")
    string(REPLACE "${eigen_cuda_unwanted_flag}" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
  endforeach()

  message(STATUS "Flags used to compile cuda code: ${CMAKE_CXX_FLAGS}")

  if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
    # NOTE(review): FORCE overwrites any user-provided CUDA_NVCC_FLAGS cache
    # entry - confirm this is intended.
    set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
  endif()

  if(EIGEN_TEST_CUDA_CLANG)
    # CUDA sources are compiled through CMAKE_CXX_FLAGS here: add the toolkit
    # location and one --cuda-gpu-arch flag per requested architecture.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
    string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
    foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
      string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${ARCH}")
    endforeach()
  endif()

  # CUDA 7.0 uses a different spelling of the relaxed-constexpr flag.
  set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr")
  if("${CUDA_VERSION}" STREQUAL "7.0")
    set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
  endif()

  # Equivalent to the former
  #   ((NOT EIGEN_TEST_CXX11) OR (CMAKE_VERSION VERSION_LESS 3.3)) AND EIGEN_TEST_CXX11
  # which reduces to the conjunction below.
  if(EIGEN_TEST_CXX11 AND (CMAKE_VERSION VERSION_LESS 3.3))
    set(EIGEN_CUDA_CXX11_FLAG "-std=c++11")
  else()
    # otherwise the flag has already been added because of the above set(CMAKE_CXX_STANDARD 11)
    set(EIGEN_CUDA_CXX11_FLAG "")
  endif()

  # One -gencode entry per requested compute architecture.
  set(NVCC_ARCH_FLAGS)
  foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
    string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
  endforeach()
  set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_CXX11_FLAG} ${EIGEN_CUDA_RELAXED_CONSTEXPR} -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS}")
  cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")

  # Tests registered while this is set use the "cu" filename extension.
  set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")

  ei_add_test(cxx11_tensor_complex_gpu)
  ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
  ei_add_test(cxx11_tensor_reduction_gpu)
  ei_add_test(cxx11_tensor_argmax_gpu)
  ei_add_test(cxx11_tensor_cast_float16_gpu)
  ei_add_test(cxx11_tensor_scan_gpu)

  # Find the smallest requested compute architecture. 9999 is the starting
  # value and is kept when EIGEN_CUDA_COMPUTE_ARCH is empty.
  set(EIGEN_CUDA_OLDEST_COMPUTE_ARCH 9999)
  foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
    if(${ARCH} LESS ${EIGEN_CUDA_OLDEST_COMPUTE_ARCH})
      set(EIGEN_CUDA_OLDEST_COMPUTE_ARCH ${ARCH})
    endif()
  endforeach()

  # Contractions require arch 3.0 or higher
  if(${EIGEN_CUDA_OLDEST_COMPUTE_ARCH} GREATER 29)
    ei_add_test(cxx11_tensor_device)
    ei_add_test(cxx11_tensor_gpu)
    ei_add_test(cxx11_tensor_contract_gpu)
    ei_add_test(cxx11_tensor_of_float16_gpu)
  endif()

  # The random number generation code requires arch 3.5 or greater.
  if(${EIGEN_CUDA_OLDEST_COMPUTE_ARCH} GREATER 34)
    ei_add_test(cxx11_tensor_random_gpu)
  endif()

  unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
|
|
|
|
# Add HIP specific tests.
# Enabled with EIGEN_TEST_HIP; HIP_PATH (cache entry, default /opt/rocm/hip)
# must point at an existing HIP installation, otherwise configuration fails.
if(EIGEN_TEST_HIP)

  set(HIP_PATH "/opt/rocm/hip" CACHE STRING "Path to the HIP installation.")

  if(EXISTS "${HIP_PATH}")

    list(APPEND CMAKE_MODULE_PATH "${HIP_PATH}/cmake")

    find_package(HIP REQUIRED)
    if(HIP_FOUND)

      # Ask hipconfig which platform this installation targets. Trailing
      # whitespace is stripped so that a final newline in the tool's output
      # cannot break the comparisons below.
      execute_process(
        COMMAND "${HIP_PATH}/bin/hipconfig" --platform
        OUTPUT_VARIABLE HIP_PLATFORM
        OUTPUT_STRIP_TRAILING_WHITESPACE)

      # HIP_PLATFORM is quoted: if hipconfig produced no output, an unquoted
      # expansion would leave if() without a left-hand operand and abort with
      # a syntax error instead of the diagnostic in the else() branch.
      if("${HIP_PLATFORM}" STREQUAL "hcc")

        include_directories(${CMAKE_CURRENT_BINARY_DIR})
        include_directories(${HIP_PATH}/include)

        # Tests registered while this is set use the "cu" filename extension.
        set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
        #
        # complex datatype is not yet supported by HIP
        # so leaving out those tests for now
        #
        # ei_add_test(cxx11_tensor_complex_gpu)
        # ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
        #
        ei_add_test(cxx11_tensor_reduction_gpu)
        ei_add_test(cxx11_tensor_argmax_gpu)
        ei_add_test(cxx11_tensor_cast_float16_gpu)
        ei_add_test(cxx11_tensor_scan_gpu)
        ei_add_test(cxx11_tensor_device)

        ei_add_test(cxx11_tensor_gpu)
        ei_add_test(cxx11_tensor_contract_gpu)
        ei_add_test(cxx11_tensor_of_float16_gpu)
        ei_add_test(cxx11_tensor_random_gpu)

        unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)

      elseif("${HIP_PLATFORM}" STREQUAL "nvcc")
        message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
      else()
        message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
      endif()

    endif()

  else()

    message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but the specified HIP_PATH (${HIP_PATH}) does not exist")

  endif()

endif()
|
|
|