ADD: added other eigen lib

This commit is contained in:
Henry Winkel
2022-12-21 16:19:04 +01:00
parent a570766dc6
commit 9e56c7f2c0
832 changed files with 36586 additions and 20006 deletions

View File

@@ -26,11 +26,7 @@ find_package(Adolc)
if(Adolc_FOUND)
include_directories(${ADOLC_INCLUDES})
ei_add_property(EIGEN_TESTED_BACKENDS "Adolc, ")
if(EIGEN_TEST_CXX11)
ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
else()
message(STATUS "Adolc found, but tests require C++11 mode")
endif()
ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
else()
ei_add_property(EIGEN_MISSING_BACKENDS "Adolc, ")
endif()
@@ -56,14 +52,16 @@ ei_add_test(FFT)
ei_add_test(EulerAngles)
find_package(MPREAL)
if(MPREAL_FOUND AND EIGEN_COMPILER_SUPPORT_CPP11)
if(MPREAL_FOUND)
ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
include_directories(${MPREAL_INCLUDES})
ei_add_test(mpreal_support "-std=c++11" "${MPREAL_LIBRARIES}" )
ei_add_test(mpreal_support "" "${MPREAL_LIBRARIES}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
endif()
ei_add_test(NNLS)
ei_add_test(sparse_extra "" "")
find_package(FFTW)
@@ -79,6 +77,17 @@ else()
ei_add_property(EIGEN_MISSING_BACKENDS "fftw, ")
endif()
find_path(POCKETFFT pocketfft_hdronly.h)
if(POCKETFFT)
if(EIGEN_TEST_CXX11)
ei_add_property(EIGEN_TESTED_BACKENDS "pocketfft, ")
include_directories( ${POCKETFFT} )
ei_add_test(pocketfft "-pthread" "${CMAKE_THREAD_LIBS_INIT}" "-DEIGEN_POCKETFFT_DEFAULT" )
endif()
else()
ei_add_property(EIGEN_MISSING_BACKENDS "pocketfft, ")
endif()
option(EIGEN_TEST_OPENGL "Enable OpenGL support in unit tests" OFF)
if(EIGEN_TEST_OPENGL)
find_package(OpenGL)
@@ -103,229 +112,222 @@ ei_add_test(gmres)
ei_add_test(dgmres)
ei_add_test(minres)
ei_add_test(idrs)
ei_add_test(bicgstabl)
ei_add_test(idrstabl)
ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product)
ei_add_test(bessel_functions)
ei_add_test(special_functions)
ei_add_test(special_packetmath "-DEIGEN_FAST_MATH=1")
if(EIGEN_TEST_CXX11)
if(EIGEN_TEST_SYCL)
set(EIGEN_SYCL ON)
# Forward CMake options as preprocessor definitions
if(EIGEN_SYCL_USE_DEFAULT_SELECTOR)
add_definitions(-DEIGEN_SYCL_USE_DEFAULT_SELECTOR=${EIGEN_SYCL_USE_DEFAULT_SELECTOR})
endif()
if(EIGEN_SYCL_NO_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_NO_LOCAL_MEM=${EIGEN_SYCL_NO_LOCAL_MEM})
endif()
if(EIGEN_SYCL_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_LOCAL_MEM=${EIGEN_SYCL_LOCAL_MEM})
endif()
if(EIGEN_SYCL_MAX_GLOBAL_RANGE)
add_definitions(-DEIGEN_SYCL_MAX_GLOBAL_RANGE=${EIGEN_SYCL_MAX_GLOBAL_RANGE})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM0)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM0=${EIGEN_SYCL_LOCAL_THREAD_DIM0})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM1)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM1=${EIGEN_SYCL_LOCAL_THREAD_DIM1})
endif()
if(EIGEN_SYCL_REG_M)
add_definitions(-DEIGEN_SYCL_REG_M=${EIGEN_SYCL_REG_M})
endif()
if(EIGEN_SYCL_REG_N)
add_definitions(-DEIGEN_SYCL_REG_N=${EIGEN_SYCL_REG_N})
endif()
if(EIGEN_SYCL_USE_PROGRAM_CLASS)
add_definitions(-DEIGEN_SYCL_USE_PROGRAM_CLASS=${EIGEN_SYCL_USE_PROGRAM_CLASS})
endif()
if(EIGEN_SYCL_ASYNC_EXECUTION)
add_definitions(-DEIGEN_SYCL_ASYNC_EXECUTION=${EIGEN_SYCL_ASYNC_EXECUTION})
endif()
if(EIGEN_SYCL_DISABLE_SKINNY)
add_definitions(-DEIGEN_SYCL_DISABLE_SKINNY=${EIGEN_SYCL_DISABLE_SKINNY})
endif()
if(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER)
if(EIGEN_TEST_SYCL)
set(EIGEN_SYCL ON)
# Forward CMake options as preprocessor definitions
if(EIGEN_SYCL_USE_DEFAULT_SELECTOR)
add_definitions(-DEIGEN_SYCL_USE_DEFAULT_SELECTOR=${EIGEN_SYCL_USE_DEFAULT_SELECTOR})
endif()
if(EIGEN_SYCL_NO_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_NO_LOCAL_MEM=${EIGEN_SYCL_NO_LOCAL_MEM})
endif()
if(EIGEN_SYCL_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_LOCAL_MEM=${EIGEN_SYCL_LOCAL_MEM})
endif()
if(EIGEN_SYCL_MAX_GLOBAL_RANGE)
add_definitions(-DEIGEN_SYCL_MAX_GLOBAL_RANGE=${EIGEN_SYCL_MAX_GLOBAL_RANGE})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM0)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM0=${EIGEN_SYCL_LOCAL_THREAD_DIM0})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM1)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM1=${EIGEN_SYCL_LOCAL_THREAD_DIM1})
endif()
if(EIGEN_SYCL_REG_M)
add_definitions(-DEIGEN_SYCL_REG_M=${EIGEN_SYCL_REG_M})
endif()
if(EIGEN_SYCL_REG_N)
add_definitions(-DEIGEN_SYCL_REG_N=${EIGEN_SYCL_REG_N})
endif()
if(EIGEN_SYCL_USE_PROGRAM_CLASS)
add_definitions(-DEIGEN_SYCL_USE_PROGRAM_CLASS=${EIGEN_SYCL_USE_PROGRAM_CLASS})
endif()
if(EIGEN_SYCL_ASYNC_EXECUTION)
add_definitions(-DEIGEN_SYCL_ASYNC_EXECUTION=${EIGEN_SYCL_ASYNC_EXECUTION})
endif()
if(EIGEN_SYCL_DISABLE_SKINNY)
add_definitions(-DEIGEN_SYCL_DISABLE_SKINNY=${EIGEN_SYCL_DISABLE_SKINNY})
endif()
if(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER)
add_definitions(-DEIGEN_SYCL_DISABLE_DOUBLE_BUFFER=${EIGEN_SYCL_DISABLE_DOUBLE_BUFFER})
endif()
if(EIGEN_SYCL_DISABLE_RANK1)
add_definitions(-DEIGEN_SYCL_DISABLE_RANK1=${EIGEN_SYCL_DISABLE_RANK1})
endif()
if(EIGEN_SYCL_DISABLE_SCALAR)
add_definitions(-DEIGEN_SYCL_DISABLE_SCALAR=${EIGEN_SYCL_DISABLE_SCALAR})
endif()
if(EIGEN_SYCL_DISABLE_GEMV)
add_definitions(-DEIGEN_SYCL_DISABLE_GEMV=${EIGEN_SYCL_DISABLE_GEMV})
endif()
if(EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION)
add_definitions(-DEIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=${EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION})
endif()
if(EIGEN_SYCL_TRISYCL)
# triSYCL now requires c++17.
set(CMAKE_CXX_STANDARD 17)
else()
if(MSVC)
# Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11
# can cause issues with the ComputeCpp device compiler parsing Visual Studio Headers.
set(CMAKE_CXX_STANDARD 14)
list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
else()
set(CMAKE_CXX_STANDARD 11)
list(APPEND COMPUTECPP_USER_FLAGS -Wall)
endif()
# The following flags are not supported by Clang and can cause warnings
# if used with -Werror so they are removed here.
if(COMPUTECPP_USE_COMPILER_DRIVER)
set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
endif()
list(APPEND COMPUTECPP_USER_FLAGS
-DEIGEN_NO_ASSERTION_CHECKING=1
-no-serial-memop
-Xclang
-cl-mad-enable)
endif()
ei_add_test(cxx11_tensor_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_image_op_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_math_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_forced_eval_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_broadcast_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_device_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_reduction_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_morphing_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_shuffling_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_padding_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_builtins_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_contract_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_concatenation_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_reverse_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_convolution_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_striding_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_chipping_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_layout_swap_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_inflation_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_random_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_generator_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_image_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_volume_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_argmax_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_custom_op_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_scan_sycl ${STD_CXX_FLAG})
set(EIGEN_SYCL OFF)
if(EIGEN_SYCL_DISABLE_RANK1)
add_definitions(-DEIGEN_SYCL_DISABLE_RANK1=${EIGEN_SYCL_DISABLE_RANK1})
endif()
if(EIGEN_SYCL_DISABLE_SCALAR)
add_definitions(-DEIGEN_SYCL_DISABLE_SCALAR=${EIGEN_SYCL_DISABLE_SCALAR})
endif()
if(EIGEN_SYCL_DISABLE_GEMV)
add_definitions(-DEIGEN_SYCL_DISABLE_GEMV=${EIGEN_SYCL_DISABLE_GEMV})
endif()
if(EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION)
add_definitions(-DEIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=${EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION})
endif()
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
if(EIGEN_SYCL_TRISYCL)
# triSYCL now requires c++17.
set(CMAKE_CXX_STANDARD 17)
else()
if(MSVC)
list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
else()
list(APPEND COMPUTECPP_USER_FLAGS -Wall)
endif()
# The following flags are not supported by Clang and can cause warnings
# if used with -Werror so they are removed here.
if(COMPUTECPP_USE_COMPILER_DRIVER)
set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
endif()
list(APPEND COMPUTECPP_USER_FLAGS
-DEIGEN_NO_ASSERTION_CHECKING=1
-no-serial-memop
-Xclang
-cl-mad-enable)
endif()
ei_add_test(cxx11_meta)
ei_add_test(cxx11_maxsizevector)
ei_add_test(cxx11_tensor_argmax)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_block_access)
ei_add_test(cxx11_tensor_block_eval)
ei_add_test(cxx11_tensor_block_io)
ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_concatenation)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_empty)
ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_expr)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_fixed_size)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_ifft)
ei_add_test(cxx11_tensor_image_patch)
ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_inflation)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_lvalue)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_mixed_indices)
ei_add_test(cxx11_tensor_morphing)
ei_add_test(cxx11_tensor_move)
ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_strings)
ei_add_test(cxx11_tensor_padding)
ei_add_test(cxx11_tensor_patch)
ei_add_test(cxx11_tensor_random)
ei_add_test(cxx11_tensor_reduction)
ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_scan)
ei_add_test(cxx11_tensor_shuffling)
ei_add_test(cxx11_tensor_simple)
ei_add_test(cxx11_tensor_striding)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_thread_local "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_trace)
ei_add_test(cxx11_tensor_volume_patch)
ei_add_test(cxx11_tensor_sycl)
ei_add_test(cxx11_tensor_image_op_sycl)
ei_add_test(cxx11_tensor_math_sycl)
ei_add_test(cxx11_tensor_forced_eval_sycl)
ei_add_test(cxx11_tensor_broadcast_sycl)
ei_add_test(cxx11_tensor_device_sycl)
ei_add_test(cxx11_tensor_reduction_sycl)
ei_add_test(cxx11_tensor_morphing_sycl)
ei_add_test(cxx11_tensor_shuffling_sycl)
ei_add_test(cxx11_tensor_padding_sycl)
ei_add_test(cxx11_tensor_builtins_sycl)
ei_add_test(cxx11_tensor_contract_sycl)
ei_add_test(cxx11_tensor_concatenation_sycl)
ei_add_test(cxx11_tensor_reverse_sycl)
ei_add_test(cxx11_tensor_convolution_sycl)
ei_add_test(cxx11_tensor_striding_sycl)
ei_add_test(cxx11_tensor_chipping_sycl)
ei_add_test(cxx11_tensor_layout_swap_sycl)
ei_add_test(cxx11_tensor_inflation_sycl)
ei_add_test(cxx11_tensor_random_sycl)
ei_add_test(cxx11_tensor_generator_sycl)
ei_add_test(cxx11_tensor_patch_sycl)
ei_add_test(cxx11_tensor_image_patch_sycl)
ei_add_test(cxx11_tensor_volume_patch_sycl)
ei_add_test(cxx11_tensor_argmax_sycl)
ei_add_test(cxx11_tensor_custom_op_sycl)
ei_add_test(cxx11_tensor_scan_sycl)
set(EIGEN_SYCL OFF)
endif()
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_meta)
ei_add_test(cxx11_maxsizevector)
ei_add_test(cxx11_tensor_argmax)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_block_access)
ei_add_test(cxx11_tensor_block_eval)
ei_add_test(cxx11_tensor_block_io)
ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_concatenation)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_empty)
ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_expr)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_fixed_size)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_ifft)
ei_add_test(cxx11_tensor_image_patch)
ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_inflation)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_lvalue)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_mixed_indices)
ei_add_test(cxx11_tensor_morphing)
ei_add_test(cxx11_tensor_move)
ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_strings)
ei_add_test(cxx11_tensor_padding)
ei_add_test(cxx11_tensor_patch)
ei_add_test(cxx11_tensor_random)
ei_add_test(cxx11_tensor_reduction)
ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_scan)
ei_add_test(cxx11_tensor_shuffling)
ei_add_test(cxx11_tensor_simple)
ei_add_test(cxx11_tensor_striding)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_thread_local "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_trace)
ei_add_test(cxx11_tensor_volume_patch)
# ei_add_test(cxx11_tensor_symmetry)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# This test requires __uint128_t which is only available on 64bit systems
ei_add_test(cxx11_tensor_uint128)
endif()
endif()
# These tests need nvcc
find_package(CUDA 7.0)
find_package(CUDA 9.0)
if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags since they trigger thousands of compilation warnings
# in the CUDA runtime
# Also remove -ansi that is incompatible with std=c++11.
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${ARCH}")
endforeach()
string(APPEND CMAKE_CXX_FLAGS " ${EIGEN_CUDA_CXX_FLAGS}")
else()
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
set(NVCC_ARCH_FLAGS)
# Define an -arch=sm_<arch>, otherwise if GPU does not exactly match one of
# those in the arch list for -gencode, the kernels will fail to run with
# cudaErrorNoKernelImageForDevice
# This can happen with newer cards (e.g. sm_75) and compiling with older
# versions of nvcc (e.g. 9.2) that do not support their specific arch.
list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE)
if(EIGEN_CUDA_COMPUTE_ARCH_SIZE)
list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT)
set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}")
endif()
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()
set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
endif()
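# A rough illustration of what the branch above produces (comment only, assuming
# EIGEN_CUDA_COMPUTE_ARCH holds the list "60;75"): the first entry supplies the default
# -arch flag and every entry gets its own -gencode pair, so CUDA_NVCC_FLAGS ends up roughly as
#   --expt-relaxed-constexpr -Xcudafe "--display_error_number" -arch=sm_60
#   -gencode arch=compute_60,code=sm_60 -gencode arch=compute_75,code=sm_75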
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr")
if (${CUDA_VERSION} STREQUAL "7.0")
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
endif()
set(NVCC_ARCH_FLAGS)
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()
set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_RELAXED_CONSTEXPR} -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(cxx11_tensor_complex_gpu)
@@ -355,7 +357,6 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
ei_add_test(cxx11_tensor_random_gpu)
endif()
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
@@ -365,52 +366,46 @@ if (EIGEN_TEST_HIP)
set(HIP_PATH "/opt/rocm/hip" CACHE STRING "Path to the HIP installation.")
if (EXISTS ${HIP_PATH})
list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake)
find_package(HIP REQUIRED)
if (HIP_FOUND)
execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM)
if ((${HIP_PLATFORM} STREQUAL "hcc") OR (${HIP_PLATFORM} STREQUAL "amd"))
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${HIP_PATH}/include)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${HIP_PATH}/include)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
#
# complex datatype is not yet supported by HIP
# so leaving out those tests for now
#
# ei_add_test(cxx11_tensor_complex_gpu)
# ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
#
ei_add_test(cxx11_tensor_reduction_gpu)
ei_add_test(cxx11_tensor_argmax_gpu)
ei_add_test(cxx11_tensor_cast_float16_gpu)
ei_add_test(cxx11_tensor_scan_gpu)
ei_add_test(cxx11_tensor_device)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
#
# complex datatype is not yet supported by HIP
# so leaving out those tests for now
#
# ei_add_test(cxx11_tensor_complex_gpu)
# ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
#
ei_add_test(cxx11_tensor_reduction_gpu)
ei_add_test(cxx11_tensor_argmax_gpu)
ei_add_test(cxx11_tensor_cast_float16_gpu)
ei_add_test(cxx11_tensor_scan_gpu)
ei_add_test(cxx11_tensor_device)
ei_add_test(cxx11_tensor_gpu)
ei_add_test(cxx11_tensor_contract_gpu)
ei_add_test(cxx11_tensor_of_float16_gpu)
ei_add_test(cxx11_tensor_of_bfloat16_gpu)
ei_add_test(cxx11_tensor_random_gpu)
ei_add_test(cxx11_tensor_gpu)
ei_add_test(cxx11_tensor_contract_gpu)
ei_add_test(cxx11_tensor_of_float16_gpu)
ei_add_test(cxx11_tensor_random_gpu)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
elseif ((${HIP_PLATFORM} STREQUAL "nvcc") OR (${HIP_PLATFORM} STREQUAL "nvidia"))
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
else ()
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
endif()
endif()
else ()
message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but the specified HIP_PATH (${HIP_PATH}) does not exist")
endif()
endif()

View File

@@ -1,2 +1,2 @@
#define test_FFTW test_FFT
#include "FFTW.cpp"
#define EIGEN_FFT_DEFAULT 1
#include "fft_test_shared.h"

View File

@@ -1,262 +1,2 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/FFT>
template <typename T>
std::complex<T> RandomCpx() { return std::complex<T>( (T)(rand()/(T)RAND_MAX - .5), (T)(rand()/(T)RAND_MAX - .5) ); }
using namespace std;
using namespace Eigen;
template < typename T>
complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }
complex<long double> promote(float x) { return complex<long double>((long double)x); }
complex<long double> promote(double x) { return complex<long double>((long double)x); }
complex<long double> promote(long double x) { return complex<long double>((long double)x); }
template <typename VT1,typename VT2>
long double fft_rmse( const VT1 & fftbuf,const VT2 & timebuf)
{
long double totalpower=0;
long double difpower=0;
long double pi = acos((long double)-1 );
for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
complex<long double> acc = 0;
long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
}
totalpower += numext::abs2(acc);
complex<long double> x = promote(fftbuf[k0]);
complex<long double> dif = acc - x;
difpower += numext::abs2(dif);
//cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(numext::abs2(dif)) << endl;
}
cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
return sqrt(difpower/totalpower);
}
template <typename VT1,typename VT2>
long double dif_rmse( const VT1 buf1,const VT2 buf2)
{
long double totalpower=0;
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower/totalpower);
}
enum { StdVectorContainer, EigenVectorContainer };
template<int Container, typename Scalar> struct VectorType;
template<typename Scalar> struct VectorType<StdVectorContainer,Scalar>
{
typedef vector<Scalar> type;
};
template<typename Scalar> struct VectorType<EigenVectorContainer,Scalar>
{
typedef Matrix<Scalar,Dynamic,1> type;
};
template <int Container, typename T>
void test_scalar_generic(int nfft)
{
typedef typename FFT<T>::Complex Complex;
typedef typename FFT<T>::Scalar Scalar;
typedef typename VectorType<Container,Scalar>::type ScalarVector;
typedef typename VectorType<Container,Complex>::type ComplexVector;
FFT<T> fft;
ScalarVector tbuf(nfft);
ComplexVector freqBuf;
for (int k=0;k<nfft;++k)
tbuf[k]= (T)( rand()/(double)RAND_MAX - .5);
// make sure it DOESN'T give the right full spectrum answer
// if we've asked for half-spectrum
fft.SetFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
fft.ClearFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
if (nfft&1)
return; // odd FFTs get the wrong size inverse FFT
ScalarVector tbuf2;
fft.inv( tbuf2 , freqBuf);
VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ScalarVector tbuf3;
fft.SetFlag(fft.Unscaled);
fft.inv( tbuf3 , freqBuf);
for (int k=0;k<nfft;++k)
tbuf3[k] *= T(1./nfft);
//for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl;
VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( tbuf2 , freqBuf);
VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
}
template <typename T>
void test_scalar(int nfft)
{
test_scalar_generic<StdVectorContainer,T>(nfft);
//test_scalar_generic<EigenVectorContainer,T>(nfft);
}
template <int Container, typename T>
void test_complex_generic(int nfft)
{
typedef typename FFT<T>::Complex Complex;
typedef typename VectorType<Container,Complex>::type ComplexVector;
FFT<T> fft;
ComplexVector inbuf(nfft);
ComplexVector outbuf;
ComplexVector buf3;
for (int k=0;k<nfft;++k)
inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
fft.fwd( outbuf , inbuf);
VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check
fft.inv( buf3 , outbuf);
VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
fft.SetFlag(fft.Unscaled);
fft.inv( buf4 , outbuf);
for (int k=0;k<nfft;++k)
buf4[k] *= T(1./nfft);
VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( buf3 , outbuf);
VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
}
template <typename T>
void test_complex(int nfft)
{
test_complex_generic<StdVectorContainer,T>(nfft);
test_complex_generic<EigenVectorContainer,T>(nfft);
}
/*
template <typename T,int nrows,int ncols>
void test_complex2d()
{
typedef typename Eigen::FFT<T>::Complex Complex;
FFT<T> fft;
Eigen::Matrix<Complex,nrows,ncols> src,src2,dst,dst2;
src = Eigen::Matrix<Complex,nrows,ncols>::Random();
//src = Eigen::Matrix<Complex,nrows,ncols>::Identity();
for (int k=0;k<ncols;k++) {
Eigen::Matrix<Complex,nrows,1> tmpOut;
fft.fwd( tmpOut,src.col(k) );
dst2.col(k) = tmpOut;
}
for (int k=0;k<nrows;k++) {
Eigen::Matrix<Complex,1,ncols> tmpOut;
fft.fwd( tmpOut, dst2.row(k) );
dst2.row(k) = tmpOut;
}
fft.fwd2(dst.data(),src.data(),ncols,nrows);
fft.inv2(src2.data(),dst.data(),ncols,nrows);
VERIFY( (src-src2).norm() < test_precision<T>() );
VERIFY( (dst-dst2).norm() < test_precision<T>() );
}
*/
void test_return_by_value(int len)
{
VectorXf in;
VectorXf in1;
in.setRandom( len );
VectorXcf out1,out2;
FFT<float> fft;
fft.SetFlag(fft.HalfSpectrum );
fft.fwd(out1,in);
out2 = fft.fwd(in);
VERIFY( (out1-out2).norm() < test_precision<float>() );
in1 = fft.inv(out1);
VERIFY( (in1-in).norm() < test_precision<float>() );
}
EIGEN_DECLARE_TEST(FFTW)
{
CALL_SUBTEST( test_return_by_value(32) );
//CALL_SUBTEST( ( test_complex2d<float,4,8> () ) ); CALL_SUBTEST( ( test_complex2d<double,4,8> () ) );
//CALL_SUBTEST( ( test_complex2d<long double,4,8> () ) );
CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) );
CALL_SUBTEST( test_complex<float>(256) ); CALL_SUBTEST( test_complex<double>(256) );
CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) );
CALL_SUBTEST( test_complex<float>(5*32) ); CALL_SUBTEST( test_complex<double>(5*32) );
CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) );
CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) );
CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) );
CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) );
CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) );
CALL_SUBTEST( test_scalar<float>(50) ); CALL_SUBTEST( test_scalar<double>(50) );
CALL_SUBTEST( test_scalar<float>(256) ); CALL_SUBTEST( test_scalar<double>(256) );
CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) );
#ifdef EIGEN_HAS_FFTWL
CALL_SUBTEST( test_complex<long double>(32) );
CALL_SUBTEST( test_complex<long double>(256) );
CALL_SUBTEST( test_complex<long double>(3*8) );
CALL_SUBTEST( test_complex<long double>(5*32) );
CALL_SUBTEST( test_complex<long double>(2*3*4) );
CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
CALL_SUBTEST( test_scalar<long double>(32) );
CALL_SUBTEST( test_scalar<long double>(45) );
CALL_SUBTEST( test_scalar<long double>(50) );
CALL_SUBTEST( test_scalar<long double>(256) );
CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
#endif
}
#define EIGEN_FFTW_DEFAULT 1
#include "fft_test_shared.h"

View File

@@ -0,0 +1,472 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) Essex Edwards <essex.edwards@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_RUNTIME_NO_MALLOC
#include "main.h"
#include <unsupported/Eigen/NNLS>
/// Check that 'x' solves the NNLS optimization problem `min ||A*x-b|| s.t. 0 <= x`.
/// The \p tolerance parameter is the absolute tolerance on the gradient, A'*(A*x-b).
template <typename MatrixType, typename VectorB, typename VectorX, typename Scalar>
static void verify_nnls_optimality(const MatrixType &A, const VectorB &b, const VectorX &x, const Scalar tolerance) {
// The NNLS optimality conditions are:
//
// * 0 = A'*A*x - A'*b - lambda
// * 0 <= x[i] \forall i
// * 0 <= lambda[i] \forall i
// * 0 = x[i]*lambda[i] \forall i
//
// we don't know lambda, but by assuming the first optimality condition is true,
// we can derive it and then check the other conditions.
const VectorX lambda = A.transpose() * (A * x - b);
// NNLS solutions are EXACTLY not negative.
VERIFY_LE(0, x.minCoeff());
// Exact lambda would be non-negative, but computed lambda might leak a little
VERIFY_LE(-tolerance, lambda.minCoeff());
// x[i]*lambda[i] == 0 <~~> (x[i]==0) || (lambda[i] is small)
VERIFY(((x.array() == Scalar(0)) || (lambda.array() <= tolerance)).all());
}
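// A tiny worked instance of these conditions (illustration only, not part of the test suite):
// take A = [1; 2] and b = [-1; -2]. The unconstrained least-squares solution is x = -1, so the
// NNLS solution clamps to x = 0. There lambda = A'*(A*x - b) = -A'*b = 5 >= 0 and x*lambda = 0,
// so the sign and complementarity conditions checked above hold exactly.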
template <typename MatrixType, typename VectorB, typename VectorX>
static void test_nnls_known_solution(const MatrixType &A, const VectorB &b, const VectorX &x_expected) {
using Scalar = typename MatrixType::Scalar;
using std::sqrt;
const Scalar tolerance = sqrt(Eigen::GenericNumTraits<Scalar>::epsilon());
Index max_iter = 5 * A.cols(); // A heuristic guess.
NNLS<MatrixType> nnls(A, max_iter, tolerance);
const VectorX x = nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_IS_APPROX(x, x_expected);
verify_nnls_optimality(A, b, x, tolerance);
}
template <typename MatrixType>
static void test_nnls_random_problem() {
//
// SETUP
//
Index cols = MatrixType::ColsAtCompileTime;
if (cols == Dynamic) cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
Index rows = MatrixType::RowsAtCompileTime;
if (rows == Dynamic) rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
VERIFY_LE(cols, rows); // To have a unique LS solution: cols <= rows.
// Make some sort of random test problem from a wide range of scales and condition numbers.
using std::pow;
using Scalar = typename MatrixType::Scalar;
const Scalar sqrtConditionNumber = pow(Scalar(10), internal::random<Scalar>(Scalar(0), Scalar(2)));
const Scalar scaleA = pow(Scalar(10), internal::random<Scalar>(Scalar(-3), Scalar(3)));
const Scalar minSingularValue = scaleA / sqrtConditionNumber;
const Scalar maxSingularValue = scaleA * sqrtConditionNumber;
MatrixType A(rows, cols);
generateRandomMatrixSvs(setupRangeSvs<Matrix<Scalar, Dynamic, 1>>(cols, minSingularValue, maxSingularValue), rows,
cols, A);
// Make a random RHS also with a random scaling.
using VectorB = decltype(A.col(0).eval());
const Scalar scaleB = pow(Scalar(10), internal::random<Scalar>(Scalar(-3), Scalar(3)));
const VectorB b = scaleB * VectorB::Random(A.rows());
//
// ACT
//
using Scalar = typename MatrixType::Scalar;
using std::sqrt;
const Scalar tolerance =
sqrt(Eigen::GenericNumTraits<Scalar>::epsilon()) * b.cwiseAbs().maxCoeff() * A.cwiseAbs().maxCoeff();
Index max_iter = 5 * A.cols(); // A heuristic guess.
NNLS<MatrixType> nnls(A, max_iter, tolerance);
const typename NNLS<MatrixType>::SolutionVectorType &x = nnls.solve(b);
//
// VERIFY
//
// In fact, NNLS can fail on some problems, but they are rare in practice.
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, tolerance);
}
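// A quick check of the scaling used above (comment only): with sqrtConditionNumber = 10^r for
// r in [0, 2] and scaleA = 10^s for s in [-3, 3], the generated matrix has
// maxSingularValue / minSingularValue = sqrtConditionNumber^2 = 10^(2r), i.e. condition numbers
// between 1 and 10^4, while the overall scale 10^s varies over six orders of magnitude.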
static void test_nnls_handles_zero_rhs() {
//
// SETUP
//
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Zero(rows);
//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);
//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 1); // 0 or 1 would be fine for an edge case like this.
VERIFY_IS_EQUAL(x, VectorXd::Zero(cols));
}
static void test_nnls_handles_Mx0_matrix() {
//
// SETUP
//
const Index rows = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const MatrixXd A(rows, 0);
const VectorXd b = VectorXd::Random(rows);
//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);
//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 0);
VERIFY_IS_EQUAL(x.size(), 0);
}
static void test_nnls_handles_0x0_matrix() {
//
// SETUP
//
const MatrixXd A(0, 0);
const VectorXd b(0);
//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);
//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 0);
VERIFY_IS_EQUAL(x.size(), 0);
}
static void test_nnls_handles_dependent_columns() {
//
// SETUP
//
const Index rank = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE / 2);
const Index cols = 2 * rank;
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, rank) * MatrixXd::Random(rank, cols);
const VectorXd b = VectorXd::Random(rows);
//
// ACT
//
const double tolerance = 1e-8;
NNLS<MatrixXd> nnls(A);
const VectorXd &x = nnls.solve(b);
//
// VERIFY
//
// What should happen when the input 'A' has dependent columns?
// We might still succeed. Or we might not converge.
// Either outcome is fine. If Success is indicated,
// then 'x' must actually be a solution vector.
if (nnls.info() == ComputationInfo::Success) {
verify_nnls_optimality(A, b, x, tolerance);
}
}
static void test_nnls_handles_wide_matrix() {
//
// SETUP
//
const Index cols = internal::random<Index>(2, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(2, cols - 1);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Random(rows);
//
// ACT
//
const double tolerance = 1e-8;
NNLS<MatrixXd> nnls(A);
const VectorXd &x = nnls.solve(b);
//
// VERIFY
//
// What should happen when the input 'A' is wide?
// The unconstrained least-squares problem has infinitely many solutions.
// Subject to the non-negativity constraints,
// the solution might actually be unique (e.g. it is [0,0,..,0]).
// So, NNLS might succeed or it might fail.
// Either outcome is fine. If Success is indicated,
// then 'x' must actually be a solution vector.
if (nnls.info() == ComputationInfo::Success) {
verify_nnls_optimality(A, b, x, tolerance);
}
}
// 4x2 problem, unconstrained solution positive
static void test_nnls_known_1() {
Matrix<double, 4, 2> A(4, 2);
Matrix<double, 4, 1> b(4);
Matrix<double, 2, 1> x(2);
A << 1, 1, 2, 4, 3, 9, 4, 16;
b << 0.6, 2.2, 4.8, 8.4;
x << 0.1, 0.5;
return test_nnls_known_solution(A, b, x);
}
// 4x3 problem, unconstrained solution positive
static void test_nnls_known_2() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);
A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.73, 3.24, 8.31, 16.72;
x << 0.1, 0.5, 0.13;
test_nnls_known_solution(A, b, x);
}
// Simple 4x4 problem, unconstrained solution non-negative
static void test_nnls_known_3() {
Matrix<double, 4, 4> A(4, 4);
Matrix<double, 4, 1> b(4);
Matrix<double, 4, 1> x(4);
A << 1, 1, 1, 1, 2, 4, 8, 16, 3, 9, 27, 81, 4, 16, 64, 256;
b << 0.73, 3.24, 8.31, 16.72;
x << 0.1, 0.5, 0.13, 0;
test_nnls_known_solution(A, b, x);
}
// Simple 4x3 problem, unconstrained solution non-negative
static void test_nnls_known_4() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);
A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.23, 1.24, 3.81, 8.72;
x << 0.1, 0, 0.13;
test_nnls_known_solution(A, b, x);
}
// Simple 4x3 problem, unconstrained solution indefinite
static void test_nnls_known_5() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);
A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.13, 0.84, 2.91, 7.12;
// Solution obtained by original nnls() implementation in Fortran
x << 0.0, 0.0, 0.1106544;
test_nnls_known_solution(A, b, x);
}
static void test_nnls_small_reference_problems() {
test_nnls_known_1();
test_nnls_known_2();
test_nnls_known_3();
test_nnls_known_4();
test_nnls_known_5();
}
static void test_nnls_with_half_precision() {
// The random matrix generation tools don't work with `half`,
// so here's a simpler setup mostly just to check that NNLS compiles & runs with custom scalar types.
using Mat = Matrix<half, 8, 2>;
using VecB = Matrix<half, 8, 1>;
using VecX = Matrix<half, 2, 1>;
Mat A = Mat::Random(); // full-column rank with high probability.
VecB b = VecB::Random();
NNLS<Mat> nnls(A, 20, half(1e-2f));
const VecX x = nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, half(1e-1));
}
static void test_nnls_special_case_solves_in_zero_iterations() {
// The particular NNLS algorithm that is implemented starts with all variables
// in the active set.
// This test builds a system where all constraints are active at the solution,
// so that the initial guess is already correct.
//
// If the implementation changes to another algorithm that does not have this property,
// then this test will need to change (e.g. starting from all constraints inactive,
// or using ADMM, or an interior point solver).
const Index n = 10;
const Index m = 3 * n;
const VectorXd b = VectorXd::Random(m);
// With high probability, this is full column rank, which we need for uniqueness.
MatrixXd A = MatrixXd::Random(m, n);
// Make every column of `A` such that adding it to the active set only /increases/ the objective,
// thus ensuring the NNLS solution is all zeros.
const VectorXd alignment = -(A.transpose() * b).cwiseSign();
A = A * alignment.asDiagonal();
NNLS<MatrixXd> nnls(A);
nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY(nnls.iterations() == 0);
}
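// Why the initial guess is already optimal (a sketch of the reasoning): at x = 0 the gradient is
// lambda = A'*(A*0 - b) = -A'*b. After the sign flip above, every column a_i of A satisfies
// a_i'*b = -|a_i_original'*b| <= 0, hence lambda = -A'*b >= 0 componentwise. Together with x = 0
// this satisfies exactly the conditions checked by verify_nnls_optimality, so the active-set
// solver accepts the all-zero starting point without moving any variable out of the active set.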
static void test_nnls_special_case_solves_in_n_iterations() {
// The particular NNLS algorithm that is implemented starts with all variables
// in the active set and then adds one variable to the inactive set each iteration.
// This test builds a system where all variables are inactive at the solution,
// so it should take 'n' iterations to get there.
//
// If the implementation changes to another algorithm that does not have this property,
// then this test will need to change (e.g. starting from all constraints inactive,
// or using ADMM, or an interior point solver).
const Index n = 10;
const Index m = 3 * n;
// With high probability, this is full column rank, which we need for uniqueness.
const MatrixXd A = MatrixXd::Random(m, n);
const VectorXd x = VectorXd::Random(n).cwiseAbs().array() + 1; // all positive.
const VectorXd b = A * x;
NNLS<MatrixXd> nnls(A);
nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY(nnls.iterations() == n);
}
static void test_nnls_returns_NoConvergence_when_maxIterations_is_too_low() {
// Using the special case that takes `n` iterations,
// from `test_nnls_special_case_solves_in_n_iterations`,
// we can set max iterations too low and that should cause the solve to fail.
const Index n = 10;
const Index m = 3 * n;
// With high probability, this is full column rank, which we need for uniqueness.
const MatrixXd A = MatrixXd::Random(m, n);
const VectorXd x = VectorXd::Random(n).cwiseAbs().array() + 1; // all positive.
const VectorXd b = A * x;
NNLS<MatrixXd> nnls(A);
const Index max_iters = n - 1;
nnls.setMaxIterations(max_iters);
nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::NoConvergence);
VERIFY(nnls.iterations() == max_iters);
}
static void test_nnls_default_maxIterations_is_twice_column_count() {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
NNLS<MatrixXd> nnls(A);
VERIFY_IS_EQUAL(nnls.maxIterations(), 2 * cols);
}
static void test_nnls_does_not_allocate_during_solve() {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Random(rows);
NNLS<MatrixXd> nnls(A);
internal::set_is_malloc_allowed(false);
nnls.solve(b);
internal::set_is_malloc_allowed(true);
}
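// Note on the mechanism used above: defining EIGEN_RUNTIME_NO_MALLOC before including "main.h"
// (as done at the top of this file) builds Eigen with allocation tracking, and
// internal::set_is_malloc_allowed(false) then turns any Eigen-internal heap allocation into an
// assertion failure, so the solve() call above fails loudly if it ever allocates.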
static void test_nnls_repeated_calls_to_compute_and_solve() {
const Index cols2 = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows2 = internal::random<Index>(cols2, EIGEN_TEST_MAX_SIZE);
const MatrixXd A2 = MatrixXd::Random(rows2, cols2);
const VectorXd b2 = VectorXd::Random(rows2);
NNLS<MatrixXd> nnls;
for (int i = 0; i < 4; ++i) {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
nnls.compute(A);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
for (int j = 0; j < 3; ++j) {
const VectorXd b = VectorXd::Random(rows);
const VectorXd x = nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, 1e-4);
}
}
}
EIGEN_DECLARE_TEST(NNLS) {
// Small matrices with known solutions:
CALL_SUBTEST_1(test_nnls_small_reference_problems());
CALL_SUBTEST_1(test_nnls_handles_Mx0_matrix());
CALL_SUBTEST_1(test_nnls_handles_0x0_matrix());
for (int i = 0; i < g_repeat; i++) {
// Essential NNLS properties, across different types.
CALL_SUBTEST_2(test_nnls_random_problem<MatrixXf>());
CALL_SUBTEST_3(test_nnls_random_problem<MatrixXd>());
using MatFixed = Matrix<double, 12, 5>;
CALL_SUBTEST_4(test_nnls_random_problem<MatFixed>());
CALL_SUBTEST_5(test_nnls_with_half_precision());
// Robustness tests:
CALL_SUBTEST_6(test_nnls_handles_zero_rhs());
CALL_SUBTEST_6(test_nnls_handles_dependent_columns());
CALL_SUBTEST_6(test_nnls_handles_wide_matrix());
// Properties specific to the implementation,
// not NNLS in general.
CALL_SUBTEST_7(test_nnls_special_case_solves_in_zero_iterations());
CALL_SUBTEST_7(test_nnls_special_case_solves_in_n_iterations());
CALL_SUBTEST_7(test_nnls_returns_NoConvergence_when_maxIterations_is_too_low());
CALL_SUBTEST_7(test_nnls_default_maxIterations_is_twice_column_count());
CALL_SUBTEST_8(test_nnls_repeated_calls_to_compute_and_solve());
// This test fails. It hits allocations in HouseholderSequence.h
// test_nnls_does_not_allocate_during_solve();
}
}

View File

@@ -12,14 +12,10 @@
// It is intended to be done for this test only.
#include <Eigen/src/Core/util/DisableStupidWarnings.h>
// tolerance for chekcing number of iterations
#define LM_EVAL_COUNT_TOL 4/3
// tolerance for checking number of iterations
#define LM_EVAL_COUNT_TOL 2
#define LM_CHECK_N_ITERS(SOLVER,NFEV,NJEV) { \
++g_test_level; \
VERIFY_IS_EQUAL(SOLVER.nfev, NFEV); \
VERIFY_IS_EQUAL(SOLVER.njev, NJEV); \
--g_test_level; \
VERIFY(SOLVER.nfev <= NFEV * LM_EVAL_COUNT_TOL); \
VERIFY(SOLVER.njev <= NJEV * LM_EVAL_COUNT_TOL); \
}
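// For illustration (comment only): with the relaxed macro, LM_CHECK_N_ITERS(lm, 6, 5) expands to
// VERIFY(lm.nfev <= 6 * LM_EVAL_COUNT_TOL) and VERIFY(lm.njev <= 5 * LM_EVAL_COUNT_TOL), i.e. it
// accepts up to 12 function and 10 Jacobian evaluations rather than requiring the exact
// reference counts.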
@@ -113,10 +109,10 @@ void testChkder()
}
// Generic functor
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct Functor
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
@@ -186,9 +182,10 @@ void testLmder1()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.lmder1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm
@@ -214,9 +211,10 @@ void testLmder()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm
@@ -298,9 +296,10 @@ void testHybrj1()
hybrj_functor functor;
HybridNonLinearSolver<hybrj_functor> solver(functor);
info = solver.hybrj1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(solver, 11, 1);
// check norm
@@ -332,9 +331,10 @@ void testHybrj()
solver.diag.setConstant(n, 1.);
solver.useExternalScaling = true;
info = solver.solve(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(solver, 11, 1);
// check norm
@@ -385,10 +385,11 @@ void testHybrd1()
hybrd_functor functor;
HybridNonLinearSolver<hybrd_functor> solver(functor);
info = solver.hybrd1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(solver.nfev, 20);
// VERIFY_IS_EQUAL(info, 1);
VERIFY(solver.nfev <= 20*LM_EVAL_COUNT_TOL);
// check norm
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
@@ -416,10 +417,11 @@ void testHybrd()
solver.diag.setConstant(n, 1.);
solver.useExternalScaling = true;
info = solver.solveNumericalDiff(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(solver.nfev, 14);
// VERIFY_IS_EQUAL(info, 1);
VERIFY(solver.nfev <= 14*LM_EVAL_COUNT_TOL);
// check norm
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
@@ -487,9 +489,10 @@ void testLmstr1()
lmstr_functor functor;
LevenbergMarquardt<lmstr_functor> lm(functor);
info = lm.lmstr1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm
@@ -515,9 +518,10 @@ void testLmstr()
lmstr_functor functor;
LevenbergMarquardt<lmstr_functor> lm(functor);
info = lm.minimizeOptimumStorage(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm
@@ -570,10 +574,11 @@ void testLmdif1()
lmdif_functor functor;
DenseIndex nfev = -1; // initialize to avoid maybe-uninitialized warning
info = LevenbergMarquardt<lmdif_functor>::lmdif1(functor, x, &nfev);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(nfev, 26);
// VERIFY_IS_EQUAL(info, 1);
VERIFY( nfev <= 26*LM_EVAL_COUNT_TOL);
// check norm
functor(x, fvec);
@@ -601,10 +606,11 @@ void testLmdif()
NumericalDiff<lmdif_functor> numDiff(functor);
LevenbergMarquardt<NumericalDiff<lmdif_functor> > lm(numDiff);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev, 26);
// VERIFY_IS_EQUAL(info, 1);
VERIFY(lm.nfev <= 26*LM_EVAL_COUNT_TOL);
// check norm
fnorm = lm.fvec.blueNorm();
@@ -686,9 +692,10 @@ void testNistChwirut2(void)
chwirut2_functor functor;
LevenbergMarquardt<chwirut2_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 10, 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02);
@@ -706,9 +713,10 @@ void testNistChwirut2(void)
lm.parameters.ftol = 1.E6*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 7, 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02);
@@ -764,9 +772,10 @@ void testNistMisra1a(void)
misra1a_functor functor;
LevenbergMarquardt<misra1a_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 19, 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01);
@@ -780,9 +789,10 @@ void testNistMisra1a(void)
x<< 250., 0.0005;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 5, 4);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01);
@@ -852,9 +862,10 @@ void testNistHahn1(void)
hahn1_functor functor;
LevenbergMarquardt<hahn1_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 11, 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00);
@@ -873,9 +884,10 @@ void testNistHahn1(void)
x<< .1, -.1, .005, -.000001, -.005, .0001, -.0000001;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 11, 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00);
@@ -936,9 +948,10 @@ void testNistMisra1d(void)
misra1d_functor functor;
LevenbergMarquardt<misra1d_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 3);
// VERIFY_IS_EQUAL(info, 3);
LM_CHECK_N_ITERS(lm, 9, 7);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02);
@@ -952,9 +965,10 @@ void testNistMisra1d(void)
x<< 450., 0.0003;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 4, 3);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02);
@@ -1012,13 +1026,14 @@ void testNistLanczos1(void)
lanczos1_functor functor;
LevenbergMarquardt<lanczos1_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 2);
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 79, 72);
// check norm^2
std::cout.precision(30);
std::cout << lm.fvec.squaredNorm() << "\n";
// std::cout.precision(30);
// std::cout << lm.fvec.squaredNorm() << "\n";
VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
@@ -1034,9 +1049,10 @@ void testNistLanczos1(void)
x<< 0.5, 0.7, 3.6, 4.2, 4., 6.3;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 2);
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 9, 8);
// check norm^2
VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
@@ -1098,9 +1114,10 @@ void testNistRat42(void)
rat42_functor functor;
LevenbergMarquardt<rat42_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 10, 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00);
@@ -1115,9 +1132,10 @@ void testNistRat42(void)
x<< 75., 2.5, 0.07;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00);
@@ -1174,9 +1192,10 @@ void testNistMGH10(void)
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 2);
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 284, 249);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01);
@@ -1191,9 +1210,10 @@ void testNistMGH10(void)
x<< 0.02, 4000., 250.;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 3);
// VERIFY_IS_EQUAL(info, 3);
LM_CHECK_N_ITERS(lm, 126, 116);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01);
@@ -1251,9 +1271,10 @@ void testNistBoxBOD(void)
lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
lm.parameters.factor = 10.;
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 31, 25);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
@@ -1270,10 +1291,11 @@ void testNistBoxBOD(void)
lm.parameters.ftol = NumTraits<double>::epsilon();
lm.parameters.xtol = NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 15, 14);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 20, 14);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
// check x
@@ -1331,6 +1353,7 @@ void testNistMGH17(void)
lm.parameters.xtol = NumTraits<double>::epsilon();
lm.parameters.maxfev = 1000;
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
@@ -1342,7 +1365,7 @@ void testNistMGH17(void)
VERIFY_IS_APPROX(x[4], 2.2122699662E-02);
// check return value
VERIFY_IS_EQUAL(info, 2);
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 602, 545);
/*
@@ -1352,9 +1375,10 @@ void testNistMGH17(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 18, 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
@@ -1417,9 +1441,10 @@ void testNistMGH09(void)
LevenbergMarquardt<MGH09_functor> lm(functor);
lm.parameters.maxfev = 1000;
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 490, 376);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04);
@@ -1436,9 +1461,10 @@ void testNistMGH09(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 18, 16);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04);
@@ -1501,9 +1527,10 @@ void testNistBennett5(void)
LevenbergMarquardt<Bennett5_functor> lm(functor);
lm.parameters.maxfev = 1000;
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 758, 744);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04);
@@ -1518,9 +1545,10 @@ void testNistBennett5(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 203, 192);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04);
@@ -1587,9 +1615,10 @@ void testNistThurber(void)
lm.parameters.ftol = 1.E4*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E4*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 39,36);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03);
@@ -1611,9 +1640,10 @@ void testNistThurber(void)
lm.parameters.ftol = 1.E4*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E4*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 29, 28);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03);
@@ -1677,9 +1707,10 @@ void testNistRat43(void)
lm.parameters.ftol = 1.E6*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 27, 20);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03);
@@ -1698,9 +1729,10 @@ void testNistRat43(void)
lm.parameters.ftol = 1.E5*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E5*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 9, 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03);
@@ -1760,9 +1792,10 @@ void testNistEckerle4(void)
eckerle4_functor functor;
LevenbergMarquardt<eckerle4_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 18, 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03);
@@ -1777,9 +1810,10 @@ void testNistEckerle4(void)
x<< 1.5, 5., 450.;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 7, 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03);
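Editor's note on the recurring pattern in the hunks above: the exact return code of minimize() is presumably too fragile to assert on across platforms, so the VERIFY_IS_EQUAL(info, ...) checks are commented out, the value is silenced with EIGEN_UNUSED_VARIABLE, and the tests lean on the residual norm plus LM_CHECK_N_ITERS instead. A hypothetical sketch of what that macro is assumed to check (the real definition lives in the test file; the nfev/njev accessor names are assumptions):

    // Hypothetical sketch only, not the actual Eigen macro. Assumes the solver
    // exposes counts of function (nfev) and Jacobian (njev) evaluations.
    #define LM_CHECK_N_ITERS_SKETCH(lm, ref_nfev, ref_njev) \
      do {                                                  \
        VERIFY_IS_EQUAL((lm).nfev(), (ref_nfev));           \
        VERIFY_IS_EQUAL((lm).njev(), (ref_njev));           \
      } while (0)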

View File

@@ -9,10 +9,10 @@
#include <unsupported/Eigen/NumericalDiff>
// Generic functor
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct Functor
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
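Editor's note on the _Scalar to Scalar_ renames here and in the autodiff files below: identifiers that begin with an underscore followed by an uppercase letter are reserved for the implementation in C++, so the template parameter moves the underscore to the end while the public Scalar typedef keeps its old name. A minimal sketch of the new spelling (GenericFunctor is a placeholder name):

    #include <Eigen/Core>

    template <typename Scalar_, int NX = Eigen::Dynamic, int NY = Eigen::Dynamic>
    struct GenericFunctor {
      typedef Scalar_ Scalar;   // public typedef keeps the unreserved name
      enum { InputsAtCompileTime = NX, ValuesAtCompileTime = NY };
    };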

View File

@@ -7,6 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_NO_STATIC_ASSERT
#include "main.h"
#include <unsupported/Eigen/AlignedVector3>

View File

@@ -29,10 +29,10 @@ EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
return (p-Vector(Scalar(-1),Scalar(1.))).norm() + (p.array() * p.array()).sum() + p.dot(p);
}
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct TestFunc1
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
@@ -106,7 +106,6 @@ struct TestFunc1
};
#if EIGEN_HAS_VARIADIC_TEMPLATES
/* Test functor for the C++11 features. */
template <typename Scalar>
struct integratorFunctor
@@ -186,7 +185,6 @@ template<typename Func> void forward_jacobian_cpp11(const Func& f)
VERIFY_IS_APPROX(y, yref);
VERIFY_IS_APPROX(j, jref);
}
#endif
template<typename Func> void forward_jacobian(const Func& f)
{
@@ -247,9 +245,7 @@ void test_autodiff_jacobian()
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) ));
#if EIGEN_HAS_VARIADIC_TEMPLATES
CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) ));
#endif
}

View File

@@ -84,9 +84,7 @@ void check_limits_specialization()
// workaround "unused typedef" warning:
VERIFY(!bool(internal::is_same<B, A>::value));
#if EIGEN_HAS_CXX11
VERIFY(bool(std::is_base_of<B, A>::value));
#endif
}
EIGEN_DECLARE_TEST(autodiff_scalar)

View File

@@ -0,0 +1,31 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2012 Kolja Brix <brix@igpm.rwth-aaachen.de>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "../../test/sparse_solver.h"
#include <Eigen/IterativeSolvers>
template<typename T> void test_bicgstabl_T()
{
BiCGSTABL<SparseMatrix<T>, DiagonalPreconditioner<T> > bicgstabl_colmajor_diag;
BiCGSTABL<SparseMatrix<T>, IncompleteLUT<T> > bicgstabl_colmajor_ilut;
// This does not change the tolerance of the test, only the tolerance of the solver.
bicgstabl_colmajor_diag.setTolerance(NumTraits<T>::epsilon()*20);
bicgstabl_colmajor_ilut.setTolerance(NumTraits<T>::epsilon()*20);
CALL_SUBTEST( check_sparse_square_solving(bicgstabl_colmajor_diag) );
CALL_SUBTEST( check_sparse_square_solving(bicgstabl_colmajor_ilut) );
}
EIGEN_DECLARE_TEST(bicgstabl)
{
CALL_SUBTEST_1(test_bicgstabl_T<double>());
CALL_SUBTEST_2(test_bicgstabl_T<std::complex<double> >());
}
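Editor's note: bicgstabl is one of the new solver tests wired up in the CMake changes. Below is a hedged usage sketch of the BiCGSTAB(L) solver it exercises, assuming it follows Eigen's usual IterativeSolverBase interface (compute/solve/info/iterations/error); the tridiagonal test matrix is only for illustration.

    #include <Eigen/Dense>
    #include <Eigen/Sparse>
    #include <unsupported/Eigen/IterativeSolvers>
    #include <iostream>
    #include <vector>

    int main() {
      const int n = 100;
      // Simple diagonally dominant tridiagonal matrix.
      std::vector<Eigen::Triplet<double> > trips;
      for (int i = 0; i < n; ++i) {
        trips.emplace_back(i, i, 4.0);
        if (i + 1 < n) {
          trips.emplace_back(i, i + 1, -1.0);
          trips.emplace_back(i + 1, i, -1.0);
        }
      }
      Eigen::SparseMatrix<double> A(n, n);
      A.setFromTriplets(trips.begin(), trips.end());
      Eigen::VectorXd b = Eigen::VectorXd::Random(n);

      Eigen::BiCGSTABL<Eigen::SparseMatrix<double>, Eigen::DiagonalPreconditioner<double> > solver;
      solver.setTolerance(1e-12);   // solver tolerance, as in the test above
      solver.compute(A);
      Eigen::VectorXd x = solver.solve(b);
      std::cout << "iterations: " << solver.iterations()
                << ", estimated error: " << solver.error() << "\n";
      return solver.info() == Eigen::Success ? 0 : 1;
    }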

View File

@@ -14,57 +14,57 @@
using Eigen::Tensor;
using Eigen::array;
using Eigen::Tuple;
using Eigen::Pair;
template <int DataLayout>
static void test_simple_index_tuples()
static void test_simple_index_pairs()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
index_pairs = tensor.index_pairs();
for (DenseIndex n = 0; n < 2*3*5*7; ++n) {
const Tuple<DenseIndex, float>& v = index_tuples.coeff(n);
const Pair<DenseIndex, float>& v = index_pairs.coeff(n);
VERIFY_IS_EQUAL(v.first, n);
VERIFY_IS_EQUAL(v.second, tensor.coeff(n));
}
}
template <int DataLayout>
static void test_index_tuples_dim()
static void test_index_pairs_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
index_tuples = tensor.index_tuples();
index_pairs = tensor.index_pairs();
for (Eigen::DenseIndex n = 0; n < tensor.size(); ++n) {
const Tuple<DenseIndex, float>& v = index_tuples(n); //(i, j, k, l);
const Pair<DenseIndex, float>& v = index_pairs(n); //(i, j, k, l);
VERIFY_IS_EQUAL(v.first, n);
VERIFY_IS_EQUAL(v.second, tensor(n));
}
}
template <int DataLayout>
static void test_argmax_tuple_reducer()
static void test_argmax_pair_reducer()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
index_pairs = tensor.index_pairs();
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
Tensor<Pair<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce(
dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
reduced = index_pairs.reduce(
dims, internal::ArgMaxPairReducer<Pair<DenseIndex, float> >());
Tensor<float, 0, DataLayout> maxi = tensor.maximum();
@@ -72,9 +72,9 @@ static void test_argmax_tuple_reducer()
array<DenseIndex, 3> reduce_dims;
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce(
reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
Tensor<Pair<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_pairs.reduce(
reduce_dims, internal::ArgMaxPairReducer<Pair<DenseIndex, float> >());
Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims);
@@ -84,19 +84,19 @@ static void test_argmax_tuple_reducer()
}
template <int DataLayout>
static void test_argmin_tuple_reducer()
static void test_argmin_pair_reducer()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
index_pairs = tensor.index_pairs();
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
Tensor<Pair<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce(
dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
reduced = index_pairs.reduce(
dims, internal::ArgMinPairReducer<Pair<DenseIndex, float> >());
Tensor<float, 0, DataLayout> mini = tensor.minimum();
@@ -104,9 +104,9 @@ static void test_argmin_tuple_reducer()
array<DenseIndex, 3> reduce_dims;
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce(
reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
Tensor<Pair<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_pairs.reduce(
reduce_dims, internal::ArgMinPairReducer<Pair<DenseIndex, float> >());
Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims);
@@ -275,14 +275,14 @@ static void test_argmin_dim()
EIGEN_DECLARE_TEST(cxx11_tensor_argmax)
{
CALL_SUBTEST(test_simple_index_tuples<RowMajor>());
CALL_SUBTEST(test_simple_index_tuples<ColMajor>());
CALL_SUBTEST(test_index_tuples_dim<RowMajor>());
CALL_SUBTEST(test_index_tuples_dim<ColMajor>());
CALL_SUBTEST(test_argmax_tuple_reducer<RowMajor>());
CALL_SUBTEST(test_argmax_tuple_reducer<ColMajor>());
CALL_SUBTEST(test_argmin_tuple_reducer<RowMajor>());
CALL_SUBTEST(test_argmin_tuple_reducer<ColMajor>());
CALL_SUBTEST(test_simple_index_pairs<RowMajor>());
CALL_SUBTEST(test_simple_index_pairs<ColMajor>());
CALL_SUBTEST(test_index_pairs_dim<RowMajor>());
CALL_SUBTEST(test_index_pairs_dim<ColMajor>());
CALL_SUBTEST(test_argmax_pair_reducer<RowMajor>());
CALL_SUBTEST(test_argmax_pair_reducer<ColMajor>());
CALL_SUBTEST(test_argmin_pair_reducer<RowMajor>());
CALL_SUBTEST(test_argmin_pair_reducer<ColMajor>());
CALL_SUBTEST(test_simple_argmax<RowMajor>());
CALL_SUBTEST(test_simple_argmax<ColMajor>());
CALL_SUBTEST(test_simple_argmin<RowMajor>());
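Editor's note on the Tuple to Pair renames in this test: the (index, value) helper type and its reducers keep their behaviour under the new names. A hedged usage sketch, assuming Eigen::Pair, index_pairs() and ArgMaxPairReducer have the same semantics as the old Tuple-based API they replace:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      using Eigen::DenseIndex;
      Eigen::Tensor<float, 2> t(3, 4);
      t.setRandom();

      // One (linear index, value) pair per element.
      Eigen::Tensor<Eigen::Pair<DenseIndex, float>, 2> pairs(3, 4);
      pairs = t.index_pairs();

      // Reducing the pairs over all dimensions yields the argmax as a single pair.
      Eigen::DimensionList<DenseIndex, 2> all_dims;
      Eigen::Tensor<Eigen::Pair<DenseIndex, float>, 0> best;
      best = pairs.reduce(all_dims,
                          Eigen::internal::ArgMaxPairReducer<Eigen::Pair<DenseIndex, float> >());
      std::cout << "max " << best().second << " at linear index " << best().first << "\n";
      return 0;
    }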

View File

@@ -16,7 +16,6 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1
#include "main.h"

View File

@@ -25,10 +25,8 @@ static void test_1d()
vec1(4) = 23; vec2(4) = 4;
vec1(5) = 42; vec2(5) = 5;
int col_major[6];
int row_major[6];
memset(col_major, 0, 6*sizeof(int));
memset(row_major, 0, 6*sizeof(int));
int col_major[6] = {0};
int row_major[6] = {0};
TensorMap<Tensor<int, 1> > vec3(col_major, 6);
TensorMap<Tensor<int, 1, RowMajor> > vec4(row_major, 6);
@@ -88,10 +86,8 @@ static void test_2d()
mat2(1,1) = 4;
mat2(1,2) = 5;
int col_major[6];
int row_major[6];
memset(col_major, 0, 6*sizeof(int));
memset(row_major, 0, 6*sizeof(int));
int col_major[6] = {0};
int row_major[6] = {0};
TensorMap<Tensor<int, 2> > mat3(row_major, 2, 3);
TensorMap<Tensor<int, 2, RowMajor> > mat4(col_major, 2, 3);
@@ -148,10 +144,8 @@ static void test_3d()
}
}
int col_major[2*3*7];
int row_major[2*3*7];
memset(col_major, 0, 2*3*7*sizeof(int));
memset(row_major, 0, 2*3*7*sizeof(int));
int col_major[2*3*7] = {0};
int row_major[2*3*7] = {0};
TensorMap<Tensor<int, 3> > mat3(col_major, 2, 3, 7);
TensorMap<Tensor<int, 3, RowMajor> > mat4(row_major, 2, 3, 7);
@@ -286,7 +280,6 @@ static void test_compound_assign()
}
static void test_std_initializers_tensor() {
#if EIGEN_HAS_VARIADIC_TEMPLATES
Tensor<int, 1> a(3);
a.setValues({0, 1, 2});
VERIFY_IS_EQUAL(a(0), 0);
@@ -355,7 +348,6 @@ static void test_std_initializers_tensor() {
VERIFY_IS_EQUAL(c(2, 1, 1), 25);
VERIFY_IS_EQUAL(c(2, 1, 2), 26);
VERIFY_IS_EQUAL(c(2, 1, 3), 27);
#endif // EIGEN_HAS_VARIADIC_TEMPLATES
}
EIGEN_DECLARE_TEST(cxx11_tensor_assign)
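Editor's note on the `= {0}` replacements above: an aggregate initializer with fewer initializers than elements value-initializes the remainder, so the arrays start out fully zeroed and the memset() calls become redundant:

    int col_major[6] = {0};   // all six elements are 0
    int row_major[6] = {0};   // equivalent to memset(row_major, 0, sizeof row_major)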

View File

@@ -244,7 +244,7 @@ static void test_eval_tensor_binary_with_unary_expr_block() {
rhs.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(
(lhs.square() + rhs.square()).sqrt(),
(lhs.abs() + rhs.abs()).sqrt(),
[&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}

View File

@@ -91,15 +91,7 @@ static void test_vectorized_broadcasting()
}
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
tensor.resize(11,3,5);
#else
array<Index, 3> new_dims;
new_dims[0] = 11;
new_dims[1] = 3;
new_dims[2] = 5;
tensor.resize(new_dims);
#endif
tensor.setRandom();
broadcast = tensor.broadcast(broadcasts);
@@ -124,15 +116,7 @@ static void test_static_broadcasting()
Tensor<float, 3, DataLayout> tensor(8,3,5);
tensor.setRandom();
#if defined(EIGEN_HAS_INDEX_LIST)
Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts;
#else
Eigen::array<int, 3> broadcasts;
broadcasts[0] = 2;
broadcasts[1] = 3;
broadcasts[2] = 4;
#endif
Tensor<float, 3, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
@@ -148,15 +132,7 @@ static void test_static_broadcasting()
}
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
tensor.resize(11,3,5);
#else
array<Index, 3> new_dims;
new_dims[0] = 11;
new_dims[1] = 3;
new_dims[2] = 5;
tensor.resize(new_dims);
#endif
tensor.setRandom();
broadcast = tensor.broadcast(broadcasts);
@@ -256,6 +232,22 @@ static void test_simple_broadcasting_n_by_one()
}
}
template <int DataLayout>
static void test_size_one_broadcasting()
{
Tensor<float, 1, DataLayout> tensor(1);
tensor.setRandom();
array<ptrdiff_t, 1> broadcasts = {64};
Tensor<float, 1, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), broadcasts[0]);
for (int i = 0; i < broadcasts[0]; ++i) {
VERIFY_IS_EQUAL(tensor(0), broadcast(i));
}
}
template <int DataLayout>
static void test_simple_broadcasting_one_by_n_by_one_1d()
{
@@ -328,4 +320,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_broadcasting)
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<RowMajor>());
CALL_SUBTEST(test_size_one_broadcasting<ColMajor>());
CALL_SUBTEST(test_size_one_broadcasting<RowMajor>());
}
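Editor's note: the new test_size_one_broadcasting subtest covers broadcasting a one-element tensor. A small sketch of the behaviour it checks, where each broadcast factor multiplies the corresponding dimension:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<float, 1> t(1);
      t.setConstant(3.f);
      Eigen::array<ptrdiff_t, 1> bcast = {{4}};
      Eigen::Tensor<float, 1> out = t.broadcast(bcast);   // dimension 0 becomes 1 * 4
      for (int i = 0; i < 4; ++i) std::cout << out(i) << " ";   // prints: 3 3 3 3
      std::cout << "\n";
      return 0;
    }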

View File

@@ -38,24 +38,24 @@ template <typename T> T cwiseMin(T x, T y) { return cl::sycl::min(x, y); }
}
}
struct EqualAssignement {
struct EqualAssignment {
template <typename Lhs, typename Rhs>
void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; }
};
struct PlusEqualAssignement {
struct PlusEqualAssignment {
template <typename Lhs, typename Rhs>
void operator()(Lhs& lhs, const Rhs& rhs) { lhs += rhs; }
};
template <typename DataType, int DataLayout,
typename Assignement, typename Operator>
typename Assignment, typename Operator>
void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
Operator op;
Assignement asgn;
Assignment asgn;
{
/* Assignement(out, Operator(in)) */
/* Assignment(out, Operator(in)) */
Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
in = in.random() + DataType(0.01);
@@ -84,9 +84,10 @@ void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
sycl_device.deallocate(gpu_data_out);
}
{
/* Assignement(out, Operator(out)) */
/* Assignment(out, Operator(out)) */
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
out = out.random() + DataType(0.01);
// Offset by 1 to avoid tiny outputs (< 1e-6), as they can easily fail.
out = out.random() + DataType(1);
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType *gpu_data_out = static_cast<DataType *>(
sycl_device.allocate(out.size() * sizeof(DataType)));
@@ -137,11 +138,11 @@ DECLARE_UNARY_STRUCT(isnan)
DECLARE_UNARY_STRUCT(isfinite)
DECLARE_UNARY_STRUCT(isinf)
template <typename DataType, int DataLayout, typename Assignement>
template <typename DataType, int DataLayout, typename Assignment>
void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
#define RUN_UNARY_TEST(FUNC) \
test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \
test_unary_builtins_for_scalar<DataType, DataLayout, Assignment, \
op_##FUNC>(sycl_device, tensor_range)
RUN_UNARY_TEST(abs);
RUN_UNARY_TEST(sqrt);
@@ -190,9 +191,9 @@ template <typename DataType, int DataLayout>
void test_unary_builtins(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
test_unary_builtins_for_assignement<DataType, DataLayout,
PlusEqualAssignement>(sycl_device, tensor_range);
PlusEqualAssignment>(sycl_device, tensor_range);
test_unary_builtins_for_assignement<DataType, DataLayout,
EqualAssignement>(sycl_device, tensor_range);
EqualAssignment>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout,
op_isnan>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout,

View File

@@ -149,7 +149,7 @@ struct test_cast_runner {
// Only certain types allow cast from std::complex<>.
template<typename Scalar>
struct test_cast_runner<Scalar, typename internal::enable_if<NumTraits<Scalar>::IsComplex>::type> {
struct test_cast_runner<Scalar, std::enable_if_t<NumTraits<Scalar>::IsComplex>> {
static void run() {
test_type_cast<Scalar, half>();
test_type_cast<Scalar, bfloat16>();
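Editor's note on the trait change above (mirrored by the internal::conditional to std::conditional_t change in the FFT test further down): with C++14 as the baseline, Eigen's internal trait helpers give way to the standard ones. A minimal sketch of the SFINAE-specialization idiom, using hypothetical names:

    #include <type_traits>
    #include <Eigen/Core>

    template <typename Scalar, typename Enable = void>
    struct cast_runner_sketch {            // generic case
      static void run() {}
    };

    template <typename Scalar>
    struct cast_runner_sketch<Scalar, std::enable_if_t<Eigen::NumTraits<Scalar>::IsComplex>> {
      static void run() {}                 // selected only when Scalar is complex
    };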

View File

@@ -25,10 +25,6 @@ typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout>
void test_gpu_contraction(int m_size, int k_size, int n_size)
{
std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
// with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, which is the number of threads in blocks on
// a 15 SM GK110 GPU
Tensor<float, 2, DataLayout> t_left(m_size, k_size);
Tensor<float, 2, DataLayout> t_right(k_size, n_size);
Tensor<float, 2, DataLayout> t_result(m_size, n_size);
@@ -171,25 +167,45 @@ void test_gpu_contraction_n() {
template<int DataLayout>
void test_gpu_contraction_sizes() {
int m_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257 , 511,
512, 513, 1023, 1024, 1025};
int m_sizes[3][5] = {{ 31, 39, 63, 64, 65},
{127, 129, 255, 257 , 511},
{512, 513, 1023, 1024, 1025}};
int n_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257, 511,
512, 513, 1023, 1024, 1025};
int n_sizes[3][5] = {{ 31, 39, 63, 64, 65},
{127, 129, 255, 257, 511},
{512, 513, 1023, 1024, 1025}};
int k_sizes[] = { 31, 39, 63, 64, 65,
95, 96, 127, 129, 255,
257, 511, 512, 513, 1023,
1024, 1025};
int k_sizes[3][6] = {{ 31, 39, 63, 64, 65, 95},
{ 96, 127, 129, 255, 257, 511},
{512, 513, 725, 1023, 1024, 1025}};
for (int i = 0; i < 15; i++) {
for (int j = 0; j < 15; j++) {
for (int k = 0; k < 17; k++) {
test_gpu_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]);
// Some selection of specific cases.
// - m changes rows each iteration
// - n changes rows every 3 iterations
// - k changes rows every 9 iterations
// - within a row, advance one column each iteration
const int m_cols = 5;
const int n_cols = 5;
const int k_cols = 6;
int m_offset = 0;
int n_offset = 1;
int k_offset = 2;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
for (int l = 0; l < 3; ++l) {
int m = m_sizes[l][m_offset];
int n = n_sizes[j][n_offset];
int k = k_sizes[i][k_offset];
test_gpu_contraction<DataLayout>(m, n, k);
n_offset = (n_offset + 1) % n_cols;
k_offset = (k_offset + 1) % k_cols;
}
m_offset = (m_offset + 1) % m_cols;
if (j < 2) {
n_offset = (n_offset + n_cols - 3) % n_cols; // Rewind 3.
}
}
k_offset = (k_offset + 2 * k_cols - 9) % k_cols; // Rewind 9.
}
}
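Editor's note: to make the row/column walk described by the comments above concrete, here is a host-only sketch (no GPU required) that runs the same offset arithmetic and merely prints the 27 (m, n, k) triples the rewritten loop selects:

    #include <cstdio>

    int main() {
      int m_sizes[3][5] = {{ 31,  39,   63,   64,   65},
                           {127, 129,  255,  257,  511},
                           {512, 513, 1023, 1024, 1025}};
      int n_sizes[3][5] = {{ 31,  39,   63,   64,   65},
                           {127, 129,  255,  257,  511},
                           {512, 513, 1023, 1024, 1025}};
      int k_sizes[3][6] = {{ 31,  39,   63,   64,   65,   95},
                           { 96, 127,  129,  255,  257,  511},
                           {512, 513,  725, 1023, 1024, 1025}};
      const int m_cols = 5, n_cols = 5, k_cols = 6;
      int m_offset = 0, n_offset = 1, k_offset = 2;
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j) {
          for (int l = 0; l < 3; ++l) {
            std::printf("m=%4d n=%4d k=%4d\n",
                        m_sizes[l][m_offset], n_sizes[j][n_offset], k_sizes[i][k_offset]);
            n_offset = (n_offset + 1) % n_cols;
            k_offset = (k_offset + 1) % k_cols;
          }
          m_offset = (m_offset + 1) % m_cols;
          if (j < 2) n_offset = (n_offset + n_cols - 3) % n_cols;  // rewind 3
        }
        k_offset = (k_offset + 2 * k_cols - 9) % k_cols;           // rewind 9
      }
      return 0;
    }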

View File

@@ -20,7 +20,6 @@ using Eigen::Tensor;
template <int DataLayout>
static void test_map_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
@@ -35,14 +34,12 @@ static void test_map_as_index()
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_matrix_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
@@ -53,14 +50,12 @@ static void test_matrix_as_index()
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_varlist_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
@@ -68,14 +63,12 @@ static void test_varlist_as_index()
VERIFY_IS_EQUAL(tensor.coeff({1,2,4,1}), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef({1,2,4,1}), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_sizes_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
@@ -84,7 +77,6 @@ static void test_sizes_as_index()
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}

View File

@@ -14,6 +14,7 @@
#define EIGEN_USE_GPU
#include "main.h"
#include "OffByOneScalar.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
@@ -175,6 +176,44 @@ void test_3d_convolution(Context* context)
context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims);
}
// Helper method to synchronize device.
template<typename Device>
void synchronize(Device& device) { /*nothing*/ }
template<>
void synchronize(Eigen::GpuDevice& device) {
device.synchronize();
}
template <typename DataType, typename TensorDevice>
void test_device_memory(const TensorDevice& device) {
int count = 100;
Eigen::array<int, 1> tensorRange = {{count}};
Eigen::Tensor<DataType, 1> host(tensorRange);
Eigen::Tensor<DataType, 1> expected(tensorRange);
DataType* device_data = static_cast<DataType*>(device.allocate(count * sizeof(DataType)));
// memset
const char byte_value = static_cast<char>(0xAB);
device.memset(device_data, byte_value, count * sizeof(DataType));
device.memcpyDeviceToHost(host.data(), device_data, count * sizeof(DataType));
synchronize(device);
memset(expected.data(), byte_value, count * sizeof(DataType));
for (size_t i=0; i<count; i++) {
VERIFY_IS_EQUAL(host(i), expected(i));
}
// fill
DataType fill_value = DataType(7);
std::fill_n(expected.data(), count, fill_value);
device.fill(device_data, device_data + count, fill_value);
device.memcpyDeviceToHost(host.data(), device_data, count * sizeof(DataType));
synchronize(device);
for (int i=0; i<count; i++) {
VERIFY_IS_EQUAL(host(i), expected(i));
}
device.deallocate(device_data);
}
void test_cpu() {
Eigen::Tensor<float, 3> in1(40,50,70);
@@ -266,6 +305,9 @@ void test_cpu() {
}
}
}
test_device_memory<float>(context.device());
test_device_memory<OffByOneScalar<int>>(context.device());
}
void test_gpu() {
@@ -386,6 +428,8 @@ void test_gpu() {
#endif
test_device_memory<float>(context.device());
test_device_memory<OffByOneScalar<int>>(context.device());
}

View File

@@ -18,26 +18,36 @@
#define EIGEN_USE_SYCL
#include "main.h"
#include "OffByOneScalar.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <stdint.h>
#include <iostream>
template <typename DataType, int DataLayout, typename IndexType>
void test_device_memory(const Eigen::SyclDevice &sycl_device) {
std::cout << "Running on : "
<< sycl_device.sycl_queue().get_device(). template get_info<cl::sycl::info::device::name>()
<<std::endl;
IndexType sizeDim1 = 100;
array<IndexType, 1> tensorRange = {{sizeDim1}};
Tensor<DataType, 1, DataLayout,IndexType> in(tensorRange);
Tensor<DataType, 1, DataLayout,IndexType> in1(tensorRange);
memset(in1.data(), 1, in1.size() * sizeof(DataType));
DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
// memset
memset(in1.data(), 1, in1.size() * sizeof(DataType));
sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType));
sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
for (IndexType i=0; i<in.size(); i++) {
VERIFY_IS_EQUAL(in(i), in1(i));
}
// fill
DataType value = DataType(7);
std::fill_n(in1.data(), in1.size(), value);
sycl_device.fill(gpu_in_data, gpu_in_data + in.size(), value);
sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
for (IndexType i=0; i<in.size(); i++) {
VERIFY_IS_EQUAL(in(i), in1(i));
}
sycl_device.deallocate(gpu_in_data);
}
@@ -58,6 +68,31 @@ void test_device_exceptions(const Eigen::SyclDevice &sycl_device) {
sycl_device.deallocate(gpu_data);
}
template<typename DataType, int DataLayout, typename IndexType>
void test_device_attach_buffer(const Eigen::SyclDevice &sycl_device) {
IndexType sizeDim1 = 100;
array<IndexType, 1> tensorRange = {{sizeDim1}};
Tensor<DataType, 1, DataLayout, IndexType> in(tensorRange);
cl::sycl::buffer<buffer_scalar_t, 1> buffer(cl::sycl::range<1>(sizeDim1 * sizeof(DataType)));
DataType* gpu_in_data = static_cast<DataType*>(sycl_device.attach_buffer(buffer));
// fill
DataType value = DataType(7);
std::fill_n(in.data(), in.size(), value);
sycl_device.fill(gpu_in_data, gpu_in_data + in.size(), value);
// Check that buffer is filled with the correct value.
auto reint = buffer.reinterpret<DataType>(cl::sycl::range<1>(sizeDim1));
auto access = reint.template get_access<cl::sycl::access::mode::read>();
for (IndexType i=0; i<in.size(); i++) {
VERIFY_IS_EQUAL(in(i), access[i]);
}
sycl_device.detach_buffer(gpu_in_data);
}
template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){
std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
QueueInterface queueInterface(d);
@@ -68,10 +103,12 @@ template<typename DataType> void sycl_device_test_per_device(const cl::sycl::dev
//test_device_exceptions<DataType, RowMajor>(sycl_device);
/// this test throw an exception. enable it if you want to see the exception
//test_device_exceptions<DataType, ColMajor>(sycl_device);
test_device_attach_buffer<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_device_sycl) {
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_device_test_per_device<float>(device));
CALL_SUBTEST(sycl_device_test_per_device<OffByOneScalar<int>>(device));
}
}

View File

@@ -612,43 +612,42 @@ static void test_async_execute_binary_expr(Device d)
}
}
#ifdef EIGEN_DONT_VECTORIZE
#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
#else
#define VECTORIZABLE(VAL) VAL
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE 0
#endif
#define VECTORIZABLE(T, VAL) !EIGEN_DONT_VECTORIZE && Eigen::internal::packet_traits<T>::Vectorizable && VAL
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))
// NOTE: Currently only ThreadPoolDevice supports async expression evaluation.
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))
EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
Eigen::DefaultDevice default_device;
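Editor's note on the VECTORIZABLE rework above: the macro now also takes the scalar type, so vectorized subtests are only generated when Eigen actually provides packets for T. A small host-side illustration (MyScalar is a hypothetical, non-vectorizable type):

    #include <Eigen/Core>
    #include <iostream>

    struct MyScalar { float v; };

    int main() {
      // 1 on SIMD targets, 0 when vectorization is disabled.
      std::cout << "float vectorizable: "
                << bool(Eigen::internal::packet_traits<float>::Vectorizable) << "\n";
      // Falls back to the generic packet_traits, which reports 0.
      std::cout << "MyScalar vectorizable: "
                << bool(Eigen::internal::packet_traits<MyScalar>::Vectorizable) << "\n";
      return 0;
    }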

View File

@@ -130,7 +130,7 @@ static void test_3d()
Tensor<float, 3, RowMajor> mat4(2,3,7);
mat4 = mat2 * 3.14f;
Tensor<float, 3> mat5(2,3,7);
mat5 = mat1.inverse().log();
mat5 = (mat1 + mat1.constant(1)).inverse().log();
Tensor<float, 3, RowMajor> mat6(2,3,7);
mat6 = mat2.pow(0.5f) * 3.14f;
Tensor<float, 3> mat7(2,3,7);
@@ -150,7 +150,7 @@ static void test_3d()
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), val + val);
VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f);
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/val));
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/(val + 1)));
VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f);
VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f)));
VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f);
@@ -305,10 +305,10 @@ void test_minmax_nan_propagation_templ() {
const Scalar kNaN = std::numeric_limits<Scalar>::quiet_NaN();
const Scalar kInf = std::numeric_limits<Scalar>::infinity();
const Scalar kZero(0);
Tensor<Scalar, 1> vec_all_nan(size);
Tensor<Scalar, 1> vec_full_nan(size);
Tensor<Scalar, 1> vec_one_nan(size);
Tensor<Scalar, 1> vec_zero(size);
vec_all_nan.setConstant(kNaN);
vec_full_nan.setConstant(kNaN);
vec_zero.setZero();
vec_one_nan.setZero();
vec_one_nan(size/2) = kNaN;
@@ -330,12 +330,12 @@ void test_minmax_nan_propagation_templ() {
// max(nan, 0) = nan
// max(0, nan) = nan
// max(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(kZero));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(vec_full_nan));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(kZero));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(vec_zero));
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(kNaN));
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNaN>(kZero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNaN>(vec_zero));
@@ -344,12 +344,12 @@ void test_minmax_nan_propagation_templ() {
// max(nan, 0) = 0
// max(0, nan) = 0
// max(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_all_nan.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_all_nan.template cwiseMax<PropagateNumbers>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_full_nan.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_full_nan.template cwiseMax<PropagateNumbers>(vec_zero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_zero));
@@ -358,12 +358,12 @@ void test_minmax_nan_propagation_templ() {
// min(nan, 0) = nan
// min(0, nan) = nan
// min(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(kZero));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(vec_full_nan));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(kZero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(vec_zero));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNaN>(kZero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNaN>(vec_zero));
@@ -372,12 +372,12 @@ void test_minmax_nan_propagation_templ() {
// min(nan, 0) = 0
// min(0, nan) = 0
// min(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_all_nan.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_all_nan.template cwiseMin<PropagateNumbers>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_full_nan.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_full_nan.template cwiseMin<PropagateNumbers>(vec_zero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_zero));
@@ -397,13 +397,13 @@ void test_minmax_nan_propagation_templ() {
VERIFY_IS_EQUAL(val(), kZero);
// Test NaN propagation for tensor of all NaNs.
val = vec_all_nan.template minimum<PropagateNaN>();
val = vec_full_nan.template minimum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_all_nan.template minimum<PropagateNumbers>();
val = vec_full_nan.template minimum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), kInf);
val = vec_all_nan.template maximum<PropagateNaN>();
val = vec_full_nan.template maximum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_all_nan.template maximum<PropagateNumbers>();
val = vec_full_nan.template maximum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), -kInf);
// Test NaN propagation for tensor with a single NaN.

View File

@@ -186,7 +186,7 @@ static void test_fft_real_input_energy() {
}
const DSizes<ptrdiff_t, TensorRank> arr = dimensions;
typedef typename internal::conditional<isComplexInput == true, std::complex<RealScalar>, RealScalar>::type InputScalar;
typedef std::conditional_t<isComplexInput == true, std::complex<RealScalar>, RealScalar> InputScalar;
Tensor<InputScalar, TensorRank, DataLayout> input;
input.resize(arr);
@@ -197,7 +197,7 @@ static void test_fft_real_input_energy() {
fft[i] = i;
}
typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar> OutputScalar;
Tensor<OutputScalar, TensorRank, DataLayout> output;
output = input.template fft<FFTResultType, FFTDirection>(fft);

View File

@@ -17,8 +17,6 @@
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
#define EIGEN_GPU_TEST_C99_MATH EIGEN_HAS_CXX11
using Eigen::Tensor;
void test_gpu_nullary() {
@@ -66,6 +64,47 @@ void test_gpu_nullary() {
gpuFree(d_in2);
}
// Tests that there are no indexing overflows when computing tensors with the
// max representable size.
template <typename IndexType,
IndexType N = (std::numeric_limits<IndexType>::max)()>
void test_gpu_nullary_max_size()
{
typedef int8_t DataType;
typedef Tensor<DataType, 1, 0, IndexType> TensorType;
typedef Eigen::array<IndexType, 1> ArrayType;
const IndexType n = N;
TensorType in1((ArrayType(n)));
in1.setZero();
std::size_t in1_bytes = in1.size() * sizeof(DataType);
DataType* d_in1;
gpuMalloc((void**)(&d_in1), in1_bytes);
gpuMemcpy(d_in1, in1.data(), in1_bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<TensorType> gpu_in1(d_in1, ArrayType(n));
gpu_in1.device(gpu_device) = gpu_in1.constant(123);
TensorType new1((ArrayType(n)));
assert(gpuMemcpyAsync(new1.data(), d_in1, in1_bytes, gpuMemcpyDeviceToHost,
gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (IndexType i = 0; i < n; ++i) {
VERIFY_IS_EQUAL(new1(ArrayType(i)), 123);
}
gpuFree(d_in1);
}
void test_gpu_elementwise_small() {
Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2));
Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2));
@@ -619,7 +658,6 @@ void test_gpu_convolution_3d()
}
#if EIGEN_GPU_TEST_C99_MATH
template <typename Scalar>
void test_gpu_lgamma(const Scalar stddev)
{
@@ -658,7 +696,6 @@ void test_gpu_lgamma(const Scalar stddev)
gpuFree(d_in);
gpuFree(d_out);
}
#endif
template <typename Scalar>
void test_gpu_digamma()
@@ -681,8 +718,8 @@ void test_gpu_digamma()
expected_out(2) = Scalar(1.2561176684318);
expected_out(3) = Scalar(2.398239129535781);
expected_out(4) = Scalar(9.210340372392849);
expected_out(5) = std::numeric_limits<Scalar>::infinity();
expected_out(6) = std::numeric_limits<Scalar>::infinity();
expected_out(5) = std::numeric_limits<Scalar>::quiet_NaN();
expected_out(6) = std::numeric_limits<Scalar>::quiet_NaN();
std::size_t bytes = in.size() * sizeof(Scalar);
@@ -704,11 +741,8 @@ void test_gpu_digamma()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (int i = 0; i < 5; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 5; i < 7; ++i) {
VERIFY_IS_EQUAL(out(i), expected_out(i));
for (int i = 0; i < 7; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}
gpuFree(d_in);
@@ -741,7 +775,7 @@ void test_gpu_zeta()
expected_out(0) = std::numeric_limits<Scalar>::infinity();
expected_out(1) = Scalar(1.61237534869);
expected_out(2) = Scalar(0.234848505667);
expected_out(3) = Scalar(1.03086757337e-5);
expected_out(3) = std::numeric_limits<Scalar>::quiet_NaN();
expected_out(4) = Scalar(0.367879440865);
expected_out(5) = Scalar(0.054102025820864097);
@@ -769,13 +803,8 @@ void test_gpu_zeta()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(out(0), expected_out(0));
VERIFY((std::isnan)(out(3)));
for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 6; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}
gpuFree(d_in_x);
@@ -990,7 +1019,6 @@ void test_gpu_igammac()
gpuFree(d_out);
}
#if EIGEN_GPU_TEST_C99_MATH
template <typename Scalar>
void test_gpu_erf(const Scalar stddev)
{
@@ -1068,7 +1096,7 @@ void test_gpu_erfc(const Scalar stddev)
gpuFree(d_in);
gpuFree(d_out);
}
#endif
template <typename Scalar>
void test_gpu_ndtri()
{
@@ -1117,13 +1145,8 @@ void test_gpu_ndtri()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(out(0), expected_out(0));
VERIFY((std::isnan)(out(3)));
for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 6; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}
gpuFree(d_in_x);
@@ -1262,12 +1285,8 @@ void test_gpu_betainc()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (int i = 1; i < 125; ++i) {
if ((std::isnan)(expected_out(i))) {
VERIFY((std::isnan)(out(i)));
} else {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 125; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}
gpuFree(d_in_x);
@@ -1541,6 +1560,10 @@ void test_gpu_gamma_sample_der_alpha()
EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
{
CALL_SUBTEST_1(test_gpu_nullary());
CALL_SUBTEST_1(test_gpu_nullary_max_size<int16_t>());
CALL_SUBTEST_1(test_gpu_nullary_max_size<int32_t>());
CALL_SUBTEST_1((test_gpu_nullary_max_size<
int64_t, (std::numeric_limits<int32_t>::max)() + 100ll>()));
CALL_SUBTEST_1(test_gpu_elementwise_small());
CALL_SUBTEST_1(test_gpu_elementwise());
CALL_SUBTEST_1(test_gpu_props());
@@ -1560,7 +1583,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
CALL_SUBTEST_3(test_gpu_convolution_3d<RowMajor>());
#endif
#if EIGEN_GPU_TEST_C99_MATH
// std::erf, std::erfc, and so on were only added in c++11. We use them
// as a golden reference to validate the results produced by Eigen. Therefore
// we can only run these tests if we use a c++11 compiler.
@@ -1638,6 +1660,4 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
CALL_SUBTEST_6(test_gpu_gamma_sample_der_alpha<float>());
CALL_SUBTEST_6(test_gpu_gamma_sample_der_alpha<double>());
#endif
#endif
}
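Editor's note on the loops collapsed above: the NaN special cases in the digamma, zeta, ndtri and betainc checks are now folded into VERIFY_IS_CWISE_APPROX, which is assumed to treat two NaNs as a match. A stand-alone helper illustrating that comparison rule (hypothetical, not the harness macro):

    #include <algorithm>
    #include <cmath>

    // Hypothetical helper, not Eigen's macro: values match if both are NaN or
    // if they agree to within a relative/absolute tolerance.
    template <typename T>
    bool cwise_approx(T a, T b, T tol) {
      if (std::isnan(a) || std::isnan(b)) return std::isnan(a) && std::isnan(b);
      return std::abs(a - b) <= tol * std::max<T>(std::abs(a), std::abs(b)) ||
             std::abs(a - b) <= tol;
    }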

View File

@@ -11,8 +11,6 @@
#include <Eigen/CXX11/Tensor>
#ifdef EIGEN_HAS_INDEX_LIST
static void test_static_index_list()
{
Tensor<float, 4> tensor(2,3,5,7);
@@ -26,6 +24,8 @@ static void test_static_index_list()
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[1]), 1);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[2]), 2);
VERIFY_IS_EQUAL(reduction_axis.size(), std::size_t(3));
EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_axis) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<1>(reduction_axis) == 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_axis) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -370,16 +370,12 @@ static void test_dim_check()
}
#endif
EIGEN_DECLARE_TEST(cxx11_tensor_index_list)
{
#ifdef EIGEN_HAS_INDEX_LIST
CALL_SUBTEST(test_static_index_list());
CALL_SUBTEST(test_type2index_list());
CALL_SUBTEST(test_type2indexpair_list());
CALL_SUBTEST(test_dynamic_index_list());
CALL_SUBTEST(test_mixed_index_list());
CALL_SUBTEST(test_dim_check());
#endif
}

View File

@@ -6,131 +6,137 @@
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <sstream>
#include <string>
#include <Eigen/CXX11/Tensor>
template <typename Scalar, int rank, int Layout>
struct test_tensor_ostream_impl {};
template<int DataLayout>
static void test_output_0d()
{
Tensor<int, 0, DataLayout> tensor;
tensor() = 123;
std::stringstream os;
os << tensor;
std::string expected("123");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_1d()
{
Tensor<int, 1, DataLayout> tensor(5);
for (int i = 0; i < 5; ++i) {
tensor(i) = i;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 0, Layout> {
static void run() {
Eigen::Tensor<Scalar, 0> t;
t.setValues(1);
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1");
}
};
std::stringstream os;
os << tensor;
std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
Eigen::Tensor<double,1,DataLayout> empty_tensor(0);
std::stringstream empty_os;
empty_os << empty_tensor;
std::string empty_string;
VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string);
}
template<int DataLayout>
static void test_output_2d()
{
Tensor<int, 2, DataLayout> tensor(5, 3);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 3; ++j) {
tensor(i, j) = i*j;
}
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 1, Layout> {
static void run() {
Eigen::Tensor<Scalar, 1> t = {3};
t.setValues({1, 2, 3});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1 2 3");
}
};
std::stringstream os;
os << tensor;
std::string expected("0 0 0\n0 1 2\n0 2 4\n0 3 6\n0 4 8");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_expr()
{
Tensor<int, 1, DataLayout> tensor1(5);
Tensor<int, 1, DataLayout> tensor2(5);
for (int i = 0; i < 5; ++i) {
tensor1(i) = i;
tensor2(i) = 7;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 2, Layout> {
static void run() {
Eigen::Tensor<Scalar, 2> t = {3, 2};
t.setValues({{1, 2}, {3, 4}, {5, 6}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1 2\n3 4\n5 6");
}
};
std::stringstream os;
os << tensor1 + tensor2;
std::string expected(" 7\n 8\n 9\n10\n11");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_string()
{
Tensor<std::string, 2, DataLayout> tensor(5, 3);
tensor.setConstant(std::string("foo"));
std::cout << tensor << std::endl;
std::stringstream os;
os << tensor;
std::string expected("foo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_const()
{
Tensor<int, 1, DataLayout> tensor(5);
for (int i = 0; i < 5; ++i) {
tensor(i) = i;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 3, Layout> {
static void run() {
Eigen::Tensor<Scalar, 3> t = {4, 3, 2};
t.setValues({{{1, 2}, {3, 4}, {5, 6}},
{{7, 8}, {9, 10}, {11, 12}},
{{13, 14}, {15, 16}, {17, 18}},
{{19, 20}, {21, 22}, {23, 24}}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == " 1 2\n 3 4\n 5 6\n\n 7 8\n 9 10\n11 12\n\n13 14\n15 16\n17 18\n\n19 20\n21 22\n23 24");
}
};
TensorMap<Tensor<const int, 1, DataLayout> > tensor_map(tensor.data(), 5);
template<int Layout>
struct test_tensor_ostream_impl<bool, 2, Layout> {
static void run() {
Eigen::Tensor<bool, 2> t = {3, 2};
t.setValues({{false, true}, {true, false}, {false, false}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "0 1\n1 0\n0 0");
}
};
std::stringstream os;
os << tensor_map;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<std::complex<Scalar>, 2, Layout> {
static void run() {
Eigen::Tensor<std::complex<Scalar>, 2> t = {3, 2};
t.setValues({{std::complex<Scalar>(1, 2), std::complex<Scalar>(12, 3)},
{std::complex<Scalar>(-4, 2), std::complex<Scalar>(0, 5)},
{std::complex<Scalar>(-1, 4), std::complex<Scalar>(5, 27)}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == " (1,2) (12,3)\n(-4,2) (0,5)\n(-1,4) (5,27)");
}
};
std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
template <typename Scalar, int rank, int Layout>
void test_tensor_ostream() {
test_tensor_ostream_impl<Scalar, rank, Layout>::run();
}
EIGEN_DECLARE_TEST(cxx11_tensor_io)
{
CALL_SUBTEST(test_output_0d<ColMajor>());
CALL_SUBTEST(test_output_0d<RowMajor>());
CALL_SUBTEST(test_output_1d<ColMajor>());
CALL_SUBTEST(test_output_1d<RowMajor>());
CALL_SUBTEST(test_output_2d<ColMajor>());
CALL_SUBTEST(test_output_2d<RowMajor>());
CALL_SUBTEST(test_output_expr<ColMajor>());
CALL_SUBTEST(test_output_expr<RowMajor>());
CALL_SUBTEST(test_output_string<ColMajor>());
CALL_SUBTEST(test_output_string<RowMajor>());
CALL_SUBTEST(test_output_const<ColMajor>());
CALL_SUBTEST(test_output_const<RowMajor>());
void test_const_tensor_ostream() {
Eigen::Tensor<float, 0> t;
t.setValues(1);
const Eigen::TensorMap<Eigen::Tensor<const float, 0, Eigen::RowMajor>, Eigen::Unaligned> t_const(
t.data(), Eigen::DSizes<Eigen::DenseIndex, 0>{});
std::ostringstream os;
os << t_const.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1");
}
EIGEN_DECLARE_TEST(cxx11_tensor_io) {
CALL_SUBTEST((test_tensor_ostream<float, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 3, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 3, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 3, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 3, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 3, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 3, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<bool, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<bool, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<std::complex<double>, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<std::complex<float>, 2, Eigen::ColMajor>()));
// Test printing TensorMap with const elements.
CALL_SUBTEST((test_const_tensor_ostream()));
}
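Editor's note: the rewritten io test drives the new TensorIOFormat API instead of the old ad-hoc operator<< checks. A hedged usage sketch, taken directly from the 2-D case above:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<int, 2> t(3, 2);
      t.setValues({{1, 2}, {3, 4}, {5, 6}});
      // Plain() prints space-separated values, one row per line.
      std::cout << t.format(Eigen::TensorIOFormat::Plain()) << "\n";
      // Prints:
      // 1 2
      // 3 4
      // 5 6
      return 0;
    }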

View File

@@ -43,7 +43,6 @@ static void test_simple_reshape()
template <typename>
static void test_static_reshape() {
#if defined(EIGEN_HAS_INDEX_LIST)
using Eigen::type2index;
Tensor<float, 5> tensor(2, 3, 1, 7, 1);
@@ -60,7 +59,6 @@ static void test_static_reshape() {
}
}
}
#endif
}
template <typename>

View File

@@ -0,0 +1,487 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2021 Rohit Santhanam <rohit.santhanam@amd.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<typename>
void test_gpu_numext() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
bool* d_res_bfloat16 = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>());
// Test bfloat16 specific isnan op.
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::bfloat16>());
Tensor<bool, 1> bfloat16_prec(num_elem);
Tensor<bool, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(bool));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_EQUAL(full_prec(i), bfloat16_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
#ifdef EIGEN_HAS_GPU_BF16
template<typename>
void test_gpu_conversion() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_bfloat16(
d_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
d_conv, num_elem);
gpu_float.device(gpu_device) = gpu_float.random();
gpu_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>();
gpu_conv.device(gpu_device) = gpu_bfloat16.cast<float>();
Tensor<float, 1> initial(num_elem);
Tensor<float, 1> final(num_elem);
gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(static_cast<Eigen::bfloat16>(initial(i)), static_cast<Eigen::bfloat16>(final(i)));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_bfloat16);
gpu_device.deallocate(d_conv);
}
template<typename>
void test_gpu_unary() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_float.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().cast<float>();
Tensor<float, 1> bfloat16_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_elementwise() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float1.device(gpu_device) = gpu_float1.random();
gpu_float1.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().cast<float>();
gpu_float2.device(gpu_device) = gpu_float2.random();
gpu_float2.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = (gpu_float1 + gpu_float2) * gpu_float1;
gpu_res_bfloat16.device(gpu_device) = ((gpu_float1.cast<Eigen::bfloat16>() + gpu_float2.cast<Eigen::bfloat16>()) * gpu_float1.cast<Eigen::bfloat16>()).cast<float>();
Tensor<float, 1> bfloat16_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(static_cast<Eigen::bfloat16>(full_prec(i)), static_cast<Eigen::bfloat16>(bfloat16_prec(i)));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_trancendental() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res1_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res1_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res2_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res2_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res3_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res3_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res1_bfloat16(d_res1_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res2_bfloat16(d_res2_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res3_bfloat16(d_res3_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res4_bfloat16(d_res3_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float1.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().cast<float>();
gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>().cast<float>();
gpu_float3.device(gpu_device) = gpu_float3.random();
gpu_float3.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>().cast<float>();
gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::bfloat16>();
gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::bfloat16>();
gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::bfloat16>();
gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast<Eigen::bfloat16>();
gpu_res1_bfloat16.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>();
gpu_res1_bfloat16.device(gpu_device) = gpu_res1_bfloat16.exp();
gpu_res2_bfloat16.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>();
gpu_res2_bfloat16.device(gpu_device) = gpu_res2_bfloat16.log();
gpu_res3_bfloat16.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>();
gpu_res3_bfloat16.device(gpu_device) = gpu_res3_bfloat16.log1p();
gpu_res3_bfloat16.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>();
gpu_res3_bfloat16.device(gpu_device) = gpu_res3_bfloat16.expm1();
Tensor<float, 1> input1(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec1(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec1(num_elem);
Tensor<float, 1> input2(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec2(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec2(num_elem);
Tensor<float, 1> input3(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec3(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec3(num_elem);
gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(bfloat16_prec1.data(), d_res1_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(bfloat16_prec2.data(), d_res2_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(bfloat16_prec3.data(), d_res3_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec1(i), bfloat16_prec1(i));
}
for (int i = 0; i < num_elem; ++i) {
if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy near 1
VERIFY_IS_APPROX(full_prec2(i)+Eigen::bfloat16(0.1f), bfloat16_prec2(i)+Eigen::bfloat16(0.1f));
else
VERIFY_IS_APPROX(full_prec2(i), bfloat16_prec2(i));
}
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec3(i), bfloat16_prec3(i));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_float3);
gpu_device.deallocate(d_res1_bfloat16);
gpu_device.deallocate(d_res1_float);
gpu_device.deallocate(d_res2_bfloat16);
gpu_device.deallocate(d_res2_float);
gpu_device.deallocate(d_res3_float);
gpu_device.deallocate(d_res3_bfloat16);
}
template<typename>
void test_gpu_contractions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int rows = 23;
int cols = 23;
int num_elem = rows*cols;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
d_float1, rows, cols);
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
d_float2, rows, cols);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 2>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, rows, cols);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 2>, Eigen::Aligned> gpu_res_float(
d_res_float, rows, cols);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
typedef Tensor<float, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().contract(gpu_float2.cast<Eigen::bfloat16>(), dims);
Tensor<Eigen::bfloat16, 2> bfloat16_prec(rows, cols);
Tensor<Eigen::bfloat16, 2> full_prec(rows, cols);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.synchronize();
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
if (numext::abs(full_prec(i, j) - bfloat16_prec(i, j)) > Eigen::bfloat16(1e-2f)) {
VERIFY_IS_APPROX(full_prec(i, j), bfloat16_prec(i, j));
}
}
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
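Aside (not part of the patch): the contraction dimensions used above, DimPair(1, 0), pair the second index of gpu_float1 with the first index of gpu_float2, so the contraction is the ordinary matrix product
\[ C_{ij} = \sum_{k} A_{ik} B_{kj}, \]
which is why the float and bfloat16 results can be compared entry by entry against the 1e-2 tolerance.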
template<typename>
void test_gpu_reductions(int size1, int size2, int redux) {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = size1*size2;
int result_size = (redux == 1 ? size1 : size2);
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(result_size * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(result_size * sizeof(Eigen::bfloat16));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
d_float, size1, size2);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, result_size);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, result_size);
gpu_float.device(gpu_device) = gpu_float.random() * 2.0f;
Eigen::array<int, 1> redux_dim = {redux};
gpu_res_float.device(gpu_device) = gpu_float.sum(redux_dim).cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().sum(redux_dim);
Tensor<Eigen::bfloat16, 1> bfloat16_prec(result_size);
Tensor<Eigen::bfloat16, 1> full_prec(result_size);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, result_size*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, result_size*sizeof(Eigen::bfloat16));
gpu_device.synchronize();
for (int i = 0; i < result_size; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_reductions() {
test_gpu_reductions<void>(13, 13, 0);
test_gpu_reductions<void>(13, 13, 1);
test_gpu_reductions<void>(35, 36, 0);
test_gpu_reductions<void>(35, 36, 1);
test_gpu_reductions<void>(36, 35, 0);
test_gpu_reductions<void>(36, 35, 1);
}
template<typename>
void test_gpu_full_reductions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int size = 13;
int num_elem = size*size;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(1 * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(1 * sizeof(Eigen::bfloat16));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
d_float, size, size);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 0>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 0>, Eigen::Aligned> gpu_res_float(
d_res_float);
gpu_float.device(gpu_device) = gpu_float.random();
gpu_res_float.device(gpu_device) = gpu_float.sum().cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().sum();
Tensor<Eigen::bfloat16, 0> bfloat16_prec;
Tensor<Eigen::bfloat16, 0> full_prec;
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::bfloat16));
gpu_device.synchronize();
VERIFY_IS_APPROX(full_prec(), bfloat16_prec());
gpu_res_float.device(gpu_device) = gpu_float.maximum().cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().maximum();
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::bfloat16));
gpu_device.synchronize();
VERIFY_IS_APPROX(full_prec(), bfloat16_prec());
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_forced_evals() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16_1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16_2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16_1(
d_res_bfloat16_1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_bfloat16_2(
d_res_bfloat16_2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
Eigen::array<int, 1> no_bcast;
no_bcast[0] = 1;
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_float.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_bfloat16_1.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().eval().cast<float>();
gpu_res_bfloat16_2.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().broadcast(no_bcast).eval().cast<float>();
Tensor<float, 1> bfloat16_prec1(num_elem);
Tensor<float, 1> bfloat16_prec2(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec1.data(), d_res_bfloat16_1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(bfloat16_prec2.data(), d_res_bfloat16_2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec1(i));
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec2(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16_1);
gpu_device.deallocate(d_res_bfloat16_2);
gpu_device.deallocate(d_res_float);
}
#endif
EIGEN_DECLARE_TEST(cxx11_tensor_of_bfloat16_gpu)
{
CALL_SUBTEST_1(test_gpu_numext<void>());
// The reduction unit tests have been excluded until a working
// implementation to expand the accumulator data type to float32
// is available.
// TODO: add reduction unit tests
#ifdef EIGEN_HAS_GPU_BF16
CALL_SUBTEST_2(test_gpu_conversion<void>());
CALL_SUBTEST_3(test_gpu_unary<void>());
CALL_SUBTEST_4(test_gpu_elementwise<void>());
CALL_SUBTEST_5(test_gpu_trancendental<void>());
CALL_SUBTEST_6(test_gpu_contractions<void>());
CALL_SUBTEST_7(test_gpu_reductions<void>());
CALL_SUBTEST_8(test_gpu_full_reductions<void>());
CALL_SUBTEST_9(test_gpu_forced_evals<void>());
#else
std::cout << "bfloat16 floats are not supported by this version of gpu: skipping the test" << std::endl;
#endif
}
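A minimal host-side sketch (not part of the test file) of the precision the GPU tests above tolerate; it assumes only that Eigen::bfloat16 is available from Eigen/Core, as in recent Eigen releases. bfloat16 keeps an 8-bit significand, so a float survives the round trip with roughly two to three significant decimal digits:
#include <Eigen/Core>
#include <iostream>
int main() {
  float x = 1.2345678f;
  Eigen::bfloat16 b = static_cast<Eigen::bfloat16>(x);  // round to bfloat16 (8-bit significand)
  float back = static_cast<float>(b);                    // widen back to float
  std::cout << x << " -> " << back << std::endl;         // low-order digits are lost (~1.234)
  return 0;
}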

View File

@@ -47,6 +47,20 @@ static void test_abs()
}
}
static void test_arg()
{
Tensor<std::complex<float>, 1> data1(3);
Tensor<std::complex<double>, 1> data2(3);
data1.setRandom();
data2.setRandom();
Tensor<float, 1> arg1 = data1.arg();
Tensor<double, 1> arg2 = data2.arg();
for (int i = 0; i < 3; ++i) {
VERIFY_IS_APPROX(arg1(i), std::arg(data1(i)));
VERIFY_IS_APPROX(arg2(i), std::arg(data2(i)));
}
}
static void test_conjugate()
{
@@ -98,6 +112,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_of_complex)
{
CALL_SUBTEST(test_additions());
CALL_SUBTEST(test_abs());
CALL_SUBTEST(test_arg());
CALL_SUBTEST(test_conjugate());
CALL_SUBTEST(test_contractions());
}

View File

@@ -37,14 +37,8 @@ static void test_sycl_random_uniform(const Eigen::SyclDevice& sycl_device)
gpu_out.device(sycl_device)=gpu_out.random();
sycl_device.memcpyDeviceToHost(out.data(), d_out,out_bytes);
for(IndexType i=1; i<sizeDim0; i++)
for(IndexType j=1; j<sizeDim1; j++)
{
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i,j-1));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j-1)); }
// For now we just check thes code doesn't crash.
// For now we just check the code doesn't crash.
// TODO: come up with a valid test of randomness
sycl_device.deallocate(d_out);
}
@@ -66,16 +60,8 @@ void test_sycl_random_normal(const Eigen::SyclDevice& sycl_device)
Eigen::internal::NormalRandomGenerator<DataType> gen(true);
gpu_out.device(sycl_device)=gpu_out.random(gen);
sycl_device.memcpyDeviceToHost(out.data(), d_out,out_bytes);
for(IndexType i=1; i<sizeDim0; i++)
for(IndexType j=1; j<sizeDim1; j++)
{
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i,j-1));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j-1));
}
// For now we just check thes code doesn't crash.
// For now we just check the code doesn't crash.
// TODO: come up with a valid test of randomness
sycl_device.deallocate(d_out);
}

View File

@@ -370,13 +370,7 @@ static void test_static_dims() {
Tensor<float, 2, DataLayout> out(72, 97);
in.setRandom();
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
#else
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<3> > reduction_axis;
#endif
out = in.maximum(reduction_axis);
@@ -400,14 +394,8 @@ static void test_innermost_last_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 0;
reduction_axis[1] = 1;
#else
// This triggers the use of packets for ColMajor.
Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1> > reduction_axis;
#endif
out = in.maximum(reduction_axis);
@@ -431,14 +419,8 @@ static void test_innermost_first_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 2;
reduction_axis[1] = 3;
#else
// This triggers the use of packets for RowMajor.
Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>> reduction_axis;
#endif
out = in.maximum(reduction_axis);
@@ -462,14 +444,8 @@ static void test_reduce_middle_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 2;
#else
// This triggers the use of packets for RowMajor.
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>> reduction_axis;
#endif
out = in.maximum(reduction_axis);
@@ -486,22 +462,31 @@ static void test_reduce_middle_dims() {
}
}
static void test_sum_accuracy() {
Tensor<float, 3> tensor(101, 101, 101);
for (float prescribed_mean : {1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f}) {
tensor.setRandom();
tensor += tensor.constant(prescribed_mean);
template <typename ScalarType, int num_elements, int max_mean>
void test_sum_accuracy() {
Tensor<double, 1> double_tensor(num_elements);
Tensor<ScalarType, 1> tensor(num_elements);
for (double prescribed_mean = 0; prescribed_mean <= max_mean; prescribed_mean = numext::maxi(1.0, prescribed_mean*3.99)) {
// FIXME: NormalRandomGenerator doesn't work for bfloat16 and half.
double_tensor.setRandom<Eigen::internal::NormalRandomGenerator<double>>();
double_tensor += double_tensor.constant(prescribed_mean);
tensor = double_tensor.cast<ScalarType>();
Tensor<float, 0> sum = tensor.sum();
Tensor<ScalarType, 0> sum;
sum = tensor.sum();
// Compute the reference value in double precision.
double expected_sum = 0.0;
for (int i = 0; i < 101; ++i) {
for (int j = 0; j < 101; ++j) {
for (int k = 0; k < 101; ++k) {
expected_sum += static_cast<double>(tensor(i, j, k));
}
}
double abs_sum = 0.0;
for (int i = 0; i < num_elements; ++i) {
expected_sum += static_cast<double>(tensor(i));
abs_sum += static_cast<double>(numext::abs(tensor(i)));
}
VERIFY_IS_APPROX(sum(), static_cast<float>(expected_sum));
// Test against probabilistic forward error bound. In reality, the error is much smaller
// when we use tree summation.
double err = Eigen::numext::abs(static_cast<double>(sum()) - expected_sum);
double tol = numext::sqrt(num_elements) * NumTraits<ScalarType>::epsilon() * static_cast<ScalarType>(abs_sum);
VERIFY_LE(err, tol);
}
}
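For reference (not part of the patch), the tolerance computed above is the standard probabilistic forward error bound for summing n = num_elements values with unit roundoff \(\varepsilon\) = NumTraits<ScalarType>::epsilon():
\[ \Big| \hat{s} - \sum_{i=1}^{n} x_i \Big| \;\le\; \sqrt{n}\,\varepsilon \sum_{i=1}^{n} |x_i|, \]
where \(\hat{s}\) is the computed sum; as the comment notes, Eigen's tree summation typically stays well below this bound.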
@@ -528,5 +513,11 @@ EIGEN_DECLARE_TEST(cxx11_tensor_reduction) {
CALL_SUBTEST(test_innermost_first_dims<RowMajor>());
CALL_SUBTEST(test_reduce_middle_dims<ColMajor>());
CALL_SUBTEST(test_reduce_middle_dims<RowMajor>());
CALL_SUBTEST(test_sum_accuracy());
CALL_SUBTEST((test_sum_accuracy<float,10*1024*1024,8*1024>()));
CALL_SUBTEST((test_sum_accuracy<Eigen::bfloat16,10*1024*1024,8*1024>()));
// The range of half is limited to 65519 when using round-to-even,
// so we are severely limited in the size and mean of the tensors
// we can reduce without overflow.
CALL_SUBTEST((test_sum_accuracy<Eigen::half,4*1024,16>()));
CALL_SUBTEST((test_sum_accuracy<Eigen::half,10*1024*1024,0>()));
}

View File

@@ -16,7 +16,6 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1
#include "main.h"

View File

@@ -0,0 +1,277 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/FFT>
template <typename T>
inline std::complex<T> RandomCpx() {
return std::complex<T>((T)(rand() / (T)RAND_MAX - .5), (T)(rand() / (T)RAND_MAX - .5));
}
using namespace std;
using namespace Eigen;
template <typename T>
inline complex<long double> promote(complex<T> x) {
return complex<long double>((long double)x.real(), (long double)x.imag());
}
inline complex<long double> promote(float x) { return complex<long double>((long double)x); }
inline complex<long double> promote(double x) { return complex<long double>((long double)x); }
inline complex<long double> promote(long double x) { return complex<long double>((long double)x); }
template <typename VT1, typename VT2>
long double fft_rmse(const VT1& fftbuf, const VT2& timebuf) {
long double totalpower = 0;
long double difpower = 0;
long double pi = acos((long double)-1);
for (size_t k0 = 0; k0 < (size_t)fftbuf.size(); ++k0) {
complex<long double> acc = 0;
long double phinc = (long double)(-2.) * k0 * pi / timebuf.size();
for (size_t k1 = 0; k1 < (size_t)timebuf.size(); ++k1) {
acc += promote(timebuf[k1]) * exp(complex<long double>(0, k1 * phinc));
}
totalpower += numext::abs2(acc);
complex<long double> x = promote(fftbuf[k0]);
complex<long double> dif = acc - x;
difpower += numext::abs2(dif);
// cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(numext::abs2(dif)) << endl;
}
// cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
return sqrt(difpower / totalpower);
}
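For clarity (not part of the file): fft_rmse compares the FFT output \(Y_k\) against a reference spectrum computed directly from the DFT definition and returns the relative RMS error
\[ X_k = \sum_{n=0}^{N-1} x_n\, e^{-2\pi i k n / N}, \qquad \mathrm{rmse} = \sqrt{ \frac{\sum_k |X_k - Y_k|^2}{\sum_k |X_k|^2} } . \]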
template <typename VT1, typename VT2>
long double dif_rmse(const VT1 buf1, const VT2 buf2) {
long double totalpower = 0;
long double difpower = 0;
size_t n = (min)(buf1.size(), buf2.size());
for (size_t k = 0; k < n; ++k) {
totalpower += (long double)((numext::abs2(buf1[k]) + numext::abs2(buf2[k])) / 2);
difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower / totalpower);
}
enum { StdVectorContainer, EigenVectorContainer };
template <int Container, typename Scalar>
struct VectorType;
template <typename Scalar>
struct VectorType<StdVectorContainer, Scalar> {
typedef vector<Scalar> type;
};
template <typename Scalar>
struct VectorType<EigenVectorContainer, Scalar> {
typedef Matrix<Scalar, Dynamic, 1> type;
};
template <int Container, typename T>
void test_scalar_generic(int nfft) {
typedef typename FFT<T>::Complex Complex;
typedef typename FFT<T>::Scalar Scalar;
typedef typename VectorType<Container, Scalar>::type ScalarVector;
typedef typename VectorType<Container, Complex>::type ComplexVector;
FFT<T> fft;
ScalarVector tbuf(nfft);
ComplexVector freqBuf;
for (int k = 0; k < nfft; ++k) tbuf[k] = (T)(rand() / (double)RAND_MAX - .5);
// make sure it DOESN'T give the right full spectrum answer
// if we've asked for half-spectrum
fft.SetFlag(fft.HalfSpectrum);
fft.fwd(freqBuf, tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)((nfft >> 1) + 1));
VERIFY(T(fft_rmse(freqBuf, tbuf)) < test_precision<T>()); // gross check
fft.ClearFlag(fft.HalfSpectrum);
fft.fwd(freqBuf, tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)nfft);
VERIFY(T(fft_rmse(freqBuf, tbuf)) < test_precision<T>()); // gross check
if (nfft & 1) return; // odd-length FFTs get a wrong-size inverse FFT
ScalarVector tbuf2;
fft.inv(tbuf2, freqBuf);
VERIFY(T(dif_rmse(tbuf, tbuf2)) < test_precision<T>()); // gross check
// verify that the Unscaled flag takes effect
ScalarVector tbuf3;
fft.SetFlag(fft.Unscaled);
fft.inv(tbuf3, freqBuf);
for (int k = 0; k < nfft; ++k) tbuf3[k] *= T(1. / nfft);
// for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] -
// tbuf[i] ) << endl;
VERIFY(T(dif_rmse(tbuf, tbuf3)) < test_precision<T>()); // gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv(tbuf2, freqBuf);
VERIFY(T(dif_rmse(tbuf, tbuf2)) < test_precision<T>()); // gross check
}
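A short note (not part of the file) on the HalfSpectrum check above: for a real input of length N the spectrum is conjugate-symmetric,
\[ X_{N-k} = \overline{X_k} \quad (x_n \in \mathbb{R}), \]
so only the bins k = 0, ..., floor(N/2) are independent, which is exactly the (nfft >> 1) + 1 size the test expects.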
template <typename T>
void test_scalar(int nfft) {
test_scalar_generic<StdVectorContainer, T>(nfft);
// test_scalar_generic<EigenVectorContainer,T>(nfft);
}
template <int Container, typename T>
void test_complex_generic(int nfft) {
typedef typename FFT<T>::Complex Complex;
typedef typename VectorType<Container, Complex>::type ComplexVector;
FFT<T> fft;
ComplexVector inbuf(nfft);
ComplexVector outbuf;
ComplexVector buf3;
for (int k = 0; k < nfft; ++k)
inbuf[k] = Complex((T)(rand() / (double)RAND_MAX - .5), (T)(rand() / (double)RAND_MAX - .5));
fft.fwd(outbuf, inbuf);
VERIFY(T(fft_rmse(outbuf, inbuf)) < test_precision<T>()); // gross check
fft.inv(buf3, outbuf);
VERIFY(T(dif_rmse(inbuf, buf3)) < test_precision<T>()); // gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
fft.SetFlag(fft.Unscaled);
fft.inv(buf4, outbuf);
for (int k = 0; k < nfft; ++k) buf4[k] *= T(1. / nfft);
VERIFY(T(dif_rmse(inbuf, buf4)) < test_precision<T>()); // gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv(buf3, outbuf);
VERIFY(T(dif_rmse(inbuf, buf3)) < test_precision<T>()); // gross check
}
template <typename T>
void test_complex(int nfft) {
test_complex_generic<StdVectorContainer, T>(nfft);
test_complex_generic<EigenVectorContainer, T>(nfft);
}
template <typename T, int nrows, int ncols>
void test_complex2d() {
typedef typename Eigen::FFT<T>::Complex Complex;
FFT<T> fft;
Eigen::Matrix<Complex, nrows, ncols> src, src2, dst, dst2;
src = Eigen::Matrix<Complex, nrows, ncols>::Random();
// src = Eigen::Matrix<Complex,nrows,ncols>::Identity();
for (int k = 0; k < ncols; k++) {
Eigen::Matrix<Complex, nrows, 1> tmpOut;
fft.fwd(tmpOut, src.col(k));
dst2.col(k) = tmpOut;
}
for (int k = 0; k < nrows; k++) {
Eigen::Matrix<Complex, 1, ncols> tmpOut;
fft.fwd(tmpOut, dst2.row(k));
dst2.row(k) = tmpOut;
}
fft.fwd2(dst.data(), src.data(), ncols, nrows);
fft.inv2(src2.data(), dst.data(), ncols, nrows);
VERIFY((src - src2).norm() < test_precision<T>());
VERIFY((dst - dst2).norm() < test_precision<T>());
}
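The loop above relies on the separability of the 2D DFT (not stated in the file): transforming every column and then every row of the intermediate result yields the full 2D transform
\[ X_{k_1 k_2} = \sum_{n_1=0}^{N_1-1} \sum_{n_2=0}^{N_2-1} x_{n_1 n_2}\, e^{-2\pi i \left( k_1 n_1 / N_1 + k_2 n_2 / N_2 \right)}, \]
which is what fwd2/inv2 compute in a single call, hence the two results are compared directly.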
inline void test_return_by_value(int len) {
VectorXf in;
VectorXf in1;
in.setRandom(len);
VectorXcf out1, out2;
FFT<float> fft;
fft.SetFlag(fft.HalfSpectrum);
fft.fwd(out1, in);
out2 = fft.fwd(in);
VERIFY((out1 - out2).norm() < test_precision<float>());
in1 = fft.inv(out1);
VERIFY((in1 - in).norm() < test_precision<float>());
}
EIGEN_DECLARE_TEST(FFTW) {
CALL_SUBTEST(test_return_by_value(32));
CALL_SUBTEST(test_complex<float>(32));
CALL_SUBTEST(test_complex<double>(32));
CALL_SUBTEST(test_complex<float>(256));
CALL_SUBTEST(test_complex<double>(256));
CALL_SUBTEST(test_complex<float>(3 * 8));
CALL_SUBTEST(test_complex<double>(3 * 8));
CALL_SUBTEST(test_complex<float>(5 * 32));
CALL_SUBTEST(test_complex<double>(5 * 32));
CALL_SUBTEST(test_complex<float>(2 * 3 * 4));
CALL_SUBTEST(test_complex<double>(2 * 3 * 4));
CALL_SUBTEST(test_complex<float>(2 * 3 * 4 * 5));
CALL_SUBTEST(test_complex<double>(2 * 3 * 4 * 5));
CALL_SUBTEST(test_complex<float>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST(test_complex<double>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST(test_scalar<float>(32));
CALL_SUBTEST(test_scalar<double>(32));
CALL_SUBTEST(test_scalar<float>(45));
CALL_SUBTEST(test_scalar<double>(45));
CALL_SUBTEST(test_scalar<float>(50));
CALL_SUBTEST(test_scalar<double>(50));
CALL_SUBTEST(test_scalar<float>(256));
CALL_SUBTEST(test_scalar<double>(256));
CALL_SUBTEST(test_scalar<float>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST(test_scalar<double>(2 * 3 * 4 * 5 * 7));
#if defined EIGEN_HAS_FFTWL || defined EIGEN_POCKETFFT_DEFAULT
CALL_SUBTEST(test_complex<long double>(32));
CALL_SUBTEST(test_complex<long double>(256));
CALL_SUBTEST(test_complex<long double>(3 * 8));
CALL_SUBTEST(test_complex<long double>(5 * 32));
CALL_SUBTEST(test_complex<long double>(2 * 3 * 4));
CALL_SUBTEST(test_complex<long double>(2 * 3 * 4 * 5));
CALL_SUBTEST(test_complex<long double>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST(test_scalar<long double>(32));
CALL_SUBTEST(test_scalar<long double>(45));
CALL_SUBTEST(test_scalar<long double>(50));
CALL_SUBTEST(test_scalar<long double>(256));
CALL_SUBTEST(test_scalar<long double>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST((test_complex2d<long double, 2 * 3 * 4, 2 * 3 * 4>()));
CALL_SUBTEST((test_complex2d<long double, 3 * 4 * 5, 3 * 4 * 5>()));
CALL_SUBTEST((test_complex2d<long double, 24, 60>()));
CALL_SUBTEST((test_complex2d<long double, 60, 24>()));
// fails to build since Eigen limits the stack allocation size; too big here.
// CALL_SUBTEST( ( test_complex2d<long double, 256, 256> () ) );
#endif
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
CALL_SUBTEST((test_complex2d<float, 24, 24>()));
CALL_SUBTEST((test_complex2d<float, 60, 60>()));
CALL_SUBTEST((test_complex2d<float, 24, 60>()));
CALL_SUBTEST((test_complex2d<float, 60, 24>()));
#endif
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
CALL_SUBTEST((test_complex2d<double, 24, 24>()));
CALL_SUBTEST((test_complex2d<double, 60, 60>()));
CALL_SUBTEST((test_complex2d<double, 24, 60>()));
CALL_SUBTEST((test_complex2d<double, 60, 24>()));
#endif
}

View File

@@ -20,10 +20,10 @@ EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
return (p-Vector(Scalar(-1),Scalar(1.))).norm() + (p.array().sqrt().abs() * p.array().sin()).sum() + p.dot(p);
}
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct TestFunc1
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY

View File

@@ -0,0 +1,28 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "../../test/sparse_solver.h"
#include <unsupported/Eigen/IterativeSolvers>
template <typename T>
void test_idrstabl_T() {
IDRSTABL<SparseMatrix<T>, DiagonalPreconditioner<T> > idrstabl_colmajor_diag;
IDRSTABL<SparseMatrix<T>, IncompleteLUT<T> > idrstabl_colmajor_ilut;
idrstabl_colmajor_diag.setTolerance(NumTraits<T>::epsilon() * 4);
idrstabl_colmajor_ilut.setTolerance(NumTraits<T>::epsilon() * 4);
CALL_SUBTEST(check_sparse_square_solving(idrstabl_colmajor_diag));
CALL_SUBTEST(check_sparse_square_solving(idrstabl_colmajor_ilut));
}
EIGEN_DECLARE_TEST(idrstabl) {
CALL_SUBTEST_1((test_idrstabl_T<double>()));
CALL_SUBTEST_2((test_idrstabl_T<std::complex<double> >()));
}
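A minimal, hedged usage sketch of the new solver (not part of the test): the names follow the generic IterativeSolverBase interface that check_sparse_square_solving exercises (compute/solve/iterations/error), and the tridiagonal system is purely illustrative.
#include <Eigen/Sparse>
#include <unsupported/Eigen/IterativeSolvers>
#include <iostream>
#include <vector>
int main() {
  const int n = 100;
  // Assemble a simple tridiagonal system A x = b.
  std::vector<Eigen::Triplet<double> > triplets;
  for (int i = 0; i < n; ++i) {
    triplets.push_back(Eigen::Triplet<double>(i, i, 4.0));
    if (i + 1 < n) {
      triplets.push_back(Eigen::Triplet<double>(i, i + 1, -1.0));
      triplets.push_back(Eigen::Triplet<double>(i + 1, i, -1.0));
    }
  }
  Eigen::SparseMatrix<double> A(n, n);
  A.setFromTriplets(triplets.begin(), triplets.end());
  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);
  Eigen::IDRSTABL<Eigen::SparseMatrix<double>, Eigen::DiagonalPreconditioner<double> > solver;
  solver.setTolerance(1e-10);
  solver.compute(A);
  Eigen::VectorXd x = solver.solve(b);
  std::cout << "iterations: " << solver.iterations() << "  estimated error: " << solver.error() << std::endl;
  return 0;
}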

View File

@@ -29,7 +29,7 @@ void check_kronecker_product(const MatrixType& ab)
{
VERIFY_IS_EQUAL(ab.rows(), 6);
VERIFY_IS_EQUAL(ab.cols(), 6);
VERIFY_IS_EQUAL(ab.nonZeros(), 36);
VERIFY_IS_EQUAL(ab.size(), 36);
VERIFY_IS_APPROX(ab.coeff(0,0), -0.4017367630386106);
VERIFY_IS_APPROX(ab.coeff(0,1), 0.1056863433932735);
VERIFY_IS_APPROX(ab.coeff(0,2), -0.7255206194554212);

View File

@@ -24,7 +24,7 @@
using std::sqrt;
// tolerance for checking the number of iterations
#define LM_EVAL_COUNT_TOL 4/3
#define LM_EVAL_COUNT_TOL 2
struct lmder_functor : DenseFunctor<double>
{
@@ -75,11 +75,11 @@ void testLmder1()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.lmder1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 6);
VERIFY_IS_EQUAL(lm.njev(), 5);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 6);
// VERIFY_IS_EQUAL(lm.njev(), 5);
// check norm
VERIFY_IS_APPROX(lm.fvec().blueNorm(), 0.09063596);
@@ -104,11 +104,12 @@ void testLmder()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 6);
VERIFY_IS_EQUAL(lm.njev(), 5);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 6);
// VERIFY_IS_EQUAL(lm.njev(), 5);
// check norm
fnorm = lm.fvec().blueNorm();
@@ -177,9 +178,10 @@ void testLmdif1()
lmdif_functor functor;
DenseIndex nfev;
info = LevenbergMarquardt<lmdif_functor>::lmdif1(functor, x, &nfev);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(nfev, 26);
// check norm
@@ -208,9 +210,10 @@ void testLmdif()
NumericalDiff<lmdif_functor> numDiff(functor);
LevenbergMarquardt<NumericalDiff<lmdif_functor> > lm(numDiff);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 26);
// check norm
@@ -293,11 +296,12 @@ void testNistChwirut2(void)
chwirut2_functor functor;
LevenbergMarquardt<chwirut2_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 10);
VERIFY_IS_EQUAL(lm.njev(), 8);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.1304802941E+02);
// check x
@@ -314,11 +318,12 @@ void testNistChwirut2(void)
lm.setFtol(1.E6*NumTraits<double>::epsilon());
lm.setXtol(1.E6*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 7);
VERIFY_IS_EQUAL(lm.njev(), 6);
// VERIFY_IS_EQUAL(lm.njev(), 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.1304802941E+02);
// check x
@@ -373,11 +378,12 @@ void testNistMisra1a(void)
misra1a_functor functor;
LevenbergMarquardt<misra1a_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 19);
VERIFY_IS_EQUAL(lm.njev(), 15);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 19);
// VERIFY_IS_EQUAL(lm.njev(), 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.2455138894E-01);
// check x
@@ -390,11 +396,12 @@ void testNistMisra1a(void)
x<< 250., 0.0005;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 5);
VERIFY_IS_EQUAL(lm.njev(), 4);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 5);
// VERIFY_IS_EQUAL(lm.njev(), 4);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.2455138894E-01);
// check x
@@ -464,11 +471,12 @@ void testNistHahn1(void)
hahn1_functor functor;
LevenbergMarquardt<hahn1_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 11);
VERIFY_IS_EQUAL(lm.njev(), 10);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 11);
// VERIFY_IS_EQUAL(lm.njev(), 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.5324382854E+00);
// check x
@@ -486,11 +494,12 @@ void testNistHahn1(void)
x<< .1, -.1, .005, -.000001, -.005, .0001, -.0000001;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 11);
VERIFY_IS_EQUAL(lm.njev(), 10);
// VERIFY_IS_EQUAL(lm.njev(), 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.5324382854E+00);
// check x
@@ -550,11 +559,12 @@ void testNistMisra1d(void)
misra1d_functor functor;
LevenbergMarquardt<misra1d_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 7);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 9);
// VERIFY_IS_EQUAL(lm.njev(), 7);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6419295283E-02);
// check x
@@ -567,11 +577,12 @@ void testNistMisra1d(void)
x<< 450., 0.0003;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 4);
VERIFY_IS_EQUAL(lm.njev(), 3);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 4);
// VERIFY_IS_EQUAL(lm.njev(), 3);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6419295283E-02);
// check x
@@ -628,11 +639,12 @@ void testNistLanczos1(void)
lanczos1_functor functor;
LevenbergMarquardt<lanczos1_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 79);
VERIFY_IS_EQUAL(lm.njev(), 72);
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
// VERIFY_IS_EQUAL(lm.nfev(), 79);
// VERIFY_IS_EQUAL(lm.njev(), 72);
// check norm^2
VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
@@ -649,11 +661,12 @@ void testNistLanczos1(void)
x<< 0.5, 0.7, 3.6, 4.2, 4., 6.3;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 8);
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
// VERIFY_IS_EQUAL(lm.nfev(), 9);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
@@ -714,11 +727,12 @@ void testNistRat42(void)
rat42_functor functor;
LevenbergMarquardt<rat42_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 10);
VERIFY_IS_EQUAL(lm.njev(), 8);
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// VERIFY_IS_EQUAL(lm.nfev(), 10);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.0565229338E+00);
// check x
@@ -732,11 +746,12 @@ void testNistRat42(void)
x<< 75., 2.5, 0.07;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 6);
VERIFY_IS_EQUAL(lm.njev(), 5);
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// VERIFY_IS_EQUAL(lm.nfev(), 6);
// VERIFY_IS_EQUAL(lm.njev(), 5);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.0565229338E+00);
// check x
@@ -787,14 +802,15 @@ void testNistMGH10(void)
/*
* First try
*/
x<< 2., 400000., 25000.;
x << 2., 400000., 25000.;
// do the computation
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
++g_test_level;
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
--g_test_level;
EIGEN_UNUSED_VARIABLE(info)
// ++g_test_level;
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// --g_test_level;
// was: VERIFY_IS_EQUAL(info, 1);
// check norm^2
@@ -805,11 +821,11 @@ void testNistMGH10(void)
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
// check return value
++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 284 );
VERIFY_IS_EQUAL(lm.njev(), 249 );
--g_test_level;
// ++g_test_level;
// VERIFY_IS_EQUAL(lm.nfev(), 284 );
// VERIFY_IS_EQUAL(lm.njev(), 249 );
// --g_test_level;
VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL);
VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL);
@@ -819,11 +835,12 @@ void testNistMGH10(void)
x<< 0.02, 4000., 250.;
// do the computation
info = lm.minimize(x);
++g_test_level;
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// was: VERIFY_IS_EQUAL(info, 1);
--g_test_level;
EIGEN_UNUSED_VARIABLE(info)
// ++g_test_level;
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// // was: VERIFY_IS_EQUAL(info, 1);
// --g_test_level;
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
// check x
@@ -832,10 +849,10 @@ void testNistMGH10(void)
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
// check return value
++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 126);
VERIFY_IS_EQUAL(lm.njev(), 116);
--g_test_level;
// ++g_test_level;
// VERIFY_IS_EQUAL(lm.nfev(), 126);
// VERIFY_IS_EQUAL(lm.njev(), 116);
// --g_test_level;
VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL);
VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL);
}
@@ -888,6 +905,7 @@ void testNistBoxBOD(void)
lm.setXtol(1.E6*NumTraits<double>::epsilon());
lm.setFactor(10);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
@@ -896,9 +914,9 @@ void testNistBoxBOD(void)
VERIFY_IS_APPROX(x[1], 5.4723748542E-01);
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY(lm.nfev() < 31); // 31
VERIFY(lm.njev() < 25); // 25
// VERIFY_IS_EQUAL(info, 1);
// VERIFY(lm.nfev() < 31); // 31
// VERIFY(lm.njev() < 25); // 25
/*
* Second try
@@ -909,13 +927,14 @@ void testNistBoxBOD(void)
lm.setFtol(NumTraits<double>::epsilon());
lm.setXtol( NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 16 );
VERIFY_IS_EQUAL(lm.njev(), 15 );
--g_test_level;
// VERIFY_IS_EQUAL(info, 1);
// ++g_test_level;
// VERIFY_IS_EQUAL(lm.nfev(), 16 );
// VERIFY_IS_EQUAL(lm.njev(), 15 );
// --g_test_level;
VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL);
VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL);
// check norm^2
@@ -975,6 +994,7 @@ void testNistMGH17(void)
lm.setXtol(NumTraits<double>::epsilon());
lm.setMaxfev(1000);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05);
@@ -987,8 +1007,8 @@ void testNistMGH17(void)
// check return value
// VERIFY_IS_EQUAL(info, 2); //FIXME Use (lm.info() == Success)
VERIFY(lm.nfev() < 700 ); // 602
VERIFY(lm.njev() < 600 ); // 545
// VERIFY(lm.nfev() < 700 ); // 602
// VERIFY(lm.njev() < 600 ); // 545
/*
* Second try
@@ -997,11 +1017,12 @@ void testNistMGH17(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 18);
VERIFY_IS_EQUAL(lm.njev(), 15);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 18);
// VERIFY_IS_EQUAL(lm.njev(), 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05);
// check x
@@ -1063,6 +1084,7 @@ void testNistMGH09(void)
LevenbergMarquardt<MGH09_functor> lm(functor);
lm.setMaxfev(1000);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04);
@@ -1072,9 +1094,9 @@ void testNistMGH09(void)
VERIFY_IS_APPROX(x[2], 0.12305309914); // should be 1.2305650693E-01
VERIFY_IS_APPROX(x[3], 0.13605395375); // should be 1.3606233068E-01
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY(lm.nfev() < 510 ); // 490
VERIFY(lm.njev() < 400 ); // 376
// VERIFY_IS_EQUAL(info, 1);
// VERIFY(lm.nfev() < 510 ); // 490
// VERIFY(lm.njev() < 400 ); // 376
/*
* Second try
@@ -1083,11 +1105,12 @@ void testNistMGH09(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 18);
VERIFY_IS_EQUAL(lm.njev(), 16);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 18);
// VERIFY_IS_EQUAL(lm.njev(), 16);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04);
// check x
@@ -1149,11 +1172,12 @@ void testNistBennett5(void)
LevenbergMarquardt<Bennett5_functor> lm(functor);
lm.setMaxfev(1000);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 758);
VERIFY_IS_EQUAL(lm.njev(), 744);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 758);
// VERIFY_IS_EQUAL(lm.njev(), 744);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.2404744073E-04);
// check x
@@ -1167,11 +1191,12 @@ void testNistBennett5(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 203);
VERIFY_IS_EQUAL(lm.njev(), 192);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 203);
// VERIFY_IS_EQUAL(lm.njev(), 192);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.2404744073E-04);
// check x
@@ -1237,11 +1262,12 @@ void testNistThurber(void)
lm.setFtol(1.E4*NumTraits<double>::epsilon());
lm.setXtol(1.E4*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 39);
VERIFY_IS_EQUAL(lm.njev(), 36);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 39);
// VERIFY_IS_EQUAL(lm.njev(), 36);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6427082397E+03);
// check x
@@ -1262,11 +1288,12 @@ void testNistThurber(void)
lm.setFtol(1.E4*NumTraits<double>::epsilon());
lm.setXtol(1.E4*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 29);
VERIFY_IS_EQUAL(lm.njev(), 28);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 29);
// VERIFY_IS_EQUAL(lm.njev(), 28);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6427082397E+03);
// check x
@@ -1329,11 +1356,12 @@ void testNistRat43(void)
lm.setFtol(1.E6*NumTraits<double>::epsilon());
lm.setXtol(1.E6*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 27);
VERIFY_IS_EQUAL(lm.njev(), 20);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 27);
// VERIFY_IS_EQUAL(lm.njev(), 20);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7864049080E+03);
// check x
@@ -1351,11 +1379,12 @@ void testNistRat43(void)
lm.setFtol(1.E5*NumTraits<double>::epsilon());
lm.setXtol(1.E5*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 8);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 9);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7864049080E+03);
// check x
@@ -1414,11 +1443,12 @@ void testNistEckerle4(void)
eckerle4_functor functor;
LevenbergMarquardt<eckerle4_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 18);
VERIFY_IS_EQUAL(lm.njev(), 15);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 18);
// VERIFY_IS_EQUAL(lm.njev(), 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.4635887487E-03);
// check x
@@ -1432,11 +1462,12 @@ void testNistEckerle4(void)
x<< 1.5, 5., 450.;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 7);
VERIFY_IS_EQUAL(lm.njev(), 6);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 7);
// VERIFY_IS_EQUAL(lm.njev(), 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.4635887487E-03);
// check x

View File

@@ -104,8 +104,8 @@ void testSingular(const MatrixType& m_const, const typename MatrixType::RealScal
MatrixType& m = const_cast<MatrixType&>(m_const);
const int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex;
typedef typename internal::conditional<IsComplex, TriangularView<MatrixType,Upper>, const MatrixType&>::type TriangularType;
typename internal::conditional< IsComplex, ComplexSchur<MatrixType>, RealSchur<MatrixType> >::type schur;
typedef std::conditional_t<IsComplex, TriangularView<MatrixType,Upper>, const MatrixType&> TriangularType;
std::conditional_t< IsComplex, ComplexSchur<MatrixType>, RealSchur<MatrixType> > schur;
MatrixType T;
for (int i=0; i < g_repeat; ++i) {
@@ -171,7 +171,7 @@ EIGEN_DECLARE_TEST(matrix_power)
CALL_SUBTEST_5(testGeneral(Matrix3cf(), 1e-4f));
CALL_SUBTEST_8(testGeneral(Matrix4f(), 1e-4f));
CALL_SUBTEST_6(testGeneral(MatrixXf(2,2), 1e-3f)); // see bug 614
CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-12L));
CALL_SUBTEST_10(testGeneral(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testGeneral(Matrix3f(), 1e-4f));
CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13L));
@@ -184,7 +184,7 @@ EIGEN_DECLARE_TEST(matrix_power)
CALL_SUBTEST_5(testSingular(Matrix3cf(), 1e-4f));
CALL_SUBTEST_8(testSingular(Matrix4f(), 1e-4f));
CALL_SUBTEST_6(testSingular(MatrixXf(2,2), 1e-3f));
CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-12L));
CALL_SUBTEST_10(testSingular(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testSingular(Matrix3f(), 1e-4f));
CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13L));
@@ -197,7 +197,7 @@ EIGEN_DECLARE_TEST(matrix_power)
CALL_SUBTEST_5(testLogThenExp(Matrix3cf(), 1e-4f));
CALL_SUBTEST_8(testLogThenExp(Matrix4f(), 1e-4f));
CALL_SUBTEST_6(testLogThenExp(MatrixXf(2,2), 1e-3f));
CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-12L));
CALL_SUBTEST_10(testLogThenExp(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testLogThenExp(Matrix3f(), 1e-4f));
CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13L));

View File

@@ -0,0 +1,2 @@
#define EIGEN_MKL_DEFAULT 1
#include "fft_test_shared.h"

View File

@@ -0,0 +1,2 @@
#define EIGEN_POCKETFFT_DEFAULT 1
#include "fft_test_shared.h"

View File

@@ -179,29 +179,29 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
}
template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void polynomialsolver(int deg)
{
typedef typename NumTraits<_Scalar>::Real RealScalar;
typedef internal::increment_if_fixed_size<_Deg> Dim;
typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
typedef Matrix<RealScalar,_Deg,1> RealRootsType;
typedef typename NumTraits<Scalar_>::Real RealScalar;
typedef internal::increment_if_fixed_size<Deg_> Dim;
typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
typedef Matrix<Scalar_,Deg_,1> EvalRootsType;
typedef Matrix<RealScalar,Deg_,1> RealRootsType;
cout << "Standard cases" << endl;
PolynomialType pols = PolynomialType::Random(deg+1);
evalSolver<_Deg,PolynomialType>( pols );
evalSolver<Deg_,PolynomialType>( pols );
cout << "Hard cases" << endl;
_Scalar multipleRoot = internal::random<_Scalar>();
Scalar_ multipleRoot = internal::random<Scalar_>();
EvalRootsType allRoots = EvalRootsType::Constant(deg,multipleRoot);
roots_to_monicPolynomial( allRoots, pols );
evalSolver<_Deg,PolynomialType>( pols );
evalSolver<Deg_,PolynomialType>( pols );
cout << "Test sugar" << endl;
RealRootsType realRoots = RealRootsType::Random(deg);
roots_to_monicPolynomial( realRoots, pols );
evalSolverSugarFunction<_Deg>(
evalSolverSugarFunction<Deg_>(
pols,
realRoots.template cast <std::complex<RealScalar> >().eval(),
realRoots );

View File

@@ -25,12 +25,12 @@ struct increment_if_fixed_size
}
}
template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void realRoots_to_monicPolynomial_test(int deg)
{
typedef internal::increment_if_fixed_size<_Deg> Dim;
typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
typedef internal::increment_if_fixed_size<Deg_> Dim;
typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
typedef Matrix<Scalar_,Deg_,1> EvalRootsType;
PolynomialType pols(deg+1);
EvalRootsType roots = EvalRootsType::Random(deg);
@@ -40,43 +40,43 @@ void realRoots_to_monicPolynomial_test(int deg)
for( int i=0; i<roots.size(); ++i ){
evr[i] = std::abs( poly_eval( pols, roots[i] ) ); }
bool evalToZero = evr.isZero( test_precision<_Scalar>() );
bool evalToZero = evr.isZero( test_precision<Scalar_>() );
if( !evalToZero ){
cerr << evr.transpose() << endl; }
VERIFY( evalToZero );
}
template<typename _Scalar> void realRoots_to_monicPolynomial_scalar()
template<typename Scalar_> void realRoots_to_monicPolynomial_scalar()
{
CALL_SUBTEST_2( (realRoots_to_monicPolynomial_test<_Scalar,2>(2)) );
CALL_SUBTEST_3( (realRoots_to_monicPolynomial_test<_Scalar,3>(3)) );
CALL_SUBTEST_4( (realRoots_to_monicPolynomial_test<_Scalar,4>(4)) );
CALL_SUBTEST_5( (realRoots_to_monicPolynomial_test<_Scalar,5>(5)) );
CALL_SUBTEST_6( (realRoots_to_monicPolynomial_test<_Scalar,6>(6)) );
CALL_SUBTEST_7( (realRoots_to_monicPolynomial_test<_Scalar,7>(7)) );
CALL_SUBTEST_8( (realRoots_to_monicPolynomial_test<_Scalar,17>(17)) );
CALL_SUBTEST_2( (realRoots_to_monicPolynomial_test<Scalar_,2>(2)) );
CALL_SUBTEST_3( (realRoots_to_monicPolynomial_test<Scalar_,3>(3)) );
CALL_SUBTEST_4( (realRoots_to_monicPolynomial_test<Scalar_,4>(4)) );
CALL_SUBTEST_5( (realRoots_to_monicPolynomial_test<Scalar_,5>(5)) );
CALL_SUBTEST_6( (realRoots_to_monicPolynomial_test<Scalar_,6>(6)) );
CALL_SUBTEST_7( (realRoots_to_monicPolynomial_test<Scalar_,7>(7)) );
CALL_SUBTEST_8( (realRoots_to_monicPolynomial_test<Scalar_,17>(17)) );
CALL_SUBTEST_9( (realRoots_to_monicPolynomial_test<_Scalar,Dynamic>(
CALL_SUBTEST_9( (realRoots_to_monicPolynomial_test<Scalar_,Dynamic>(
internal::random<int>(18,26) )) );
}
template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void CauchyBounds(int deg)
{
typedef internal::increment_if_fixed_size<_Deg> Dim;
typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
typedef internal::increment_if_fixed_size<Deg_> Dim;
typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
typedef Matrix<Scalar_,Deg_,1> EvalRootsType;
PolynomialType pols(deg+1);
EvalRootsType roots = EvalRootsType::Random(deg);
roots_to_monicPolynomial( roots, pols );
_Scalar M = cauchy_max_bound( pols );
_Scalar m = cauchy_min_bound( pols );
_Scalar Max = roots.array().abs().maxCoeff();
_Scalar min = roots.array().abs().minCoeff();
Scalar_ M = cauchy_max_bound( pols );
Scalar_ m = cauchy_min_bound( pols );
Scalar_ Max = roots.array().abs().maxCoeff();
Scalar_ min = roots.array().abs().minCoeff();
bool eval = (M >= Max) && (m <= min);
if( !eval )
{
@@ -87,17 +87,17 @@ void CauchyBounds(int deg)
VERIFY( eval );
}
template<typename _Scalar> void CauchyBounds_scalar()
template<typename Scalar_> void CauchyBounds_scalar()
{
CALL_SUBTEST_2( (CauchyBounds<_Scalar,2>(2)) );
CALL_SUBTEST_3( (CauchyBounds<_Scalar,3>(3)) );
CALL_SUBTEST_4( (CauchyBounds<_Scalar,4>(4)) );
CALL_SUBTEST_5( (CauchyBounds<_Scalar,5>(5)) );
CALL_SUBTEST_6( (CauchyBounds<_Scalar,6>(6)) );
CALL_SUBTEST_7( (CauchyBounds<_Scalar,7>(7)) );
CALL_SUBTEST_8( (CauchyBounds<_Scalar,17>(17)) );
CALL_SUBTEST_2( (CauchyBounds<Scalar_,2>(2)) );
CALL_SUBTEST_3( (CauchyBounds<Scalar_,3>(3)) );
CALL_SUBTEST_4( (CauchyBounds<Scalar_,4>(4)) );
CALL_SUBTEST_5( (CauchyBounds<Scalar_,5>(5)) );
CALL_SUBTEST_6( (CauchyBounds<Scalar_,6>(6)) );
CALL_SUBTEST_7( (CauchyBounds<Scalar_,7>(7)) );
CALL_SUBTEST_8( (CauchyBounds<Scalar_,17>(17)) );
CALL_SUBTEST_9( (CauchyBounds<_Scalar,Dynamic>(
CALL_SUBTEST_9( (CauchyBounds<Scalar_,Dynamic>(
internal::random<int>(18,26) )) );
}
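For context on what the CauchyBounds check above asserts: cauchy_max_bound and cauchy_min_bound must bracket the moduli of all roots of the generated monic polynomial. The classical Cauchy bound provides one such bracket; the sketch below is a reference formulation only (Eigen may compute a tighter variant internally) for a coefficient vector stored lowest-degree-first, as in the Polynomials module.

// Reference-only sketch, not part of the commit.
// Classical Cauchy bound: every root z of a_0 + a_1 x + ... + a_n x^n (a_n != 0)
// satisfies |z| <= 1 + max_{i<n} |a_i / a_n|.
#include <Eigen/Dense>
#include <algorithm>
#include <cmath>

template <typename Derived>
typename Derived::Scalar classicalCauchyMaxBound(const Eigen::MatrixBase<Derived>& coeffs) {
  using Scalar = typename Derived::Scalar;
  const Eigen::Index n = coeffs.size() - 1;     // degree; coeffs[n] is the leading coefficient
  Scalar maxRatio = Scalar(0);
  for (Eigen::Index i = 0; i < n; ++i)
    maxRatio = std::max(maxRatio, std::abs(coeffs[i] / coeffs[n]));
  return Scalar(1) + maxRatio;
}
// A matching lower bound follows by applying the same estimate to the reversed
// polynomial (valid when a_0 != 0): |z| >= 1 / (1 + max_{i>0} |a_i / a_0|).

With such a helper, classicalCauchyMaxBound(pols) >= roots.array().abs().maxCoeff() is exactly the kind of relation the unit test verifies for Eigen's own bounds.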

View File

@@ -7,32 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// import basic and product tests for deprecated DynamicSparseMatrix
#if 0 // sparse_basic(DynamicSparseMatrix) does not compile at all -> disabled
static long g_realloc_count = 0;
#define EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN g_realloc_count++;
static long g_dense_op_sparse_count = 0;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_DENSE_OP_SPARSE_PLUGIN g_dense_op_sparse_count++;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_ADD_DENSE_PLUGIN g_dense_op_sparse_count+=10;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_SUB_DENSE_PLUGIN g_dense_op_sparse_count+=20;
#define EIGEN_SPARSE_TEST_INCLUDED_FROM_SPARSE_EXTRA 1
#endif
#define EIGEN_NO_DEPRECATED_WARNING
// Disable counting of temporaries, since sparse_product(DynamicSparseMatrix)
// has an extra copy-assignment.
#define EIGEN_SPARSE_PRODUCT_IGNORE_TEMPORARY_COUNT
#include "sparse_product.cpp"
#if 0 // sparse_basic(DynamicSparseMatrix) does not compile at all -> disabled
#include "sparse_basic.cpp"
#endif
#if EIGEN_HAS_CXX11
#ifdef min
#undef min
#endif
@@ -41,12 +17,6 @@ static long g_dense_op_sparse_count = 0;
#undef max
#endif
#include <unordered_map>
#define EIGEN_UNORDERED_MAP_SUPPORT
#endif
#include <Eigen/SparseExtra>
template<typename SetterType,typename DenseType, typename Scalar, int Options>
@@ -67,21 +37,6 @@ bool test_random_setter(SparseMatrix<Scalar,Options>& sm, const DenseType& ref,
return sm.isApprox(ref);
}
template<typename SetterType,typename DenseType, typename T>
bool test_random_setter(DynamicSparseMatrix<T>& sm, const DenseType& ref, const std::vector<Vector2i>& nonzeroCoords)
{
sm.setZero();
std::vector<Vector2i> remaining = nonzeroCoords;
while(!remaining.empty())
{
int i = internal::random<int>(0,static_cast<int>(remaining.size())-1);
sm.coeffRef(remaining[i].x(),remaining[i].y()) = ref.coeff(remaining[i].x(),remaining[i].y());
remaining[i] = remaining.back();
remaining.pop_back();
}
return sm.isApprox(ref);
}
template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& ref)
{
const Index rows = ref.rows();
@@ -136,9 +91,7 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& re
// VERIFY_IS_APPROX(m, refMat);
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdMapTraits> >(m,refMat,nonzeroCoords) ));
#ifdef EIGEN_UNORDERED_MAP_SUPPORT
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdUnorderedMapTraits> >(m,refMat,nonzeroCoords) ));
#endif
#ifdef EIGEN_GOOGLEHASH_SUPPORT
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleDenseHashMapTraits> >(m,refMat,nonzeroCoords) ));
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleSparseHashMapTraits> >(m,refMat,nonzeroCoords) ));
@@ -187,6 +140,72 @@ void check_marketio_vector()
VERIFY_IS_EQUAL(v1,v2);
}
template<typename DenseMatrixType>
void check_marketio_dense()
{
Index rows=DenseMatrixType::MaxRowsAtCompileTime;
if (DenseMatrixType::MaxRowsAtCompileTime==Dynamic){
rows=internal::random<Index>(1,100);
}else if(DenseMatrixType::RowsAtCompileTime==Dynamic){
rows=internal::random<Index>(1,DenseMatrixType::MaxRowsAtCompileTime);
}
Index cols =DenseMatrixType::MaxColsAtCompileTime;
if (DenseMatrixType::MaxColsAtCompileTime==Dynamic){
cols=internal::random<Index>(1,100);
}else if(DenseMatrixType::ColsAtCompileTime==Dynamic){
cols=internal::random<Index>(1,DenseMatrixType::MaxColsAtCompileTime);
}
DenseMatrixType m1, m2;
m1= DenseMatrixType::Random(rows,cols);
saveMarketDense(m1, "dense_extra.mtx");
loadMarketDense(m2, "dense_extra.mtx");
VERIFY_IS_EQUAL(m1,m2);
}
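As a usage note for the check_marketio_dense helper added above, a user-level round trip would look roughly like the sketch below; it assumes the dense Matrix Market helpers live in the Eigen namespace and are exposed by the SparseExtra header, as the test's include suggests.

// Minimal sketch, not part of the commit.
#include <unsupported/Eigen/SparseExtra>   // assumed to declare the dense Market helpers
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd m1 = Eigen::MatrixXd::Random(5, 4);
  Eigen::MatrixXd m2;
  Eigen::saveMarketDense(m1, "dense_extra.mtx");   // write a MatrixMarket "array" file
  Eigen::loadMarketDense(m2, "dense_extra.mtx");   // read it back
  return m1.isApprox(m2) ? 0 : 1;                  // the unit test requires exact equality
}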
template <typename Scalar>
void check_sparse_inverse() {
typedef SparseMatrix<Scalar> MatrixType;
Matrix<Scalar, -1, -1> A;
A.resize(1000, 1000);
A.fill(0);
A.setIdentity();
A.col(0).array() += 1;
A.row(0).array() += 2;
A.col(2).array() += 3;
A.row(7).array() += 3;
A.col(9).array() += 3;
A.block(3, 4, 4, 2).array() += 9;
A.middleRows(10, 50).array() += 3;
A.middleCols(50, 50).array() += 40;
A.block(500, 300, 40, 20).array() += 10;
A.transposeInPlace();
Eigen::SparseLU<MatrixType> slu;
slu.compute(A.sparseView());
Matrix<Scalar, -1, -1> Id(A.rows(), A.cols());
Id.setIdentity();
Matrix<Scalar, -1, -1> inv = slu.solve(Id);
const MatrixType sparseInv = Eigen::SparseInverse<Scalar>().compute(A.sparseView()).inverse();
Scalar sumdiff = 0; // Check the diff only of the non-zero elements
for (Eigen::Index j = 0; j < A.cols(); j++) {
for (typename MatrixType::InnerIterator iter(sparseInv, j); iter; ++iter) {
const Scalar diff = std::abs(inv(iter.row(), iter.col()) - iter.value());
VERIFY_IS_APPROX_OR_LESS_THAN(diff, 1e-11);
if (iter.value() != 0) {
sumdiff += diff;
}
}
}
VERIFY_IS_APPROX_OR_LESS_THAN(sumdiff, 1e-10);
}
EIGEN_DECLARE_TEST(sparse_extra)
{
for(int i = 0; i < g_repeat; i++) {
@@ -195,22 +214,24 @@ EIGEN_DECLARE_TEST(sparse_extra)
CALL_SUBTEST_2( sparse_extra(SparseMatrix<std::complex<double> >(s, s)) );
CALL_SUBTEST_1( sparse_extra(SparseMatrix<double>(s, s)) );
CALL_SUBTEST_3( sparse_extra(DynamicSparseMatrix<double>(s, s)) );
// CALL_SUBTEST_3(( sparse_basic(DynamicSparseMatrix<double>(s, s)) ));
// CALL_SUBTEST_3(( sparse_basic(DynamicSparseMatrix<double,ColMajor,long int>(s, s)) ));
CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, ColMajor> >()) );
CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, RowMajor> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,Dynamic,RowMajor> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,Dynamic,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<std::complex<float>,Dynamic,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<std::complex<double>,Dynamic,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,3> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,3,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,3,4> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,Dynamic,Dynamic,ColMajor,5,5> >()) );
CALL_SUBTEST_5( (check_marketio_vector<Matrix<float,1,Dynamic> >()) );
CALL_SUBTEST_5( (check_marketio_vector<Matrix<double,1,Dynamic> >()) );
@@ -221,6 +242,8 @@ EIGEN_DECLARE_TEST(sparse_extra)
CALL_SUBTEST_5( (check_marketio_vector<Matrix<std::complex<float>,Dynamic,1> >()) );
CALL_SUBTEST_5( (check_marketio_vector<Matrix<std::complex<double>,Dynamic,1> >()) );
CALL_SUBTEST_6((check_sparse_inverse<double>()));
TEST_SET_BUT_UNUSED_VARIABLE(s);
}
}

View File

@@ -191,10 +191,10 @@ template<typename ArrayType> void array_special_functions()
// Check the zeta function against scipy.special.zeta
{
ArrayType x(10), q(10), res(10), ref(10);
x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9, 2, 3, 4;
q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345, -1, -2, -3;
ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan, plusinf, nan, plusinf;
ArrayType x(11), q(11), res(11), ref(11);
x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9, 2, 3, 4, 2000;
q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345, -1, -2, -3, 2000;
ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan, plusinf, nan, plusinf, 0;
CALL_SUBTEST( verify_component_wise(ref, ref); );
CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); );
CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); );
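The extra sample appended above deserves a brief note (an observation, not part of the patch): the function under test is the Hurwitz zeta function

    zeta(x, q) = sum_{n >= 0} 1 / (n + q)^x,

so for x = q = 2000 the largest term is 2000^(-2000), far below the smallest representable double; the expected result therefore underflows to 0, which is the new reference value.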

View File

@@ -114,7 +114,7 @@ template<typename Scalar,typename Packet> void packetmath_real()
Scalar(std::pow(Scalar(10), internal::random<Scalar>(Scalar(-1),Scalar(2))));
}
#if EIGEN_HAS_C99_MATH && (EIGEN_COMP_CXXVER >= 11)
#if EIGEN_HAS_C99_MATH
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLGamma, std::lgamma, internal::plgamma);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErf, std::erf, internal::perf);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErfc, std::erfc, internal::perfc);