ADD: added other eigen lib
@@ -26,11 +26,7 @@ find_package(Adolc)
if(Adolc_FOUND)
include_directories(${ADOLC_INCLUDES})
ei_add_property(EIGEN_TESTED_BACKENDS "Adolc, ")
if(EIGEN_TEST_CXX11)
ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
else()
message(STATUS "Adolc found, but tests require C++11 mode")
endif()
ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
else()
ei_add_property(EIGEN_MISSING_BACKENDS "Adolc, ")
endif()
@@ -56,14 +52,16 @@ ei_add_test(FFT)
ei_add_test(EulerAngles)

find_package(MPREAL)
if(MPREAL_FOUND AND EIGEN_COMPILER_SUPPORT_CPP11)
if(MPREAL_FOUND)
ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
include_directories(${MPREAL_INCLUDES})
ei_add_test(mpreal_support "-std=c++11" "${MPREAL_LIBRARIES}" )
ei_add_test(mpreal_support "" "${MPREAL_LIBRARIES}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
endif()

ei_add_test(NNLS)

ei_add_test(sparse_extra "" "")

find_package(FFTW)
@@ -79,6 +77,17 @@ else()
ei_add_property(EIGEN_MISSING_BACKENDS "fftw, ")
endif()

find_path(POCKETFFT pocketfft_hdronly.h)
if(POCKETFFT)
if(EIGEN_TEST_CXX11)
ei_add_property(EIGEN_TESTED_BACKENDS "pocketfft, ")
include_directories( ${POCKETFFT} )
ei_add_test(pocketfft "-pthread" "${CMAKE_THREAD_LIBS_INIT}" "-DEIGEN_POCKETFFT_DEFAULT" )
endif()
else()
ei_add_property(EIGEN_MISSING_BACKENDS "pocketfft, ")
endif()

option(EIGEN_TEST_OPENGL "Enable OpenGL support in unit tests" OFF)
if(EIGEN_TEST_OPENGL)
find_package(OpenGL)
@@ -103,229 +112,222 @@ ei_add_test(gmres)
ei_add_test(dgmres)
ei_add_test(minres)
ei_add_test(idrs)
ei_add_test(bicgstabl)
ei_add_test(idrstabl)
ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product)
ei_add_test(bessel_functions)
ei_add_test(special_functions)
ei_add_test(special_packetmath "-DEIGEN_FAST_MATH=1")

if(EIGEN_TEST_CXX11)
if(EIGEN_TEST_SYCL)
set(EIGEN_SYCL ON)
# Forward CMake options as preprocessor definitions
if(EIGEN_SYCL_USE_DEFAULT_SELECTOR)
add_definitions(-DEIGEN_SYCL_USE_DEFAULT_SELECTOR=${EIGEN_SYCL_USE_DEFAULT_SELECTOR})
endif()
if(EIGEN_SYCL_NO_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_NO_LOCAL_MEM=${EIGEN_SYCL_NO_LOCAL_MEM})
endif()
if(EIGEN_SYCL_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_LOCAL_MEM=${EIGEN_SYCL_LOCAL_MEM})
endif()
if(EIGEN_SYCL_MAX_GLOBAL_RANGE)
add_definitions(-DEIGEN_SYCL_MAX_GLOBAL_RANGE=${EIGEN_SYCL_MAX_GLOBAL_RANGE})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM0)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM0=${EIGEN_SYCL_LOCAL_THREAD_DIM0})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM1)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM1=${EIGEN_SYCL_LOCAL_THREAD_DIM1})
endif()
if(EIGEN_SYCL_REG_M)
add_definitions(-DEIGEN_SYCL_REG_M=${EIGEN_SYCL_REG_M})
endif()
if(EIGEN_SYCL_REG_N)
add_definitions(-DEIGEN_SYCL_REG_N=${EIGEN_SYCL_REG_N})
endif()
if(EIGEN_SYCL_USE_PROGRAM_CLASS)
add_definitions(-DEIGEN_SYCL_USE_PROGRAM_CLASS=${EIGEN_SYCL_USE_PROGRAM_CLASS})
endif()
if(EIGEN_SYCL_ASYNC_EXECUTION)
add_definitions(-DEIGEN_SYCL_ASYNC_EXECUTION=${EIGEN_SYCL_ASYNC_EXECUTION})
endif()
if(EIGEN_SYCL_DISABLE_SKINNY)
add_definitions(-DEIGEN_SYCL_DISABLE_SKINNY=${EIGEN_SYCL_DISABLE_SKINNY})
endif()
if(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER)
if(EIGEN_TEST_SYCL)
set(EIGEN_SYCL ON)
# Forward CMake options as preprocessor definitions
if(EIGEN_SYCL_USE_DEFAULT_SELECTOR)
add_definitions(-DEIGEN_SYCL_USE_DEFAULT_SELECTOR=${EIGEN_SYCL_USE_DEFAULT_SELECTOR})
endif()
if(EIGEN_SYCL_NO_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_NO_LOCAL_MEM=${EIGEN_SYCL_NO_LOCAL_MEM})
endif()
if(EIGEN_SYCL_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_LOCAL_MEM=${EIGEN_SYCL_LOCAL_MEM})
endif()
if(EIGEN_SYCL_MAX_GLOBAL_RANGE)
add_definitions(-DEIGEN_SYCL_MAX_GLOBAL_RANGE=${EIGEN_SYCL_MAX_GLOBAL_RANGE})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM0)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM0=${EIGEN_SYCL_LOCAL_THREAD_DIM0})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM1)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM1=${EIGEN_SYCL_LOCAL_THREAD_DIM1})
endif()
if(EIGEN_SYCL_REG_M)
add_definitions(-DEIGEN_SYCL_REG_M=${EIGEN_SYCL_REG_M})
endif()
if(EIGEN_SYCL_REG_N)
add_definitions(-DEIGEN_SYCL_REG_N=${EIGEN_SYCL_REG_N})
endif()
if(EIGEN_SYCL_USE_PROGRAM_CLASS)
add_definitions(-DEIGEN_SYCL_USE_PROGRAM_CLASS=${EIGEN_SYCL_USE_PROGRAM_CLASS})
endif()
if(EIGEN_SYCL_ASYNC_EXECUTION)
add_definitions(-DEIGEN_SYCL_ASYNC_EXECUTION=${EIGEN_SYCL_ASYNC_EXECUTION})
endif()
if(EIGEN_SYCL_DISABLE_SKINNY)
add_definitions(-DEIGEN_SYCL_DISABLE_SKINNY=${EIGEN_SYCL_DISABLE_SKINNY})
endif()
if(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER)
add_definitions(-DEIGEN_SYCL_DISABLE_DOUBLE_BUFFER=${EIGEN_SYCL_DISABLE_DOUBLE_BUFFER})
endif()
if(EIGEN_SYCL_DISABLE_RANK1)
add_definitions(-DEIGEN_SYCL_DISABLE_RANK1=${EIGEN_SYCL_DISABLE_RANK1})
endif()
if(EIGEN_SYCL_DISABLE_SCALAR)
add_definitions(-DEIGEN_SYCL_DISABLE_SCALAR=${EIGEN_SYCL_DISABLE_SCALAR})
endif()
if(EIGEN_SYCL_DISABLE_GEMV)
add_definitions(-DEIGEN_SYCL_DISABLE_GEMV=${EIGEN_SYCL_DISABLE_GEMV})
endif()
if(EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION)
add_definitions(-DEIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=${EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION})
endif()

if(EIGEN_SYCL_TRISYCL)
# triSYCL now requires c++17.
set(CMAKE_CXX_STANDARD 17)
else()
if(MSVC)
# Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11
# can cause issues with the ComputeCpp device compiler parsing Visual Studio Headers.
set(CMAKE_CXX_STANDARD 14)
list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
else()
set(CMAKE_CXX_STANDARD 11)
list(APPEND COMPUTECPP_USER_FLAGS -Wall)
endif()
# The following flags are not supported by Clang and can cause warnings
# if used with -Werror so they are removed here.
if(COMPUTECPP_USE_COMPILER_DRIVER)
set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
endif()
list(APPEND COMPUTECPP_USER_FLAGS
-DEIGEN_NO_ASSERTION_CHECKING=1
-no-serial-memop
-Xclang
-cl-mad-enable)
endif()

ei_add_test(cxx11_tensor_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_image_op_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_math_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_forced_eval_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_broadcast_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_device_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_reduction_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_morphing_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_shuffling_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_padding_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_builtins_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_contract_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_concatenation_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_reverse_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_convolution_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_striding_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_chipping_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_layout_swap_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_inflation_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_random_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_generator_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_image_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_volume_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_argmax_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_custom_op_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_scan_sycl ${STD_CXX_FLAG})
set(EIGEN_SYCL OFF)
if(EIGEN_SYCL_DISABLE_RANK1)
add_definitions(-DEIGEN_SYCL_DISABLE_RANK1=${EIGEN_SYCL_DISABLE_RANK1})
endif()
if(EIGEN_SYCL_DISABLE_SCALAR)
add_definitions(-DEIGEN_SYCL_DISABLE_SCALAR=${EIGEN_SYCL_DISABLE_SCALAR})
endif()
if(EIGEN_SYCL_DISABLE_GEMV)
add_definitions(-DEIGEN_SYCL_DISABLE_GEMV=${EIGEN_SYCL_DISABLE_GEMV})
endif()
if(EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION)
add_definitions(-DEIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=${EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION})
endif()

ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
if(EIGEN_SYCL_TRISYCL)
# triSYCL now requires c++17.
set(CMAKE_CXX_STANDARD 17)
else()
if(MSVC)
list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
else()
list(APPEND COMPUTECPP_USER_FLAGS -Wall)
endif()
# The following flags are not supported by Clang and can cause warnings
# if used with -Werror so they are removed here.
if(COMPUTECPP_USE_COMPILER_DRIVER)
set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
endif()
list(APPEND COMPUTECPP_USER_FLAGS
-DEIGEN_NO_ASSERTION_CHECKING=1
-no-serial-memop
-Xclang
-cl-mad-enable)
endif()

ei_add_test(cxx11_meta)
ei_add_test(cxx11_maxsizevector)
ei_add_test(cxx11_tensor_argmax)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_block_access)
ei_add_test(cxx11_tensor_block_eval)
ei_add_test(cxx11_tensor_block_io)
ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_concatenation)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_empty)
ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_expr)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_fixed_size)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_ifft)
ei_add_test(cxx11_tensor_image_patch)
ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_inflation)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_lvalue)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_mixed_indices)
ei_add_test(cxx11_tensor_morphing)
ei_add_test(cxx11_tensor_move)
ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_strings)
ei_add_test(cxx11_tensor_padding)
ei_add_test(cxx11_tensor_patch)
ei_add_test(cxx11_tensor_random)
ei_add_test(cxx11_tensor_reduction)
ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_scan)
ei_add_test(cxx11_tensor_shuffling)
ei_add_test(cxx11_tensor_simple)
ei_add_test(cxx11_tensor_striding)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_thread_local "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_trace)
ei_add_test(cxx11_tensor_volume_patch)
ei_add_test(cxx11_tensor_sycl)
ei_add_test(cxx11_tensor_image_op_sycl)
ei_add_test(cxx11_tensor_math_sycl)
ei_add_test(cxx11_tensor_forced_eval_sycl)
ei_add_test(cxx11_tensor_broadcast_sycl)
ei_add_test(cxx11_tensor_device_sycl)
ei_add_test(cxx11_tensor_reduction_sycl)
ei_add_test(cxx11_tensor_morphing_sycl)
ei_add_test(cxx11_tensor_shuffling_sycl)
ei_add_test(cxx11_tensor_padding_sycl)
ei_add_test(cxx11_tensor_builtins_sycl)
ei_add_test(cxx11_tensor_contract_sycl)
ei_add_test(cxx11_tensor_concatenation_sycl)
ei_add_test(cxx11_tensor_reverse_sycl)
ei_add_test(cxx11_tensor_convolution_sycl)
ei_add_test(cxx11_tensor_striding_sycl)
ei_add_test(cxx11_tensor_chipping_sycl)
ei_add_test(cxx11_tensor_layout_swap_sycl)
ei_add_test(cxx11_tensor_inflation_sycl)
ei_add_test(cxx11_tensor_random_sycl)
ei_add_test(cxx11_tensor_generator_sycl)
ei_add_test(cxx11_tensor_patch_sycl)
ei_add_test(cxx11_tensor_image_patch_sycl)
ei_add_test(cxx11_tensor_volume_patch_sycl)
ei_add_test(cxx11_tensor_argmax_sycl)
ei_add_test(cxx11_tensor_custom_op_sycl)
ei_add_test(cxx11_tensor_scan_sycl)
set(EIGEN_SYCL OFF)
endif()

ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")

ei_add_test(cxx11_meta)
ei_add_test(cxx11_maxsizevector)
ei_add_test(cxx11_tensor_argmax)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_block_access)
ei_add_test(cxx11_tensor_block_eval)
ei_add_test(cxx11_tensor_block_io)
ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_concatenation)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_empty)
ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_expr)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_fixed_size)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_ifft)
ei_add_test(cxx11_tensor_image_patch)
ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_inflation)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_lvalue)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_mixed_indices)
ei_add_test(cxx11_tensor_morphing)
ei_add_test(cxx11_tensor_move)
ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_strings)
ei_add_test(cxx11_tensor_padding)
ei_add_test(cxx11_tensor_patch)
ei_add_test(cxx11_tensor_random)
ei_add_test(cxx11_tensor_reduction)
ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_scan)
ei_add_test(cxx11_tensor_shuffling)
ei_add_test(cxx11_tensor_simple)
ei_add_test(cxx11_tensor_striding)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_thread_local "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_trace)
ei_add_test(cxx11_tensor_volume_patch)
# ei_add_test(cxx11_tensor_symmetry)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# This test requires __uint128_t which is only available on 64bit systems
ei_add_test(cxx11_tensor_uint128)
endif()

endif()

# These tests need nvcc
find_package(CUDA 7.0)
find_package(CUDA 9.0)
if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags since they trigger thousands of compilation warnings
# in the CUDA runtime
# Also remove -ansi that is incompatible with std=c++11.
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")

message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})

if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${ARCH}")
endforeach()
string(APPEND CMAKE_CXX_FLAGS " ${EIGEN_CUDA_CXX_FLAGS}")
else()
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
set(NVCC_ARCH_FLAGS)
# Define an -arch=sm_<arch>, otherwise if GPU does not exactly match one of
# those in the arch list for -gencode, the kernels will fail to run with
# cudaErrorNoKernelImageForDevice
# This can happen with newer cards (e.g. sm_75) and compiling with older
# versions of nvcc (e.g. 9.2) that do not support their specific arch.
list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE)
if(EIGEN_CUDA_COMPUTE_ARCH_SIZE)
list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT)
set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}")
endif()
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()
set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
endif()

set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr")
if (${CUDA_VERSION} STREQUAL "7.0")
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
endif()

set(NVCC_ARCH_FLAGS)
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()
set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_RELAXED_CONSTEXPR} -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")

set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")

ei_add_test(cxx11_tensor_complex_gpu)
@@ -355,7 +357,6 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
ei_add_test(cxx11_tensor_random_gpu)
endif()

unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()

@@ -365,52 +366,46 @@ if (EIGEN_TEST_HIP)
set(HIP_PATH "/opt/rocm/hip" CACHE STRING "Path to the HIP installation.")

if (EXISTS ${HIP_PATH})

list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake)

find_package(HIP REQUIRED)
if (HIP_FOUND)

execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM)

if ((${HIP_PLATFORM} STREQUAL "hcc") OR (${HIP_PLATFORM} STREQUAL "amd"))
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${HIP_PATH}/include)

include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${HIP_PATH}/include)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
#
# complex datatype is not yet supported by HIP
# so leaving out those tests for now
#
# ei_add_test(cxx11_tensor_complex_gpu)
# ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
#
ei_add_test(cxx11_tensor_reduction_gpu)
ei_add_test(cxx11_tensor_argmax_gpu)
ei_add_test(cxx11_tensor_cast_float16_gpu)
ei_add_test(cxx11_tensor_scan_gpu)
ei_add_test(cxx11_tensor_device)

set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
#
# complex datatype is not yet supported by HIP
# so leaving out those tests for now
#
# ei_add_test(cxx11_tensor_complex_gpu)
# ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
#
ei_add_test(cxx11_tensor_reduction_gpu)
ei_add_test(cxx11_tensor_argmax_gpu)
ei_add_test(cxx11_tensor_cast_float16_gpu)
ei_add_test(cxx11_tensor_scan_gpu)
ei_add_test(cxx11_tensor_device)
ei_add_test(cxx11_tensor_gpu)
ei_add_test(cxx11_tensor_contract_gpu)
ei_add_test(cxx11_tensor_of_float16_gpu)
ei_add_test(cxx11_tensor_of_bfloat16_gpu)
ei_add_test(cxx11_tensor_random_gpu)

ei_add_test(cxx11_tensor_gpu)
ei_add_test(cxx11_tensor_contract_gpu)
ei_add_test(cxx11_tensor_of_float16_gpu)
ei_add_test(cxx11_tensor_random_gpu)

unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)

elseif ((${HIP_PLATFORM} STREQUAL "nvcc") OR (${HIP_PLATFORM} STREQUAL "nvidia"))
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
else ()
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
endif()

endif()

else ()

message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but the specified HIP_PATH (${HIP_PATH}) does not exist")

endif()

endif()

@@ -1,2 +1,2 @@
#define test_FFTW test_FFT
#include "FFTW.cpp"
#define EIGEN_FFT_DEFAULT 1
#include "fft_test_shared.h"

@@ -1,262 +1,2 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include "main.h"
#include <unsupported/Eigen/FFT>

template <typename T>
std::complex<T> RandomCpx() { return std::complex<T>( (T)(rand()/(T)RAND_MAX - .5), (T)(rand()/(T)RAND_MAX - .5) ); }

using namespace std;
using namespace Eigen;

template < typename T>
complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }

complex<long double> promote(float x) { return complex<long double>((long double)x); }
complex<long double> promote(double x) { return complex<long double>((long double)x); }
complex<long double> promote(long double x) { return complex<long double>((long double)x); }

template <typename VT1,typename VT2>
long double fft_rmse( const VT1 & fftbuf,const VT2 & timebuf)
{
long double totalpower=0;
long double difpower=0;
long double pi = acos((long double)-1 );
for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
complex<long double> acc = 0;
long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
}
totalpower += numext::abs2(acc);
complex<long double> x = promote(fftbuf[k0]);
complex<long double> dif = acc - x;
difpower += numext::abs2(dif);
//cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(numext::abs2(dif)) << endl;
}
cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
return sqrt(difpower/totalpower);
}
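
For reference, fft_rmse above checks the transform against a brute-force long-double DFT. With N = timebuf.size() and F_k = fftbuf[k], the loop computes (restating the code in math, not text from the commit):

X_k = \sum_{n=0}^{N-1} x_n \, e^{-2\pi i k n / N}, \qquad \mathrm{rmse} = \sqrt{ \frac{\sum_k \lvert X_k - F_k \rvert^2}{\sum_k \lvert X_k \rvert^2} }

so the VERIFY calls below bound the power-normalized relative error of the whole spectrum.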

template <typename VT1,typename VT2>
long double dif_rmse( const VT1 buf1,const VT2 buf2)
{
long double totalpower=0;
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower/totalpower);
}

enum { StdVectorContainer, EigenVectorContainer };

template<int Container, typename Scalar> struct VectorType;

template<typename Scalar> struct VectorType<StdVectorContainer,Scalar>
{
typedef vector<Scalar> type;
};

template<typename Scalar> struct VectorType<EigenVectorContainer,Scalar>
{
typedef Matrix<Scalar,Dynamic,1> type;
};

template <int Container, typename T>
void test_scalar_generic(int nfft)
{
typedef typename FFT<T>::Complex Complex;
typedef typename FFT<T>::Scalar Scalar;
typedef typename VectorType<Container,Scalar>::type ScalarVector;
typedef typename VectorType<Container,Complex>::type ComplexVector;

FFT<T> fft;
ScalarVector tbuf(nfft);
ComplexVector freqBuf;
for (int k=0;k<nfft;++k)
tbuf[k]= (T)( rand()/(double)RAND_MAX - .5);

// make sure it DOESN'T give the right full spectrum answer
// if we've asked for half-spectrum
fft.SetFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check

fft.ClearFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check

if (nfft&1)
return; // odd FFTs get the wrong size inverse FFT

ScalarVector tbuf2;
fft.inv( tbuf2 , freqBuf);
VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check

// verify that the Unscaled flag takes effect
ScalarVector tbuf3;
fft.SetFlag(fft.Unscaled);

fft.inv( tbuf3 , freqBuf);

for (int k=0;k<nfft;++k)
tbuf3[k] *= T(1./nfft);

//for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl;

VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check

// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( tbuf2 , freqBuf);
VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
}

template <typename T>
void test_scalar(int nfft)
{
test_scalar_generic<StdVectorContainer,T>(nfft);
//test_scalar_generic<EigenVectorContainer,T>(nfft);
}

template <int Container, typename T>
void test_complex_generic(int nfft)
{
typedef typename FFT<T>::Complex Complex;
typedef typename VectorType<Container,Complex>::type ComplexVector;

FFT<T> fft;

ComplexVector inbuf(nfft);
ComplexVector outbuf;
ComplexVector buf3;
for (int k=0;k<nfft;++k)
inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
fft.fwd( outbuf , inbuf);

VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check
fft.inv( buf3 , outbuf);

VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check

// verify that the Unscaled flag takes effect
ComplexVector buf4;
fft.SetFlag(fft.Unscaled);
fft.inv( buf4 , outbuf);
for (int k=0;k<nfft;++k)
buf4[k] *= T(1./nfft);
VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check

// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( buf3 , outbuf);
VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
}

template <typename T>
void test_complex(int nfft)
{
test_complex_generic<StdVectorContainer,T>(nfft);
test_complex_generic<EigenVectorContainer,T>(nfft);
}
/*
template <typename T,int nrows,int ncols>
void test_complex2d()
{
typedef typename Eigen::FFT<T>::Complex Complex;
FFT<T> fft;
Eigen::Matrix<Complex,nrows,ncols> src,src2,dst,dst2;

src = Eigen::Matrix<Complex,nrows,ncols>::Random();
//src = Eigen::Matrix<Complex,nrows,ncols>::Identity();

for (int k=0;k<ncols;k++) {
Eigen::Matrix<Complex,nrows,1> tmpOut;
fft.fwd( tmpOut,src.col(k) );
dst2.col(k) = tmpOut;
}

for (int k=0;k<nrows;k++) {
Eigen::Matrix<Complex,1,ncols> tmpOut;
fft.fwd( tmpOut, dst2.row(k) );
dst2.row(k) = tmpOut;
}

fft.fwd2(dst.data(),src.data(),ncols,nrows);
fft.inv2(src2.data(),dst.data(),ncols,nrows);
VERIFY( (src-src2).norm() < test_precision<T>() );
VERIFY( (dst-dst2).norm() < test_precision<T>() );
}
*/

void test_return_by_value(int len)
{
VectorXf in;
VectorXf in1;
in.setRandom( len );
VectorXcf out1,out2;
FFT<float> fft;

fft.SetFlag(fft.HalfSpectrum );

fft.fwd(out1,in);
out2 = fft.fwd(in);
VERIFY( (out1-out2).norm() < test_precision<float>() );
in1 = fft.inv(out1);
VERIFY( (in1-in).norm() < test_precision<float>() );
}

EIGEN_DECLARE_TEST(FFTW)
{
CALL_SUBTEST( test_return_by_value(32) );
//CALL_SUBTEST( ( test_complex2d<float,4,8> () ) ); CALL_SUBTEST( ( test_complex2d<double,4,8> () ) );
//CALL_SUBTEST( ( test_complex2d<long double,4,8> () ) );
CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) );
CALL_SUBTEST( test_complex<float>(256) ); CALL_SUBTEST( test_complex<double>(256) );
CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) );
CALL_SUBTEST( test_complex<float>(5*32) ); CALL_SUBTEST( test_complex<double>(5*32) );
CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) );
CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) );
CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) );

CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) );
CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) );
CALL_SUBTEST( test_scalar<float>(50) ); CALL_SUBTEST( test_scalar<double>(50) );
CALL_SUBTEST( test_scalar<float>(256) ); CALL_SUBTEST( test_scalar<double>(256) );
CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) );

#ifdef EIGEN_HAS_FFTWL
CALL_SUBTEST( test_complex<long double>(32) );
CALL_SUBTEST( test_complex<long double>(256) );
CALL_SUBTEST( test_complex<long double>(3*8) );
CALL_SUBTEST( test_complex<long double>(5*32) );
CALL_SUBTEST( test_complex<long double>(2*3*4) );
CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );

CALL_SUBTEST( test_scalar<long double>(32) );
CALL_SUBTEST( test_scalar<long double>(45) );
CALL_SUBTEST( test_scalar<long double>(50) );
CALL_SUBTEST( test_scalar<long double>(256) );
CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
#endif
}
#define EIGEN_FFTW_DEFAULT 1
#include "fft_test_shared.h"
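
For context on the API the replaced test file exercised (and which fft_test_shared.h presumably keeps exercising), here is a minimal round-trip sketch against Eigen::FFT. It is illustrative only, not part of the commit, and uses just the fwd/inv/SetFlag members seen above:

#include <Eigen/Core>
#include <unsupported/Eigen/FFT>

int main() {
  Eigen::VectorXf in = Eigen::VectorXf::Random(64);  // even-length real signal
  Eigen::VectorXcf freq;
  Eigen::FFT<float> fft;

  fft.SetFlag(Eigen::FFT<float>::HalfSpectrum);  // real input: keep only (N/2)+1 bins
  fft.fwd(freq, in);                             // forward transform

  Eigen::VectorXf back;
  fft.inv(back, freq);  // inverse; Eigen rescales by 1/N unless Unscaled is set
  return (back - in).norm() < 1e-4f ? 0 : 1;     // round trip should reproduce the input
}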

472 libs/eigen/unsupported/test/NNLS.cpp Normal file
@@ -0,0 +1,472 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) Essex Edwards <essex.edwards@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_RUNTIME_NO_MALLOC

#include "main.h"
#include <unsupported/Eigen/NNLS>

/// Check that 'x' solves the NNLS optimization problem `min ||A*x-b|| s.t. 0 <= x`.
/// The \p tolerance parameter is the absolute tolerance on the gradient, A'*(A*x-b).
template <typename MatrixType, typename VectorB, typename VectorX, typename Scalar>
static void verify_nnls_optimality(const MatrixType &A, const VectorB &b, const VectorX &x, const Scalar tolerance) {
// The NNLS optimality conditions are:
//
// * 0 = A'*A*x - A'*b - lambda
// * 0 <= x[i] \forall i
// * 0 <= lambda[i] \forall i
// * 0 = x[i]*lambda[i] \forall i
//
// we don't know lambda, but by assuming the first optimality condition is true,
// we can derive it and then check the other conditions.
const VectorX lambda = A.transpose() * (A * x - b);

// NNLS solutions are EXACTLY not negative.
VERIFY_LE(0, x.minCoeff());

// Exact lambda would be non-negative, but computed lambda might leak a little
VERIFY_LE(-tolerance, lambda.minCoeff());

// x[i]*lambda[i] == 0 <~~> (x[i]==0) || (lambda[i] is small)
VERIFY(((x.array() == Scalar(0)) || (lambda.array() <= tolerance)).all());
}
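
Written out, the comments above are just the KKT conditions of min_x (1/2)‖Ax − b‖² subject to x ≥ 0, with the multiplier λ recovered from stationarity:

\lambda = A^\top (A x - b), \qquad x \ge 0, \qquad \lambda \ge 0, \qquad x_i \lambda_i = 0 \;\; \forall i

and the three VERIFY calls check exactly these, with `tolerance` absorbing floating-point leakage in λ.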

template <typename MatrixType, typename VectorB, typename VectorX>
static void test_nnls_known_solution(const MatrixType &A, const VectorB &b, const VectorX &x_expected) {
using Scalar = typename MatrixType::Scalar;

using std::sqrt;
const Scalar tolerance = sqrt(Eigen::GenericNumTraits<Scalar>::epsilon());
Index max_iter = 5 * A.cols(); // A heuristic guess.
NNLS<MatrixType> nnls(A, max_iter, tolerance);
const VectorX x = nnls.solve(b);

VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_IS_APPROX(x, x_expected);
verify_nnls_optimality(A, b, x, tolerance);
}
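
As a standalone illustration of the interface this helper drives (a sketch reusing the first known problem from further below; not part of the commit):

#include <Eigen/Core>
#include <unsupported/Eigen/NNLS>

int main() {
  // min ||A*x - b|| subject to x >= 0, using the 4x2 system from test_nnls_known_1.
  Eigen::Matrix<double, 4, 2> A;
  Eigen::Matrix<double, 4, 1> b;
  A << 1, 1, 2, 4, 3, 9, 4, 16;
  b << 0.6, 2.2, 4.8, 8.4;

  Eigen::NNLS<Eigen::Matrix<double, 4, 2>> nnls(A);  // default maxIterations() == 2 * cols
  Eigen::Matrix<double, 2, 1> x = nnls.solve(b);

  // The tests expect x ~ (0.1, 0.5) and a Success status here.
  return nnls.info() == Eigen::ComputationInfo::Success ? 0 : 1;
}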

template <typename MatrixType>
static void test_nnls_random_problem() {
//
// SETUP
//

Index cols = MatrixType::ColsAtCompileTime;
if (cols == Dynamic) cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
Index rows = MatrixType::RowsAtCompileTime;
if (rows == Dynamic) rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
VERIFY_LE(cols, rows); // To have a unique LS solution: cols <= rows.

// Make some sort of random test problem from a wide range of scales and condition numbers.
using std::pow;
using Scalar = typename MatrixType::Scalar;
const Scalar sqrtConditionNumber = pow(Scalar(10), internal::random<Scalar>(Scalar(0), Scalar(2)));
const Scalar scaleA = pow(Scalar(10), internal::random<Scalar>(Scalar(-3), Scalar(3)));
const Scalar minSingularValue = scaleA / sqrtConditionNumber;
const Scalar maxSingularValue = scaleA * sqrtConditionNumber;
MatrixType A(rows, cols);
generateRandomMatrixSvs(setupRangeSvs<Matrix<Scalar, Dynamic, 1>>(cols, minSingularValue, maxSingularValue), rows,
cols, A);

// Make a random RHS also with a random scaling.
using VectorB = decltype(A.col(0).eval());
const Scalar scaleB = pow(Scalar(10), internal::random<Scalar>(Scalar(-3), Scalar(3)));
const VectorB b = scaleB * VectorB::Random(A.rows());

//
// ACT
//

using Scalar = typename MatrixType::Scalar;
using std::sqrt;
const Scalar tolerance =
sqrt(Eigen::GenericNumTraits<Scalar>::epsilon()) * b.cwiseAbs().maxCoeff() * A.cwiseAbs().maxCoeff();
Index max_iter = 5 * A.cols(); // A heuristic guess.
NNLS<MatrixType> nnls(A, max_iter, tolerance);
const typename NNLS<MatrixType>::SolutionVectorType &x = nnls.solve(b);

//
// VERIFY
//

// In fact, NNLS can fail on some problems, but they are rare in practice.
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, tolerance);
}

static void test_nnls_handles_zero_rhs() {
//
// SETUP
//
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Zero(rows);

//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);

//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 1); // 0 or 1 would be fine for an edge case like this.
VERIFY_IS_EQUAL(x, VectorXd::Zero(cols));
}

static void test_nnls_handles_Mx0_matrix() {
//
// SETUP
//
const Index rows = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const MatrixXd A(rows, 0);
const VectorXd b = VectorXd::Random(rows);

//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);

//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 0);
VERIFY_IS_EQUAL(x.size(), 0);
}

static void test_nnls_handles_0x0_matrix() {
//
// SETUP
//
const MatrixXd A(0, 0);
const VectorXd b(0);

//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);

//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 0);
VERIFY_IS_EQUAL(x.size(), 0);
}

static void test_nnls_handles_dependent_columns() {
//
// SETUP
//
const Index rank = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE / 2);
const Index cols = 2 * rank;
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, rank) * MatrixXd::Random(rank, cols);
const VectorXd b = VectorXd::Random(rows);

//
// ACT
//
const double tolerance = 1e-8;
NNLS<MatrixXd> nnls(A);
const VectorXd &x = nnls.solve(b);

//
// VERIFY
//
// What should happen when the input 'A' has dependent columns?
// We might still succeed. Or we might not converge.
// Either outcome is fine. If Success is indicated,
// then 'x' must actually be a solution vector.

if (nnls.info() == ComputationInfo::Success) {
verify_nnls_optimality(A, b, x, tolerance);
}
}

static void test_nnls_handles_wide_matrix() {
//
// SETUP
//
const Index cols = internal::random<Index>(2, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(2, cols - 1);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Random(rows);

//
// ACT
//
const double tolerance = 1e-8;
NNLS<MatrixXd> nnls(A);
const VectorXd &x = nnls.solve(b);

//
// VERIFY
//
// What should happen when the input 'A' is wide?
// The unconstrained least-squares problem has infinitely many solutions.
// Subject to the non-negativity constraints,
// the solution might actually be unique (e.g. it is [0,0,..,0]).
// So, NNLS might succeed or it might fail.
// Either outcome is fine. If Success is indicated,
// then 'x' must actually be a solution vector.

if (nnls.info() == ComputationInfo::Success) {
verify_nnls_optimality(A, b, x, tolerance);
}
}

// 4x2 problem, unconstrained solution positive
static void test_nnls_known_1() {
Matrix<double, 4, 2> A(4, 2);
Matrix<double, 4, 1> b(4);
Matrix<double, 2, 1> x(2);
A << 1, 1, 2, 4, 3, 9, 4, 16;
b << 0.6, 2.2, 4.8, 8.4;
x << 0.1, 0.5;

return test_nnls_known_solution(A, b, x);
}

// 4x3 problem, unconstrained solution positive
static void test_nnls_known_2() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);

A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.73, 3.24, 8.31, 16.72;
x << 0.1, 0.5, 0.13;

test_nnls_known_solution(A, b, x);
}

// Simple 4x4 problem, unconstrained solution non-negative
static void test_nnls_known_3() {
Matrix<double, 4, 4> A(4, 4);
Matrix<double, 4, 1> b(4);
Matrix<double, 4, 1> x(4);

A << 1, 1, 1, 1, 2, 4, 8, 16, 3, 9, 27, 81, 4, 16, 64, 256;
b << 0.73, 3.24, 8.31, 16.72;
x << 0.1, 0.5, 0.13, 0;

test_nnls_known_solution(A, b, x);
}

// Simple 4x3 problem, unconstrained solution non-negative
static void test_nnls_known_4() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);

A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.23, 1.24, 3.81, 8.72;
x << 0.1, 0, 0.13;

test_nnls_known_solution(A, b, x);
}

// Simple 4x3 problem, unconstrained solution indefinite
static void test_nnls_known_5() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);

A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.13, 0.84, 2.91, 7.12;
// Solution obtained by original nnls() implementation in Fortran
x << 0.0, 0.0, 0.1106544;

test_nnls_known_solution(A, b, x);
}

static void test_nnls_small_reference_problems() {
test_nnls_known_1();
test_nnls_known_2();
test_nnls_known_3();
test_nnls_known_4();
test_nnls_known_5();
}

static void test_nnls_with_half_precision() {
// The random matrix generation tools don't work with `half`,
// so here's a simpler setup mostly just to check that NNLS compiles & runs with custom scalar types.

using Mat = Matrix<half, 8, 2>;
using VecB = Matrix<half, 8, 1>;
using VecX = Matrix<half, 2, 1>;
Mat A = Mat::Random(); // full-column rank with high probability.
VecB b = VecB::Random();

NNLS<Mat> nnls(A, 20, half(1e-2f));
const VecX x = nnls.solve(b);

VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, half(1e-1));
}

static void test_nnls_special_case_solves_in_zero_iterations() {
// The particular NNLS algorithm that is implemented starts with all variables
// in the active set.
// This test builds a system where all constraints are active at the solution,
// so that initial guess is already correct.
//
// If the implementation changes to another algorithm that does not have this property,
// then this test will need to change (e.g. starting from all constraints inactive,
// or using ADMM, or an interior point solver).

const Index n = 10;
const Index m = 3 * n;
const VectorXd b = VectorXd::Random(m);
// With high probability, this is full column rank, which we need for uniqueness.
MatrixXd A = MatrixXd::Random(m, n);
// Make every column of `A` such that adding it to the active set only /increases/ the objective,
// thus ensuring the NNLS solution is all zeros.
const VectorXd alignment = -(A.transpose() * b).cwiseSign();
A = A * alignment.asDiagonal();

NNLS<MatrixXd> nnls(A);
nnls.solve(b);

VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY(nnls.iterations() == 0);
}
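
Why the sign flip forces an immediate stop (a derivation from the KKT conditions above, not text from the commit): after scaling column i by −sign(aᵢᵀb), the product Aᵀb is ≤ 0 componentwise, so at x = 0

\lambda = A^\top (A \cdot 0 - b) = -A^\top b \ge 0,

every constraint is active, all optimality conditions hold, and the solver's all-active initial guess is already the solution.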

static void test_nnls_special_case_solves_in_n_iterations() {
// The particular NNLS algorithm that is implemented starts with all variables
// in the active set and then adds one variable to the inactive set each iteration.
// This test builds a system where all variables are inactive at the solution,
// so it should take 'n' iterations to get there.
//
// If the implementation changes to another algorithm that does not have this property,
// then this test will need to change (e.g. starting from all constraints inactive,
// or using ADMM, or an interior point solver).

const Index n = 10;
const Index m = 3 * n;
// With high probability, this is full column rank, which we need for uniqueness.
const MatrixXd A = MatrixXd::Random(m, n);
const VectorXd x = VectorXd::Random(n).cwiseAbs().array() + 1; // all positive.
const VectorXd b = A * x;

NNLS<MatrixXd> nnls(A);
nnls.solve(b);

VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY(nnls.iterations() == n);
}

static void test_nnls_returns_NoConvergence_when_maxIterations_is_too_low() {
// Using the special case that takes `n` iterations,
// from `test_nnls_special_case_solves_in_n_iterations`,
// we can set max iterations too low and that should cause the solve to fail.

const Index n = 10;
const Index m = 3 * n;
// With high probability, this is full column rank, which we need for uniqueness.
const MatrixXd A = MatrixXd::Random(m, n);
const VectorXd x = VectorXd::Random(n).cwiseAbs().array() + 1; // all positive.
const VectorXd b = A * x;

NNLS<MatrixXd> nnls(A);
const Index max_iters = n - 1;
nnls.setMaxIterations(max_iters);
nnls.solve(b);

VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::NoConvergence);
VERIFY(nnls.iterations() == max_iters);
}

static void test_nnls_default_maxIterations_is_twice_column_count() {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);

NNLS<MatrixXd> nnls(A);

VERIFY_IS_EQUAL(nnls.maxIterations(), 2 * cols);
}

static void test_nnls_does_not_allocate_during_solve() {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Random(rows);

NNLS<MatrixXd> nnls(A);

internal::set_is_malloc_allowed(false);
nnls.solve(b);
internal::set_is_malloc_allowed(true);
}

static void test_nnls_repeated_calls_to_compute_and_solve() {
const Index cols2 = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows2 = internal::random<Index>(cols2, EIGEN_TEST_MAX_SIZE);
const MatrixXd A2 = MatrixXd::Random(rows2, cols2);
const VectorXd b2 = VectorXd::Random(rows2);

NNLS<MatrixXd> nnls;

for (int i = 0; i < 4; ++i) {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);

nnls.compute(A);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);

for (int j = 0; j < 3; ++j) {
const VectorXd b = VectorXd::Random(rows);
const VectorXd x = nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, 1e-4);
}
}
}

EIGEN_DECLARE_TEST(NNLS) {
// Small matrices with known solutions:
CALL_SUBTEST_1(test_nnls_small_reference_problems());
CALL_SUBTEST_1(test_nnls_handles_Mx0_matrix());
CALL_SUBTEST_1(test_nnls_handles_0x0_matrix());

for (int i = 0; i < g_repeat; i++) {
// Essential NNLS properties, across different types.
CALL_SUBTEST_2(test_nnls_random_problem<MatrixXf>());
CALL_SUBTEST_3(test_nnls_random_problem<MatrixXd>());
using MatFixed = Matrix<double, 12, 5>;
CALL_SUBTEST_4(test_nnls_random_problem<MatFixed>());
CALL_SUBTEST_5(test_nnls_with_half_precision());

// Robustness tests:
CALL_SUBTEST_6(test_nnls_handles_zero_rhs());
CALL_SUBTEST_6(test_nnls_handles_dependent_columns());
CALL_SUBTEST_6(test_nnls_handles_wide_matrix());

// Properties specific to the implementation,
// not NNLS in general.
CALL_SUBTEST_7(test_nnls_special_case_solves_in_zero_iterations());
CALL_SUBTEST_7(test_nnls_special_case_solves_in_n_iterations());
CALL_SUBTEST_7(test_nnls_returns_NoConvergence_when_maxIterations_is_too_low());
CALL_SUBTEST_7(test_nnls_default_maxIterations_is_twice_column_count());
CALL_SUBTEST_8(test_nnls_repeated_calls_to_compute_and_solve());

// This test fails. It hits allocations in HouseholderSequence.h
// test_nnls_does_not_allocate_during_solve();
}
}

@@ -12,14 +12,10 @@
// It is intended to be done for this test only.
#include <Eigen/src/Core/util/DisableStupidWarnings.h>

// tolerance for chekcing number of iterations
#define LM_EVAL_COUNT_TOL 4/3
// tolerance for checking number of iterations
#define LM_EVAL_COUNT_TOL 2

#define LM_CHECK_N_ITERS(SOLVER,NFEV,NJEV) { \
++g_test_level; \
VERIFY_IS_EQUAL(SOLVER.nfev, NFEV); \
VERIFY_IS_EQUAL(SOLVER.njev, NJEV); \
--g_test_level; \
VERIFY(SOLVER.nfev <= NFEV * LM_EVAL_COUNT_TOL); \
VERIFY(SOLVER.njev <= NJEV * LM_EVAL_COUNT_TOL); \
}
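
A minimal sketch of the soft/hard pattern this macro relies on. The semantics of g_test_level are an assumption here: in Eigen's test harness a raised level appears to downgrade VERIFY failures to warnings rather than aborts, so the exact-count checks become advisory while the <= NFEV * LM_EVAL_COUNT_TOL bound stays a hard requirement. This standalone mock is not code from the commit:

#include <cstdio>
#include <cstdlib>

static int g_test_level = 0;  // mock of the harness counter

static void verify(bool ok, const char *expr) {
  if (!ok) {
    std::fprintf(stderr, "%stest failed: %s\n", g_test_level > 0 ? "WARNING: " : "", expr);
    if (g_test_level == 0) std::abort();  // hard failure only at level 0
  }
}

int main() {
  const int nfev = 7, expected = 6, tol = 2;  // cf. LM_EVAL_COUNT_TOL
  ++g_test_level;
  verify(nfev == expected, "nfev == expected");     // advisory: warns, keeps going
  --g_test_level;
  verify(nfev <= expected * tol, "nfev <= bound");  // the real requirement
  return 0;
}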
@@ -113,10 +109,10 @@ void testChkder()
}

// Generic functor
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct Functor
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
@@ -186,9 +182,10 @@ void testLmder1()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.lmder1(x);
EIGEN_UNUSED_VARIABLE(info)

// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);

// check norm
@@ -214,9 +211,10 @@ void testLmder()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)

// check return values
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);

// check norm
@@ -298,9 +296,10 @@ void testHybrj1()
hybrj_functor functor;
HybridNonLinearSolver<hybrj_functor> solver(functor);
info = solver.hybrj1(x);
EIGEN_UNUSED_VARIABLE(info)

// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(solver, 11, 1);

// check norm
@@ -332,9 +331,10 @@ void testHybrj()
solver.diag.setConstant(n, 1.);
solver.useExternalScaling = true;
info = solver.solve(x);
EIGEN_UNUSED_VARIABLE(info)

// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(solver, 11, 1);

// check norm
@@ -385,10 +385,11 @@ void testHybrd1()
hybrd_functor functor;
HybridNonLinearSolver<hybrd_functor> solver(functor);
info = solver.hybrd1(x);
EIGEN_UNUSED_VARIABLE(info)

// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(solver.nfev, 20);
// VERIFY_IS_EQUAL(info, 1);
VERIFY(solver.nfev <= 20*LM_EVAL_COUNT_TOL);

// check norm
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
@@ -416,10 +417,11 @@ void testHybrd()
solver.diag.setConstant(n, 1.);
solver.useExternalScaling = true;
info = solver.solveNumericalDiff(x);
EIGEN_UNUSED_VARIABLE(info)

// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(solver.nfev, 14);
// VERIFY_IS_EQUAL(info, 1);
VERIFY(solver.nfev <= 14*LM_EVAL_COUNT_TOL);

// check norm
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
@@ -487,9 +489,10 @@ void testLmstr1()
lmstr_functor functor;
LevenbergMarquardt<lmstr_functor> lm(functor);
info = lm.lmstr1(x);
EIGEN_UNUSED_VARIABLE(info)

// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);

// check norm
@@ -515,9 +518,10 @@ void testLmstr()
lmstr_functor functor;
LevenbergMarquardt<lmstr_functor> lm(functor);
info = lm.minimizeOptimumStorage(x);
EIGEN_UNUSED_VARIABLE(info)

// check return values
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);

// check norm
@@ -570,10 +574,11 @@ void testLmdif1()
|
||||
lmdif_functor functor;
|
||||
DenseIndex nfev = -1; // initialize to avoid maybe-uninitialized warning
|
||||
info = LevenbergMarquardt<lmdif_functor>::lmdif1(functor, x, &nfev);
|
||||
EIGEN_UNUSED_VARIABLE(info)
|
||||
|
||||
// check return value
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(nfev, 26);
|
||||
// VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY( nfev <= 26*LM_EVAL_COUNT_TOL);
|
||||
|
||||
// check norm
|
||||
functor(x, fvec);
|
||||
@@ -601,10 +606,11 @@ void testLmdif()
|
||||
NumericalDiff<lmdif_functor> numDiff(functor);
|
||||
LevenbergMarquardt<NumericalDiff<lmdif_functor> > lm(numDiff);
|
||||
info = lm.minimize(x);
|
||||
EIGEN_UNUSED_VARIABLE(info)
|
||||
|
||||
// check return values
|
||||
VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY_IS_EQUAL(lm.nfev, 26);
|
||||
// VERIFY_IS_EQUAL(info, 1);
|
||||
VERIFY(lm.nfev <= 26*LM_EVAL_COUNT_TOL);
|
||||
|
||||
// check norm
|
||||
fnorm = lm.fvec.blueNorm();
|
||||
@@ -686,9 +692,10 @@ void testNistChwirut2(void)
  chwirut2_functor functor;
  LevenbergMarquardt<chwirut2_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 10, 8);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02);
@@ -706,9 +713,10 @@ void testNistChwirut2(void)
  lm.parameters.ftol = 1.E6*NumTraits<double>::epsilon();
  lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 7, 6);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02);
@@ -764,9 +772,10 @@ void testNistMisra1a(void)
  misra1a_functor functor;
  LevenbergMarquardt<misra1a_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 19, 15);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01);
@@ -780,9 +789,10 @@ void testNistMisra1a(void)
  x<< 250., 0.0005;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 5, 4);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01);
@@ -852,9 +862,10 @@ void testNistHahn1(void)
  hahn1_functor functor;
  LevenbergMarquardt<hahn1_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 11, 10);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00);
@@ -873,9 +884,10 @@ void testNistHahn1(void)
  x<< .1, -.1, .005, -.000001, -.005, .0001, -.0000001;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 11, 10);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00);
@@ -936,9 +948,10 @@ void testNistMisra1d(void)
  misra1d_functor functor;
  LevenbergMarquardt<misra1d_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 3);
  // VERIFY_IS_EQUAL(info, 3);
  LM_CHECK_N_ITERS(lm, 9, 7);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02);
@@ -952,9 +965,10 @@ void testNistMisra1d(void)
  x<< 450., 0.0003;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 4, 3);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02);
@@ -1012,13 +1026,14 @@ void testNistLanczos1(void)
  lanczos1_functor functor;
  LevenbergMarquardt<lanczos1_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 2);
  // VERIFY_IS_EQUAL(info, 2);
  LM_CHECK_N_ITERS(lm, 79, 72);
  // check norm^2
  std::cout.precision(30);
  std::cout << lm.fvec.squaredNorm() << "\n";
  // std::cout.precision(30);
  // std::cout << lm.fvec.squaredNorm() << "\n";
  VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
  // check x
  VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
@@ -1034,9 +1049,10 @@ void testNistLanczos1(void)
  x<< 0.5, 0.7, 3.6, 4.2, 4., 6.3;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 2);
  // VERIFY_IS_EQUAL(info, 2);
  LM_CHECK_N_ITERS(lm, 9, 8);
  // check norm^2
  VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
@@ -1098,9 +1114,10 @@ void testNistRat42(void)
  rat42_functor functor;
  LevenbergMarquardt<rat42_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 10, 8);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00);
@@ -1115,9 +1132,10 @@ void testNistRat42(void)
  x<< 75., 2.5, 0.07;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 6, 5);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00);
@@ -1174,9 +1192,10 @@ void testNistMGH10(void)
  MGH10_functor functor;
  LevenbergMarquardt<MGH10_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 2);
  // VERIFY_IS_EQUAL(info, 2);
  LM_CHECK_N_ITERS(lm, 284, 249);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01);
@@ -1191,9 +1210,10 @@ void testNistMGH10(void)
  x<< 0.02, 4000., 250.;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 3);
  // VERIFY_IS_EQUAL(info, 3);
  LM_CHECK_N_ITERS(lm, 126, 116);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01);
@@ -1251,9 +1271,10 @@ void testNistBoxBOD(void)
  lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
  lm.parameters.factor = 10.;
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 31, 25);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
@@ -1270,10 +1291,11 @@ void testNistBoxBOD(void)
  lm.parameters.ftol = NumTraits<double>::epsilon();
  lm.parameters.xtol = NumTraits<double>::epsilon();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 15, 14);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 20, 14);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
  // check x
@@ -1331,6 +1353,7 @@ void testNistMGH17(void)
  lm.parameters.xtol = NumTraits<double>::epsilon();
  lm.parameters.maxfev = 1000;
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
@@ -1342,7 +1365,7 @@ void testNistMGH17(void)
  VERIFY_IS_APPROX(x[4], 2.2122699662E-02);

  // check return value
  VERIFY_IS_EQUAL(info, 2);
  // VERIFY_IS_EQUAL(info, 2);
  LM_CHECK_N_ITERS(lm, 602, 545);

  /*
@@ -1352,9 +1375,10 @@ void testNistMGH17(void)
  // do the computation
  lm.resetParameters();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 18, 15);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
@@ -1417,9 +1441,10 @@ void testNistMGH09(void)
  LevenbergMarquardt<MGH09_functor> lm(functor);
  lm.parameters.maxfev = 1000;
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 490, 376);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04);
@@ -1436,9 +1461,10 @@ void testNistMGH09(void)
  // do the computation
  lm.resetParameters();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 18, 16);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04);
@@ -1501,9 +1527,10 @@ void testNistBennett5(void)
  LevenbergMarquardt<Bennett5_functor> lm(functor);
  lm.parameters.maxfev = 1000;
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 758, 744);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04);
@@ -1518,9 +1545,10 @@ void testNistBennett5(void)
  // do the computation
  lm.resetParameters();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 203, 192);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04);
@@ -1587,9 +1615,10 @@ void testNistThurber(void)
  lm.parameters.ftol = 1.E4*NumTraits<double>::epsilon();
  lm.parameters.xtol = 1.E4*NumTraits<double>::epsilon();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 39, 36);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03);
@@ -1611,9 +1640,10 @@ void testNistThurber(void)
  lm.parameters.ftol = 1.E4*NumTraits<double>::epsilon();
  lm.parameters.xtol = 1.E4*NumTraits<double>::epsilon();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 29, 28);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03);
@@ -1677,9 +1707,10 @@ void testNistRat43(void)
  lm.parameters.ftol = 1.E6*NumTraits<double>::epsilon();
  lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 27, 20);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03);
@@ -1698,9 +1729,10 @@ void testNistRat43(void)
  lm.parameters.ftol = 1.E5*NumTraits<double>::epsilon();
  lm.parameters.xtol = 1.E5*NumTraits<double>::epsilon();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 9, 8);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03);
@@ -1760,9 +1792,10 @@ void testNistEckerle4(void)
  eckerle4_functor functor;
  LevenbergMarquardt<eckerle4_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 18, 15);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03);
@@ -1777,9 +1810,10 @@ void testNistEckerle4(void)
  x<< 1.5, 5., 450.;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  LM_CHECK_N_ITERS(lm, 7, 6);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03);
@@ -9,10 +9,10 @@
#include <unsupported/Eigen/NumericalDiff>

// Generic functor
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct Functor
{
  typedef _Scalar Scalar;
  typedef Scalar_ Scalar;
  enum {
    InputsAtCompileTime = NX,
    ValuesAtCompileTime = NY

@@ -7,6 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_NO_STATIC_ASSERT

#include "main.h"
#include <unsupported/Eigen/AlignedVector3>

@@ -29,10 +29,10 @@ EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
  return (p-Vector(Scalar(-1),Scalar(1.))).norm() + (p.array() * p.array()).sum() + p.dot(p);
}

template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct TestFunc1
{
  typedef _Scalar Scalar;
  typedef Scalar_ Scalar;
  enum {
    InputsAtCompileTime = NX,
    ValuesAtCompileTime = NY
@@ -106,7 +106,6 @@ struct TestFunc1
};

#if EIGEN_HAS_VARIADIC_TEMPLATES
/* Test functor for the C++11 features. */
template <typename Scalar>
struct integratorFunctor
@@ -186,7 +185,6 @@ template<typename Func> void forward_jacobian_cpp11(const Func& f)
  VERIFY_IS_APPROX(y, yref);
  VERIFY_IS_APPROX(j, jref);
}
#endif

template<typename Func> void forward_jacobian(const Func& f)
{
@@ -247,9 +245,7 @@ void test_autodiff_jacobian()
  CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) ));
  CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) ));
  CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) ));
#if EIGEN_HAS_VARIADIC_TEMPLATES
  CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) ));
#endif
}

@@ -84,9 +84,7 @@ void check_limits_specialization()
  // workaround "unused typedef" warning:
  VERIFY(!bool(internal::is_same<B, A>::value));

#if EIGEN_HAS_CXX11
  VERIFY(bool(std::is_base_of<B, A>::value));
#endif
}

EIGEN_DECLARE_TEST(autodiff_scalar)
libs/eigen/unsupported/test/bicgstabl.cpp (new file, 31 lines)
@@ -0,0 +1,31 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2012 Kolja Brix <brix@igpm.rwth-aachen.de>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include "../../test/sparse_solver.h"
#include <Eigen/IterativeSolvers>

template<typename T> void test_bicgstabl_T()
{
  BiCGSTABL<SparseMatrix<T>, DiagonalPreconditioner<T> > bicgstabl_colmajor_diag;
  BiCGSTABL<SparseMatrix<T>, IncompleteLUT<T> > bicgstabl_colmajor_ilut;

  // This does not change the tolerance of the test, only the tolerance of the solver.
  bicgstabl_colmajor_diag.setTolerance(NumTraits<T>::epsilon()*20);
  bicgstabl_colmajor_ilut.setTolerance(NumTraits<T>::epsilon()*20);

  CALL_SUBTEST( check_sparse_square_solving(bicgstabl_colmajor_diag) );
  CALL_SUBTEST( check_sparse_square_solving(bicgstabl_colmajor_ilut) );
}

EIGEN_DECLARE_TEST(bicgstabl)
{
  CALL_SUBTEST_1(test_bicgstabl_T<double>());
  CALL_SUBTEST_2(test_bicgstabl_T<std::complex<double> >());
}
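As a usage sketch, the new BiCGSTAB(L) solver plugs into the standard sparse-solver interface (the class name and template parameters follow the test above; the matrix and right-hand side here are placeholders):

#include <Eigen/Sparse>
#include <unsupported/Eigen/IterativeSolvers>

void solve_example(const Eigen::SparseMatrix<double>& A, const Eigen::VectorXd& b) {
  // BiCGSTAB(L) with an incomplete-LU preconditioner, as exercised above.
  Eigen::BiCGSTABL<Eigen::SparseMatrix<double>, Eigen::IncompleteLUT<double> > solver;
  solver.setTolerance(1e-10);  // solver tolerance, not the test tolerance
  solver.compute(A);           // set up the preconditioner
  Eigen::VectorXd x = solver.solve(b);
}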
@@ -14,57 +14,57 @@

using Eigen::Tensor;
using Eigen::array;
using Eigen::Tuple;
using Eigen::Pair;

template <int DataLayout>
static void test_simple_index_tuples()
static void test_simple_index_pairs()
{
  Tensor<float, 4, DataLayout> tensor(2,3,5,7);
  tensor.setRandom();
  tensor = (tensor + tensor.constant(0.5)).log();

  Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
  index_tuples = tensor.index_tuples();
  Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
  index_pairs = tensor.index_pairs();

  for (DenseIndex n = 0; n < 2*3*5*7; ++n) {
    const Tuple<DenseIndex, float>& v = index_tuples.coeff(n);
    const Pair<DenseIndex, float>& v = index_pairs.coeff(n);
    VERIFY_IS_EQUAL(v.first, n);
    VERIFY_IS_EQUAL(v.second, tensor.coeff(n));
  }
}

template <int DataLayout>
static void test_index_tuples_dim()
static void test_index_pairs_dim()
{
  Tensor<float, 4, DataLayout> tensor(2,3,5,7);
  tensor.setRandom();
  tensor = (tensor + tensor.constant(0.5)).log();

  Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
  Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);

  index_tuples = tensor.index_tuples();
  index_pairs = tensor.index_pairs();

  for (Eigen::DenseIndex n = 0; n < tensor.size(); ++n) {
    const Tuple<DenseIndex, float>& v = index_tuples(n); //(i, j, k, l);
    const Pair<DenseIndex, float>& v = index_pairs(n); //(i, j, k, l);
    VERIFY_IS_EQUAL(v.first, n);
    VERIFY_IS_EQUAL(v.second, tensor(n));
  }
}

template <int DataLayout>
static void test_argmax_tuple_reducer()
static void test_argmax_pair_reducer()
{
  Tensor<float, 4, DataLayout> tensor(2,3,5,7);
  tensor.setRandom();
  tensor = (tensor + tensor.constant(0.5)).log();

  Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
  index_tuples = tensor.index_tuples();
  Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
  index_pairs = tensor.index_pairs();

  Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
  Tensor<Pair<DenseIndex, float>, 0, DataLayout> reduced;
  DimensionList<DenseIndex, 4> dims;
  reduced = index_tuples.reduce(
      dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
  reduced = index_pairs.reduce(
      dims, internal::ArgMaxPairReducer<Pair<DenseIndex, float> >());

  Tensor<float, 0, DataLayout> maxi = tensor.maximum();

@@ -72,9 +72,9 @@ static void test_argmax_tuple_reducer()

  array<DenseIndex, 3> reduce_dims;
  for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
  Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
  reduced_by_dims = index_tuples.reduce(
      reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
  Tensor<Pair<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
  reduced_by_dims = index_pairs.reduce(
      reduce_dims, internal::ArgMaxPairReducer<Pair<DenseIndex, float> >());

  Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims);

@@ -84,19 +84,19 @@ static void test_argmax_tuple_reducer()
}

template <int DataLayout>
static void test_argmin_tuple_reducer()
static void test_argmin_pair_reducer()
{
  Tensor<float, 4, DataLayout> tensor(2,3,5,7);
  tensor.setRandom();
  tensor = (tensor + tensor.constant(0.5)).log();

  Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
  index_tuples = tensor.index_tuples();
  Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
  index_pairs = tensor.index_pairs();

  Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
  Tensor<Pair<DenseIndex, float>, 0, DataLayout> reduced;
  DimensionList<DenseIndex, 4> dims;
  reduced = index_tuples.reduce(
      dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
  reduced = index_pairs.reduce(
      dims, internal::ArgMinPairReducer<Pair<DenseIndex, float> >());

  Tensor<float, 0, DataLayout> mini = tensor.minimum();

@@ -104,9 +104,9 @@ static void test_argmin_tuple_reducer()

  array<DenseIndex, 3> reduce_dims;
  for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
  Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
  reduced_by_dims = index_tuples.reduce(
      reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
  Tensor<Pair<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
  reduced_by_dims = index_pairs.reduce(
      reduce_dims, internal::ArgMinPairReducer<Pair<DenseIndex, float> >());

  Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims);

@@ -275,14 +275,14 @@ static void test_argmin_dim()

EIGEN_DECLARE_TEST(cxx11_tensor_argmax)
{
  CALL_SUBTEST(test_simple_index_tuples<RowMajor>());
  CALL_SUBTEST(test_simple_index_tuples<ColMajor>());
  CALL_SUBTEST(test_index_tuples_dim<RowMajor>());
  CALL_SUBTEST(test_index_tuples_dim<ColMajor>());
  CALL_SUBTEST(test_argmax_tuple_reducer<RowMajor>());
  CALL_SUBTEST(test_argmax_tuple_reducer<ColMajor>());
  CALL_SUBTEST(test_argmin_tuple_reducer<RowMajor>());
  CALL_SUBTEST(test_argmin_tuple_reducer<ColMajor>());
  CALL_SUBTEST(test_simple_index_pairs<RowMajor>());
  CALL_SUBTEST(test_simple_index_pairs<ColMajor>());
  CALL_SUBTEST(test_index_pairs_dim<RowMajor>());
  CALL_SUBTEST(test_index_pairs_dim<ColMajor>());
  CALL_SUBTEST(test_argmax_pair_reducer<RowMajor>());
  CALL_SUBTEST(test_argmax_pair_reducer<ColMajor>());
  CALL_SUBTEST(test_argmin_pair_reducer<RowMajor>());
  CALL_SUBTEST(test_argmin_pair_reducer<ColMajor>());
  CALL_SUBTEST(test_simple_argmax<RowMajor>());
  CALL_SUBTEST(test_simple_argmax<ColMajor>());
  CALL_SUBTEST(test_simple_argmin<RowMajor>());
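In brief, the renamed API behaves exactly as before, only with Pair in place of Tuple; a minimal sketch assuming the Tensor module as exercised above:

#include <unsupported/Eigen/CXX11/Tensor>

void index_pairs_example() {
  Eigen::Tensor<float, 2> t(3, 4);
  t.setRandom();
  Eigen::Tensor<Eigen::Pair<Eigen::DenseIndex, float>, 2> ip(3, 4);
  ip = t.index_pairs();  // each element holds (linear index, value)
}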
@@ -16,7 +16,6 @@

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1

#include "main.h"

@@ -25,10 +25,8 @@ static void test_1d()
  vec1(4) = 23; vec2(4) = 4;
  vec1(5) = 42; vec2(5) = 5;

  int col_major[6];
  int row_major[6];
  memset(col_major, 0, 6*sizeof(int));
  memset(row_major, 0, 6*sizeof(int));
  int col_major[6] = {0};
  int row_major[6] = {0};
  TensorMap<Tensor<int, 1> > vec3(col_major, 6);
  TensorMap<Tensor<int, 1, RowMajor> > vec4(row_major, 6);
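The replacement relies on a basic C++ guarantee: an aggregate initializer with fewer elements than the array value-initializes the rest, so `= {0}` zeroes the whole array and the memset becomes redundant. In isolation:

#include <cstring>

void zero_init_example() {
  int a[6] = {0};               // all six elements are 0
  int b[6];
  std::memset(b, 0, sizeof(b)); // same effect, but a separate statement
}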
@@ -88,10 +86,8 @@ static void test_2d()
  mat2(1,1) = 4;
  mat2(1,2) = 5;

  int col_major[6];
  int row_major[6];
  memset(col_major, 0, 6*sizeof(int));
  memset(row_major, 0, 6*sizeof(int));
  int col_major[6] = {0};
  int row_major[6] = {0};
  TensorMap<Tensor<int, 2> > mat3(row_major, 2, 3);
  TensorMap<Tensor<int, 2, RowMajor> > mat4(col_major, 2, 3);

@@ -148,10 +144,8 @@ static void test_3d()
    }
  }

  int col_major[2*3*7];
  int row_major[2*3*7];
  memset(col_major, 0, 2*3*7*sizeof(int));
  memset(row_major, 0, 2*3*7*sizeof(int));
  int col_major[2*3*7] = {0};
  int row_major[2*3*7] = {0};
  TensorMap<Tensor<int, 3> > mat3(col_major, 2, 3, 7);
  TensorMap<Tensor<int, 3, RowMajor> > mat4(row_major, 2, 3, 7);

@@ -286,7 +280,6 @@ static void test_compound_assign()
}

static void test_std_initializers_tensor() {
#if EIGEN_HAS_VARIADIC_TEMPLATES
  Tensor<int, 1> a(3);
  a.setValues({0, 1, 2});
  VERIFY_IS_EQUAL(a(0), 0);
@@ -355,7 +348,6 @@ static void test_std_initializers_tensor() {
  VERIFY_IS_EQUAL(c(2, 1, 1), 25);
  VERIFY_IS_EQUAL(c(2, 1, 2), 26);
  VERIFY_IS_EQUAL(c(2, 1, 3), 27);
#endif // EIGEN_HAS_VARIADIC_TEMPLATES
}

EIGEN_DECLARE_TEST(cxx11_tensor_assign)
@@ -244,7 +244,7 @@ static void test_eval_tensor_binary_with_unary_expr_block() {
  rhs.setRandom();

  VerifyBlockEvaluator<T, NumDims, Layout>(
      (lhs.square() + rhs.square()).sqrt(),
      (lhs.abs() + rhs.abs()).sqrt(),
      [&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}

@@ -91,15 +91,7 @@ static void test_vectorized_broadcasting()
    }
  }

#if EIGEN_HAS_VARIADIC_TEMPLATES
  tensor.resize(11,3,5);
#else
  array<Index, 3> new_dims;
  new_dims[0] = 11;
  new_dims[1] = 3;
  new_dims[2] = 5;
  tensor.resize(new_dims);
#endif

  tensor.setRandom();
  broadcast = tensor.broadcast(broadcasts);
@@ -124,15 +116,7 @@ static void test_static_broadcasting()
  Tensor<float, 3, DataLayout> tensor(8,3,5);
  tensor.setRandom();

#if defined(EIGEN_HAS_INDEX_LIST)
  Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts;
#else
  Eigen::array<int, 3> broadcasts;
  broadcasts[0] = 2;
  broadcasts[1] = 3;
  broadcasts[2] = 4;
#endif

  Tensor<float, 3, DataLayout> broadcast;
  broadcast = tensor.broadcast(broadcasts);

@@ -148,15 +132,7 @@ static void test_static_broadcasting()
    }
  }

#if EIGEN_HAS_VARIADIC_TEMPLATES
  tensor.resize(11,3,5);
#else
  array<Index, 3> new_dims;
  new_dims[0] = 11;
  new_dims[1] = 3;
  new_dims[2] = 5;
  tensor.resize(new_dims);
#endif

  tensor.setRandom();
  broadcast = tensor.broadcast(broadcasts);
@@ -256,6 +232,22 @@ static void test_simple_broadcasting_n_by_one()
  }
}

template <int DataLayout>
static void test_size_one_broadcasting()
{
  Tensor<float, 1, DataLayout> tensor(1);
  tensor.setRandom();
  array<ptrdiff_t, 1> broadcasts = {64};
  Tensor<float, 1, DataLayout> broadcast;
  broadcast = tensor.broadcast(broadcasts);

  VERIFY_IS_EQUAL(broadcast.dimension(0), broadcasts[0]);

  for (int i = 0; i < broadcasts[0]; ++i) {
    VERIFY_IS_EQUAL(tensor(0), broadcast(i));
  }
}
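The new test covers the degenerate case where every output element replicates the single input coefficient; as shown in this reduced sketch:

#include <unsupported/Eigen/CXX11/Tensor>

void size_one_broadcast_example() {
  Eigen::Tensor<float, 1> t(1);
  t.setRandom();
  Eigen::array<ptrdiff_t, 1> bcast = {{4}};
  Eigen::Tensor<float, 1> b = t.broadcast(bcast);  // b(0..3) all equal t(0)
}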
template <int DataLayout>
static void test_simple_broadcasting_one_by_n_by_one_1d()
{
@@ -328,4 +320,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_broadcasting)
  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<ColMajor>());
  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<RowMajor>());
  CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<RowMajor>());
  CALL_SUBTEST(test_size_one_broadcasting<ColMajor>());
  CALL_SUBTEST(test_size_one_broadcasting<RowMajor>());
}

@@ -38,24 +38,24 @@ template <typename T> T cwiseMin(T x, T y) { return cl::sycl::min(x, y); }
}
}

struct EqualAssignement {
struct EqualAssignment {
  template <typename Lhs, typename Rhs>
  void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; }
};

struct PlusEqualAssignement {
struct PlusEqualAssignment {
  template <typename Lhs, typename Rhs>
  void operator()(Lhs& lhs, const Rhs& rhs) { lhs += rhs; }
};

template <typename DataType, int DataLayout,
          typename Assignement, typename Operator>
          typename Assignment, typename Operator>
void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
                                    const array<int64_t, 3>& tensor_range) {
  Operator op;
  Assignement asgn;
  Assignment asgn;
  {
    /* Assignement(out, Operator(in)) */
    /* Assignment(out, Operator(in)) */
    Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
    Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
    in = in.random() + DataType(0.01);
@@ -84,9 +84,10 @@ void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
    sycl_device.deallocate(gpu_data_out);
  }
  {
    /* Assignement(out, Operator(out)) */
    /* Assignment(out, Operator(out)) */
    Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
    out = out.random() + DataType(0.01);
    // Offset with 1 to avoid tiny output (< 1e-6) as they can easily fail.
    out = out.random() + DataType(1);
    Tensor<DataType, 3, DataLayout, int64_t> reference(out);
    DataType *gpu_data_out = static_cast<DataType *>(
        sycl_device.allocate(out.size() * sizeof(DataType)));
@@ -137,11 +138,11 @@ DECLARE_UNARY_STRUCT(isnan)
DECLARE_UNARY_STRUCT(isfinite)
DECLARE_UNARY_STRUCT(isinf)

template <typename DataType, int DataLayout, typename Assignement>
template <typename DataType, int DataLayout, typename Assignment>
void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device,
                                         const array<int64_t, 3>& tensor_range) {
#define RUN_UNARY_TEST(FUNC) \
  test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \
  test_unary_builtins_for_scalar<DataType, DataLayout, Assignment, \
                                 op_##FUNC>(sycl_device, tensor_range)
  RUN_UNARY_TEST(abs);
  RUN_UNARY_TEST(sqrt);
@@ -190,9 +191,9 @@ template <typename DataType, int DataLayout>
void test_unary_builtins(const Eigen::SyclDevice& sycl_device,
                         const array<int64_t, 3>& tensor_range) {
  test_unary_builtins_for_assignement<DataType, DataLayout,
                                      PlusEqualAssignement>(sycl_device, tensor_range);
                                      PlusEqualAssignment>(sycl_device, tensor_range);
  test_unary_builtins_for_assignement<DataType, DataLayout,
                                      EqualAssignement>(sycl_device, tensor_range);
                                      EqualAssignment>(sycl_device, tensor_range);
  test_unary_builtins_return_bool<DataType, DataLayout,
                                  op_isnan>(sycl_device, tensor_range);
  test_unary_builtins_return_bool<DataType, DataLayout,

@@ -149,7 +149,7 @@ struct test_cast_runner {

// Only certain types allow cast from std::complex<>.
template<typename Scalar>
struct test_cast_runner<Scalar, typename internal::enable_if<NumTraits<Scalar>::IsComplex>::type> {
struct test_cast_runner<Scalar, std::enable_if_t<NumTraits<Scalar>::IsComplex>> {
  static void run() {
    test_type_cast<Scalar, half>();
    test_type_cast<Scalar, bfloat16>();

@@ -25,10 +25,6 @@ typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout>
void test_gpu_contraction(int m_size, int k_size, int n_size)
{
  std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
  // with these dimensions, the output has 300 * 140 elements, which is
  // more than 30 * 1024, which is the number of threads in blocks on
  // a 15 SM GK110 GPU
  Tensor<float, 2, DataLayout> t_left(m_size, k_size);
  Tensor<float, 2, DataLayout> t_right(k_size, n_size);
  Tensor<float, 2, DataLayout> t_result(m_size, n_size);
@@ -171,25 +167,45 @@ void test_gpu_contraction_n() {

template<int DataLayout>
void test_gpu_contraction_sizes() {
  int m_sizes[] = { 31,  39,   63,   64,   65,
                   127, 129,  255,  257,  511,
                   512, 513, 1023, 1024, 1025};
  int m_sizes[3][5] = {{ 31,  39,   63,   64,   65},
                       {127, 129,  255,  257,  511},
                       {512, 513, 1023, 1024, 1025}};

  int n_sizes[] = { 31,  39,   63,   64,   65,
                   127, 129,  255,  257,  511,
                   512, 513, 1023, 1024, 1025};
  int n_sizes[3][5] = {{ 31,  39,   63,   64,   65},
                       {127, 129,  255,  257,  511},
                       {512, 513, 1023, 1024, 1025}};

  int k_sizes[] = { 31,  39,  63,  64,   65,
                    95,  96, 127, 129,  255,
                   257, 511, 512, 513, 1023,
                  1024, 1025};
  int k_sizes[3][6] = {{ 31,  39,  63,   64,   65,   95},
                       { 96, 127, 129,  255,  257,  511},
                       {512, 513, 725, 1023, 1024, 1025}};

  for (int i = 0; i < 15; i++) {
    for (int j = 0; j < 15; j++) {
      for (int k = 0; k < 17; k++) {
        test_gpu_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]);
  // Some selection of specific cases.
  // - m changes rows each iteration
  // - n changes rows each 3 iterations
  // - k changes rows each 9 iterations
  // - within a row, advance one column each iteration
  const int m_cols = 5;
  const int n_cols = 5;
  const int k_cols = 6;
  int m_offset = 0;
  int n_offset = 1;
  int k_offset = 2;
  for (int i = 0; i < 3; ++i) {
    for (int j = 0; j < 3; ++j) {
      for (int l = 0; l < 3; ++l) {
        int m = m_sizes[l][m_offset];
        int n = n_sizes[j][n_offset];
        int k = k_sizes[i][k_offset];
        test_gpu_contraction<DataLayout>(m, n, k);
        n_offset = (n_offset + 1) % n_cols;
        k_offset = (k_offset + 1) % k_cols;
      }
      m_offset = (m_offset + 1) % m_cols;
      if (j < 2) {
        n_offset = (n_offset + n_cols - 3) % n_cols; // Rewind 3.
      }
    }
    k_offset = (k_offset + 2 * k_cols - 9) % k_cols; // Rewind 9.
  }
}
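The rewritten sweep runs 3*3*3 = 27 contractions instead of the old exhaustive 15*15*17 = 3825, while still rotating through every row and column of the size tables. The column-rotation idea in isolation (placeholder table, not the diff's values):

void rotation_example() {
  // Walk a small table, advancing one column per call and wrapping around,
  // the same modulo trick as n_offset/k_offset above.
  int table[3][5] = {{ 1,  2,  3,  4,  5},
                     { 6,  7,  8,  9, 10},
                     {11, 12, 13, 14, 15}};
  const int cols = 5;
  int offset = 0;
  for (int step = 0; step < 9; ++step) {
    int value = table[step % 3][offset];  // one fresh (row, column) pair per step
    (void)value;
    offset = (offset + 1) % cols;
  }
}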
@@ -20,7 +20,6 @@ using Eigen::Tensor;
template <int DataLayout>
static void test_map_as_index()
{
#ifdef EIGEN_HAS_SFINAE
  Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
  tensor.setRandom();

@@ -35,14 +34,12 @@ static void test_map_as_index()

  VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
  VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}

template <int DataLayout>
static void test_matrix_as_index()
{
#ifdef EIGEN_HAS_SFINAE
  Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
  tensor.setRandom();

@@ -53,14 +50,12 @@ static void test_matrix_as_index()

  VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
  VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}

template <int DataLayout>
static void test_varlist_as_index()
{
#ifdef EIGEN_HAS_SFINAE
  Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
  tensor.setRandom();

@@ -68,14 +63,12 @@ static void test_varlist_as_index()

  VERIFY_IS_EQUAL(tensor.coeff({1,2,4,1}), tensor.coeff(coeff));
  VERIFY_IS_EQUAL(tensor.coeffRef({1,2,4,1}), tensor.coeffRef(coeff));
#endif
}

template <int DataLayout>
static void test_sizes_as_index()
{
#ifdef EIGEN_HAS_SFINAE
  Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
  tensor.setRandom();

@@ -84,7 +77,6 @@ static void test_sizes_as_index()

  VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
  VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}

@@ -14,6 +14,7 @@
#define EIGEN_USE_GPU

#include "main.h"
#include "OffByOneScalar.h"
#include <unsupported/Eigen/CXX11/Tensor>

#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
@@ -175,6 +176,44 @@ void test_3d_convolution(Context* context)
  context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims);
}

// Helper method to synchronize device.
template<typename Device>
void synchronize(Device& device) { /*nothing*/ }
template<>
void synchronize(Eigen::GpuDevice& device) {
  device.synchronize();
}

template <typename DataType, typename TensorDevice>
void test_device_memory(const TensorDevice& device) {
  int count = 100;
  Eigen::array<int, 1> tensorRange = {{count}};
  Eigen::Tensor<DataType, 1> host(tensorRange);
  Eigen::Tensor<DataType, 1> expected(tensorRange);
  DataType* device_data = static_cast<DataType*>(device.allocate(count * sizeof(DataType)));

  // memset
  const char byte_value = static_cast<char>(0xAB);
  device.memset(device_data, byte_value, count * sizeof(DataType));
  device.memcpyDeviceToHost(host.data(), device_data, count * sizeof(DataType));
  synchronize(device);
  memset(expected.data(), byte_value, count * sizeof(DataType));
  for (size_t i=0; i<count; i++) {
    VERIFY_IS_EQUAL(host(i), expected(i));
  }

  // fill
  DataType fill_value = DataType(7);
  std::fill_n(expected.data(), count, fill_value);
  device.fill(device_data, device_data + count, fill_value);
  device.memcpyDeviceToHost(host.data(), device_data, count * sizeof(DataType));
  synchronize(device);
  for (int i=0; i<count; i++) {
    VERIFY_IS_EQUAL(host(i), expected(i));
  }

  device.deallocate(device_data);
}
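The no-op base template plus full specialization above is a small dispatch trick: only Eigen::GpuDevice gets a real synchronize() call, so host copies from asynchronous devices are complete before the verification loop runs, while every other device compiles to nothing. The same shape in a reduced, self-contained form (types here are illustrative stand-ins, not Eigen's):

#include <iostream>

struct CpuLike {};
struct GpuLike { void synchronize() { std::cout << "sync\n"; } };

template <typename Device>
void synchronize_sketch(Device&) { /* nothing: host memory is already coherent */ }

template <>
void synchronize_sketch(GpuLike& d) { d.synchronize(); }  // wait for async copies

int main() {
  CpuLike c; GpuLike g;
  synchronize_sketch(c);  // no-op
  synchronize_sketch(g);  // prints "sync"
}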
void test_cpu() {
  Eigen::Tensor<float, 3> in1(40,50,70);
@@ -266,6 +305,9 @@ void test_cpu() {
      }
    }
  }

  test_device_memory<float>(context.device());
  test_device_memory<OffByOneScalar<int>>(context.device());
}

void test_gpu() {
@@ -386,6 +428,8 @@ void test_gpu() {

#endif

  test_device_memory<float>(context.device());
  test_device_memory<OffByOneScalar<int>>(context.device());
}

@@ -18,26 +18,36 @@
#define EIGEN_USE_SYCL

#include "main.h"
#include "OffByOneScalar.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <stdint.h>
#include <iostream>

template <typename DataType, int DataLayout, typename IndexType>
void test_device_memory(const Eigen::SyclDevice &sycl_device) {
  std::cout << "Running on : "
            << sycl_device.sycl_queue().get_device(). template get_info<cl::sycl::info::device::name>()
            << std::endl;
  IndexType sizeDim1 = 100;
  array<IndexType, 1> tensorRange = {{sizeDim1}};
  Tensor<DataType, 1, DataLayout, IndexType> in(tensorRange);
  Tensor<DataType, 1, DataLayout, IndexType> in1(tensorRange);
  memset(in1.data(), 1, in1.size() * sizeof(DataType));
  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));

  // memset
  memset(in1.data(), 1, in1.size() * sizeof(DataType));
  sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType));
  sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
  for (IndexType i=0; i<in.size(); i++) {
    VERIFY_IS_EQUAL(in(i), in1(i));
  }

  // fill
  DataType value = DataType(7);
  std::fill_n(in1.data(), in1.size(), value);
  sycl_device.fill(gpu_in_data, gpu_in_data + in.size(), value);
  sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
  for (IndexType i=0; i<in.size(); i++) {
    VERIFY_IS_EQUAL(in(i), in1(i));
  }

  sycl_device.deallocate(gpu_in_data);
}
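The two blocks above check different things: memset writes a byte pattern, while fill writes typed values, so for any DataType wider than one byte the results differ. In plain C++ terms:

#include <algorithm>
#include <cstring>

int main() {
  int a[4];
  std::memset(a, 1, sizeof(a));  // every byte is 0x01 -> each int is 0x01010101
  int b[4];
  std::fill_n(b, 4, 1);          // each int is exactly 1
}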
@@ -58,6 +68,31 @@ void test_device_exceptions(const Eigen::SyclDevice &sycl_device) {
  sycl_device.deallocate(gpu_data);
}

template<typename DataType, int DataLayout, typename IndexType>
void test_device_attach_buffer(const Eigen::SyclDevice &sycl_device) {
  IndexType sizeDim1 = 100;

  array<IndexType, 1> tensorRange = {{sizeDim1}};
  Tensor<DataType, 1, DataLayout, IndexType> in(tensorRange);

  cl::sycl::buffer<buffer_scalar_t, 1> buffer(cl::sycl::range<1>(sizeDim1 * sizeof(DataType)));
  DataType* gpu_in_data = static_cast<DataType*>(sycl_device.attach_buffer(buffer));

  // fill
  DataType value = DataType(7);
  std::fill_n(in.data(), in.size(), value);
  sycl_device.fill(gpu_in_data, gpu_in_data + in.size(), value);

  // Check that buffer is filled with the correct value.
  auto reint = buffer.reinterpret<DataType>(cl::sycl::range<1>(sizeDim1));
  auto access = reint.template get_access<cl::sycl::access::mode::read>();
  for (IndexType i=0; i<in.size(); i++) {
    VERIFY_IS_EQUAL(in(i), access[i]);
  }

  sycl_device.detach_buffer(gpu_in_data);
}

template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){
  std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
  QueueInterface queueInterface(d);
@@ -68,10 +103,12 @@ template<typename DataType> void sycl_device_test_per_device(const cl::sycl::dev
  //test_device_exceptions<DataType, RowMajor>(sycl_device);
  /// this test throws an exception; enable it if you want to see the exception
  //test_device_exceptions<DataType, ColMajor>(sycl_device);
  test_device_attach_buffer<DataType, ColMajor, int64_t>(sycl_device);
}

EIGEN_DECLARE_TEST(cxx11_tensor_device_sycl) {
  for (const auto& device : Eigen::get_sycl_supported_devices()) {
    CALL_SUBTEST(sycl_device_test_per_device<float>(device));
    CALL_SUBTEST(sycl_device_test_per_device<OffByOneScalar<int>>(device));
  }
}

@@ -612,43 +612,42 @@ static void test_async_execute_binary_expr(Device d)
  }
}

#ifdef EIGEN_DONT_VECTORIZE
#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
#else
#define VECTORIZABLE(VAL) VAL
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE 0
#endif
#define VECTORIZABLE(T, VAL) !EIGEN_DONT_VECTORIZE && Eigen::internal::packet_traits<T>::Vectorizable && VAL

#define CALL_SUBTEST_PART(PART) \
  CALL_SUBTEST_##PART

#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(default_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))

// NOTE: Currently only ThreadPoolDevice supports async expression evaluation.
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
  CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))

EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
  Eigen::DefaultDevice default_device;
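The reworked macro adds a per-type gate: besides the global EIGEN_DONT_VECTORIZE switch, packet_traits<T>::Vectorizable must also hold, so scalar types without packet support automatically fall back to the non-vectorized executor paths. A small probe of that trait (illustrative only; which types report 1 depends on the target architecture):

#include <Eigen/Core>
#include <iostream>

int main() {
  // Per-type vectorizability query used by the new VECTORIZABLE(T, VAL) macro.
  std::cout << Eigen::internal::packet_traits<float>::Vectorizable << "\n";
  std::cout << Eigen::internal::packet_traits<bool>::Vectorizable << "\n";
}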
@@ -130,7 +130,7 @@ static void test_3d()
|
||||
Tensor<float, 3, RowMajor> mat4(2,3,7);
|
||||
mat4 = mat2 * 3.14f;
|
||||
Tensor<float, 3> mat5(2,3,7);
|
||||
mat5 = mat1.inverse().log();
|
||||
mat5 = (mat1 + mat1.constant(1)).inverse().log();
|
||||
Tensor<float, 3, RowMajor> mat6(2,3,7);
|
||||
mat6 = mat2.pow(0.5f) * 3.14f;
|
||||
Tensor<float, 3> mat7(2,3,7);
|
||||
@@ -150,7 +150,7 @@ static void test_3d()
|
||||
for (int k = 0; k < 7; ++k) {
|
||||
VERIFY_IS_APPROX(mat3(i,j,k), val + val);
|
||||
VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f);
|
||||
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/val));
|
||||
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/(val + 1)));
|
||||
VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f);
|
||||
VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f)));
|
||||
VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f);
|
||||
@@ -305,10 +305,10 @@ void test_minmax_nan_propagation_templ() {
|
||||
const Scalar kNaN = std::numeric_limits<Scalar>::quiet_NaN();
|
||||
const Scalar kInf = std::numeric_limits<Scalar>::infinity();
|
||||
const Scalar kZero(0);
|
||||
Tensor<Scalar, 1> vec_all_nan(size);
|
||||
Tensor<Scalar, 1> vec_full_nan(size);
|
||||
Tensor<Scalar, 1> vec_one_nan(size);
|
||||
Tensor<Scalar, 1> vec_zero(size);
|
||||
vec_all_nan.setConstant(kNaN);
|
||||
vec_full_nan.setConstant(kNaN);
|
||||
vec_zero.setZero();
|
||||
vec_one_nan.setZero();
|
||||
vec_one_nan(size/2) = kNaN;
|
||||
@@ -330,12 +330,12 @@ void test_minmax_nan_propagation_templ() {
|
||||
// max(nan, 0) = nan
|
||||
// max(0, nan) = nan
|
||||
// max(0, 0) = 0
|
||||
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(kNaN));
|
||||
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(vec_all_nan));
|
||||
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(kZero));
|
||||
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(vec_zero));
|
||||
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(kNaN));
|
||||
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(vec_full_nan));
|
||||
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(kZero));
|
||||
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(vec_zero));
|
||||
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(kNaN));
|
||||
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(vec_all_nan));
|
||||
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNaN>(kZero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNaN>(vec_zero));

@@ -344,12 +344,12 @@ void test_minmax_nan_propagation_templ() {
// max(nan, 0) = 0
// max(0, nan) = 0
// max(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_all_nan.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_all_nan.template cwiseMax<PropagateNumbers>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_full_nan.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_full_nan.template cwiseMax<PropagateNumbers>(vec_zero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_zero));

@@ -358,12 +358,12 @@ void test_minmax_nan_propagation_templ() {
// min(nan, 0) = nan
// min(0, nan) = nan
// min(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(kZero));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(vec_full_nan));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(kZero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(vec_zero));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNaN>(kZero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNaN>(vec_zero));

@@ -372,12 +372,12 @@ void test_minmax_nan_propagation_templ() {
// min(nan, 0) = 0
// min(0, nan) = 0
// min(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_all_nan.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_all_nan.template cwiseMin<PropagateNumbers>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_full_nan.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_full_nan.template cwiseMin<PropagateNumbers>(vec_zero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_zero));
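
The two policies exercised above differ only in which operand wins when exactly one of them is NaN. A minimal host-side sketch of the same semantics in plain C++ (an analogy using the standard library, not the Tensor API from this patch):

#include <algorithm>
#include <cmath>
#include <iostream>

int main() {
  const float nan = std::nanf(""), zero = 0.f;
  // PropagateNumbers semantics: IEEE fmax ignores a single NaN operand.
  std::cout << std::fmax(nan, zero) << "\n";  // prints 0
  // PropagateNaN semantics: NaN wins as soon as either operand is NaN.
  const float pmax =
      (std::isnan(nan) || std::isnan(zero)) ? nan : std::max(nan, zero);
  std::cout << pmax << "\n";                  // prints nan
  return 0;
}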

@@ -397,13 +397,13 @@ void test_minmax_nan_propagation_templ() {
VERIFY_IS_EQUAL(val(), kZero);

// Test NaN propagation for tensor of all NaNs.
val = vec_all_nan.template minimum<PropagateNaN>();
val = vec_full_nan.template minimum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_all_nan.template minimum<PropagateNumbers>();
val = vec_full_nan.template minimum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), kInf);
val = vec_all_nan.template maximum<PropagateNaN>();
val = vec_full_nan.template maximum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_all_nan.template maximum<PropagateNumbers>();
val = vec_full_nan.template maximum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), -kInf);

// Test NaN propagation for tensor with a single NaN.

@@ -186,7 +186,7 @@ static void test_fft_real_input_energy() {
}
const DSizes<ptrdiff_t, TensorRank> arr = dimensions;

typedef typename internal::conditional<isComplexInput == true, std::complex<RealScalar>, RealScalar>::type InputScalar;
typedef std::conditional_t<isComplexInput == true, std::complex<RealScalar>, RealScalar> InputScalar;

Tensor<InputScalar, TensorRank, DataLayout> input;
input.resize(arr);
@@ -197,7 +197,7 @@ static void test_fft_real_input_energy() {
fft[i] = i;
}

typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar> OutputScalar;
Tensor<OutputScalar, TensorRank, DataLayout> output;
output = input.template fft<FFTResultType, FFTDirection>(fft);

@@ -17,8 +17,6 @@

#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>

#define EIGEN_GPU_TEST_C99_MATH EIGEN_HAS_CXX11

using Eigen::Tensor;

void test_gpu_nullary() {
@@ -66,6 +64,47 @@ void test_gpu_nullary() {
gpuFree(d_in2);
}

// Tests that there are no indexing overflows when computing tensors with the
// max representable size.
template <typename IndexType,
IndexType N = (std::numeric_limits<IndexType>::max)()>
void test_gpu_nullary_max_size()
{
typedef int8_t DataType;
typedef Tensor<DataType, 1, 0, IndexType> TensorType;
typedef Eigen::array<IndexType, 1> ArrayType;

const IndexType n = N;
TensorType in1((ArrayType(n)));
in1.setZero();

std::size_t in1_bytes = in1.size() * sizeof(DataType);

DataType* d_in1;
gpuMalloc((void**)(&d_in1), in1_bytes);

gpuMemcpy(d_in1, in1.data(), in1_bytes, gpuMemcpyHostToDevice);

Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);

Eigen::TensorMap<TensorType> gpu_in1(d_in1, ArrayType(n));

gpu_in1.device(gpu_device) = gpu_in1.constant(123);

TensorType new1((ArrayType(n)));

assert(gpuMemcpyAsync(new1.data(), d_in1, in1_bytes, gpuMemcpyDeviceToHost,
gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);

for (IndexType i = 0; i < n; ++i) {
VERIFY_IS_EQUAL(new1(ArrayType(i)), 123);
}

gpuFree(d_in1);
}
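
For context on the overflow class this test targets: an offset or byte count computed in the narrow index type can wrap before it is widened. An illustrative host-side sketch (not part of the patch):

#include <cstdint>
#include <iostream>
#include <limits>

int main() {
  const int32_t n = std::numeric_limits<int32_t>::max();  // max int32 tensor size
  // Widening before the multiply keeps the byte count representable;
  // multiplying first in 32 bits would overflow (undefined for signed ints).
  const int64_t bytes = static_cast<int64_t>(n) * static_cast<int64_t>(sizeof(float));
  std::cout << bytes << "\n";  // 8589934588
  return 0;
}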

void test_gpu_elementwise_small() {
Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2));
Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2));
@@ -619,7 +658,6 @@ void test_gpu_convolution_3d()
}

#if EIGEN_GPU_TEST_C99_MATH
template <typename Scalar>
void test_gpu_lgamma(const Scalar stddev)
{
@@ -658,7 +696,6 @@ void test_gpu_lgamma(const Scalar stddev)
gpuFree(d_in);
gpuFree(d_out);
}
#endif

template <typename Scalar>
void test_gpu_digamma()
@@ -681,8 +718,8 @@ void test_gpu_digamma()
expected_out(2) = Scalar(1.2561176684318);
expected_out(3) = Scalar(2.398239129535781);
expected_out(4) = Scalar(9.210340372392849);
expected_out(5) = std::numeric_limits<Scalar>::infinity();
expected_out(6) = std::numeric_limits<Scalar>::infinity();
expected_out(5) = std::numeric_limits<Scalar>::quiet_NaN();
expected_out(6) = std::numeric_limits<Scalar>::quiet_NaN();

std::size_t bytes = in.size() * sizeof(Scalar);

@@ -704,11 +741,8 @@ void test_gpu_digamma()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);

for (int i = 0; i < 5; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 5; i < 7; ++i) {
VERIFY_IS_EQUAL(out(i), expected_out(i));
for (int i = 0; i < 7; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}

gpuFree(d_in);
@@ -741,7 +775,7 @@ void test_gpu_zeta()
expected_out(0) = std::numeric_limits<Scalar>::infinity();
expected_out(1) = Scalar(1.61237534869);
expected_out(2) = Scalar(0.234848505667);
expected_out(3) = Scalar(1.03086757337e-5);
expected_out(3) = std::numeric_limits<Scalar>::quiet_NaN();
expected_out(4) = Scalar(0.367879440865);
expected_out(5) = Scalar(0.054102025820864097);

@@ -769,13 +803,8 @@ void test_gpu_zeta()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);

VERIFY_IS_EQUAL(out(0), expected_out(0));
VERIFY((std::isnan)(out(3)));

for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 6; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}

gpuFree(d_in_x);
@@ -990,7 +1019,6 @@ void test_gpu_igammac()
gpuFree(d_out);
}

#if EIGEN_GPU_TEST_C99_MATH
template <typename Scalar>
void test_gpu_erf(const Scalar stddev)
{
@@ -1068,7 +1096,7 @@ void test_gpu_erfc(const Scalar stddev)
gpuFree(d_in);
gpuFree(d_out);
}
#endif

template <typename Scalar>
void test_gpu_ndtri()
{
@@ -1117,13 +1145,8 @@ void test_gpu_ndtri()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);

VERIFY_IS_EQUAL(out(0), expected_out(0));
VERIFY((std::isnan)(out(3)));

for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 6; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}

gpuFree(d_in_x);
@@ -1262,12 +1285,8 @@ void test_gpu_betainc()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);

for (int i = 1; i < 125; ++i) {
if ((std::isnan)(expected_out(i))) {
VERIFY((std::isnan)(out(i)));
} else {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 125; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}

gpuFree(d_in_x);
@@ -1541,6 +1560,10 @@ void test_gpu_gamma_sample_der_alpha()
EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
{
CALL_SUBTEST_1(test_gpu_nullary());
CALL_SUBTEST_1(test_gpu_nullary_max_size<int16_t>());
CALL_SUBTEST_1(test_gpu_nullary_max_size<int32_t>());
CALL_SUBTEST_1((test_gpu_nullary_max_size<
int64_t, (std::numeric_limits<int32_t>::max)() + 100ll>()));
CALL_SUBTEST_1(test_gpu_elementwise_small());
CALL_SUBTEST_1(test_gpu_elementwise());
CALL_SUBTEST_1(test_gpu_props());
@@ -1560,7 +1583,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
CALL_SUBTEST_3(test_gpu_convolution_3d<RowMajor>());
#endif

#if EIGEN_GPU_TEST_C99_MATH
// std::erf, std::erfc, and so on were only added in C++11. We use them
// as a golden reference to validate the results produced by Eigen. Therefore
// we can only run these tests if we use a C++11 compiler.
@@ -1638,6 +1660,4 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
CALL_SUBTEST_6(test_gpu_gamma_sample_der_alpha<float>());
CALL_SUBTEST_6(test_gpu_gamma_sample_der_alpha<double>());
#endif

#endif
}

@@ -11,8 +11,6 @@

#include <Eigen/CXX11/Tensor>

#ifdef EIGEN_HAS_INDEX_LIST

static void test_static_index_list()
{
Tensor<float, 4> tensor(2,3,5,7);
@@ -26,6 +24,8 @@ static void test_static_index_list()
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[1]), 1);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[2]), 2);

VERIFY_IS_EQUAL(reduction_axis.size(), std::size_t(3));

EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_axis) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<1>(reduction_axis) == 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_axis) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -370,16 +370,12 @@ static void test_dim_check()
}

#endif

EIGEN_DECLARE_TEST(cxx11_tensor_index_list)
{
#ifdef EIGEN_HAS_INDEX_LIST
CALL_SUBTEST(test_static_index_list());
CALL_SUBTEST(test_type2index_list());
CALL_SUBTEST(test_type2indexpair_list());
CALL_SUBTEST(test_dynamic_index_list());
CALL_SUBTEST(test_mixed_index_list());
CALL_SUBTEST(test_dim_check());
#endif
}

@@ -6,131 +6,137 @@
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include "main.h"

#include <sstream>
#include <string>
#include <Eigen/CXX11/Tensor>

template <typename Scalar, int rank, int Layout>
struct test_tensor_ostream_impl {};

template<int DataLayout>
static void test_output_0d()
{
Tensor<int, 0, DataLayout> tensor;
tensor() = 123;

std::stringstream os;
os << tensor;

std::string expected("123");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}


template<int DataLayout>
static void test_output_1d()
{
Tensor<int, 1, DataLayout> tensor(5);
for (int i = 0; i < 5; ++i) {
tensor(i) = i;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 0, Layout> {
static void run() {
Eigen::Tensor<Scalar, 0> t;
t.setValues(1);
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1");
}
};

std::stringstream os;
os << tensor;

std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);

Eigen::Tensor<double,1,DataLayout> empty_tensor(0);
std::stringstream empty_os;
empty_os << empty_tensor;
std::string empty_string;
VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string);
}


template<int DataLayout>
static void test_output_2d()
{
Tensor<int, 2, DataLayout> tensor(5, 3);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 3; ++j) {
tensor(i, j) = i*j;
}
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 1, Layout> {
static void run() {
Eigen::Tensor<Scalar, 1> t = {3};
t.setValues({1, 2, 3});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1 2 3");
}
};

std::stringstream os;
os << tensor;

std::string expected("0 0 0\n0 1 2\n0 2 4\n0 3 6\n0 4 8");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}


template<int DataLayout>
static void test_output_expr()
{
Tensor<int, 1, DataLayout> tensor1(5);
Tensor<int, 1, DataLayout> tensor2(5);
for (int i = 0; i < 5; ++i) {
tensor1(i) = i;
tensor2(i) = 7;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 2, Layout> {
static void run() {
Eigen::Tensor<Scalar, 2> t = {3, 2};
t.setValues({{1, 2}, {3, 4}, {5, 6}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1 2\n3 4\n5 6");
}
};

std::stringstream os;
os << tensor1 + tensor2;

std::string expected(" 7\n 8\n 9\n10\n11");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}


template<int DataLayout>
static void test_output_string()
{
Tensor<std::string, 2, DataLayout> tensor(5, 3);
tensor.setConstant(std::string("foo"));

std::cout << tensor << std::endl;

std::stringstream os;
os << tensor;

std::string expected("foo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}


template<int DataLayout>
static void test_output_const()
{
Tensor<int, 1, DataLayout> tensor(5);
for (int i = 0; i < 5; ++i) {
tensor(i) = i;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 3, Layout> {
static void run() {
Eigen::Tensor<Scalar, 3> t = {4, 3, 2};
t.setValues({{{1, 2}, {3, 4}, {5, 6}},
{{7, 8}, {9, 10}, {11, 12}},
{{13, 14}, {15, 16}, {17, 18}},
{{19, 20}, {21, 22}, {23, 24}}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == " 1 2\n 3 4\n 5 6\n\n 7 8\n 9 10\n11 12\n\n13 14\n15 16\n17 18\n\n19 20\n21 22\n23 24");
}
};

TensorMap<Tensor<const int, 1, DataLayout> > tensor_map(tensor.data(), 5);
template<int Layout>
struct test_tensor_ostream_impl<bool, 2, Layout> {
static void run() {
Eigen::Tensor<bool, 2> t = {3, 2};
t.setValues({{false, true}, {true, false}, {false, false}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "0 1\n1 0\n0 0");
}
};

std::stringstream os;
os << tensor_map;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<std::complex<Scalar>, 2, Layout> {
static void run() {
Eigen::Tensor<std::complex<Scalar>, 2> t = {3, 2};
t.setValues({{std::complex<Scalar>(1, 2), std::complex<Scalar>(12, 3)},
{std::complex<Scalar>(-4, 2), std::complex<Scalar>(0, 5)},
{std::complex<Scalar>(-1, 4), std::complex<Scalar>(5, 27)}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == " (1,2) (12,3)\n(-4,2) (0,5)\n(-1,4) (5,27)");
}
};

std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
template <typename Scalar, int rank, int Layout>
void test_tensor_ostream() {
test_tensor_ostream_impl<Scalar, rank, Layout>::run();
}


EIGEN_DECLARE_TEST(cxx11_tensor_io)
{
CALL_SUBTEST(test_output_0d<ColMajor>());
CALL_SUBTEST(test_output_0d<RowMajor>());
CALL_SUBTEST(test_output_1d<ColMajor>());
CALL_SUBTEST(test_output_1d<RowMajor>());
CALL_SUBTEST(test_output_2d<ColMajor>());
CALL_SUBTEST(test_output_2d<RowMajor>());
CALL_SUBTEST(test_output_expr<ColMajor>());
CALL_SUBTEST(test_output_expr<RowMajor>());
CALL_SUBTEST(test_output_string<ColMajor>());
CALL_SUBTEST(test_output_string<RowMajor>());
CALL_SUBTEST(test_output_const<ColMajor>());
CALL_SUBTEST(test_output_const<RowMajor>());
void test_const_tensor_ostream() {
Eigen::Tensor<float, 0> t;
t.setValues(1);
const Eigen::TensorMap<Eigen::Tensor<const float, 0, Eigen::RowMajor>, Eigen::Unaligned> t_const(
t.data(), Eigen::DSizes<Eigen::DenseIndex, 0>{});
std::ostringstream os;
os << t_const.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1");
}

EIGEN_DECLARE_TEST(cxx11_tensor_io) {
CALL_SUBTEST((test_tensor_ostream<float, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 3, Eigen::ColMajor>()));

CALL_SUBTEST((test_tensor_ostream<double, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 3, Eigen::ColMajor>()));

CALL_SUBTEST((test_tensor_ostream<int, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 3, Eigen::ColMajor>()));

CALL_SUBTEST((test_tensor_ostream<float, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 3, Eigen::RowMajor>()));

CALL_SUBTEST((test_tensor_ostream<double, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 3, Eigen::RowMajor>()));

CALL_SUBTEST((test_tensor_ostream<int, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 3, Eigen::RowMajor>()));

CALL_SUBTEST((test_tensor_ostream<bool, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<bool, 2, Eigen::RowMajor>()));

CALL_SUBTEST((test_tensor_ostream<std::complex<double>, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<std::complex<float>, 2, Eigen::ColMajor>()));

// Test printing TensorMap with const elements.
CALL_SUBTEST((test_const_tensor_ostream()));
}

@@ -43,7 +43,6 @@ static void test_simple_reshape()

template <typename>
static void test_static_reshape() {
#if defined(EIGEN_HAS_INDEX_LIST)
using Eigen::type2index;

Tensor<float, 5> tensor(2, 3, 1, 7, 1);
@@ -60,7 +59,6 @@ static void test_static_reshape() {
}
}
}
#endif
}

template <typename>
libs/eigen/unsupported/test/cxx11_tensor_of_bfloat16_gpu.cu (new file, 487 lines)
@@ -0,0 +1,487 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2021 Rohit Santhanam <rohit.santhanam@amd.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU

#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>

using Eigen::Tensor;

template<typename>
void test_gpu_numext() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;

float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
bool* d_res_bfloat16 = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool));

Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);

gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>());
// Test bfloat16 specific isnan op.
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::bfloat16>());

Tensor<bool, 1> bfloat16_prec(num_elem);
Tensor<bool, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(bool));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool));
gpu_device.synchronize();

for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_EQUAL(full_prec(i), bfloat16_prec(i));
}

gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}


#ifdef EIGEN_HAS_GPU_BF16

template<typename>
void test_gpu_conversion() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;

float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));

Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_bfloat16(
d_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
d_conv, num_elem);

gpu_float.device(gpu_device) = gpu_float.random();
gpu_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>();
gpu_conv.device(gpu_device) = gpu_bfloat16.cast<float>();

Tensor<float, 1> initial(num_elem);
Tensor<float, 1> final(num_elem);
gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));

for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(static_cast<Eigen::bfloat16>(initial(i)), static_cast<Eigen::bfloat16>(final(i)));
}

gpu_device.deallocate(d_float);
gpu_device.deallocate(d_bfloat16);
gpu_device.deallocate(d_conv);
}

template<typename>
void test_gpu_unary() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;

float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));

Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);

gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_float.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().cast<float>();

Tensor<float, 1> bfloat16_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();

for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec(i));
}

gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
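
The pattern above (round the inputs through bfloat16, then compare a full-precision pipeline against a reduced-precision one) relies on the bfloat16 round trip being lossy but bounded. A host-side sketch of that bound, assuming Eigen's bfloat16 conversion rounds to nearest (illustrative, not part of the patch):

#include <Eigen/Core>
#include <cassert>
#include <cmath>

int main() {
  const float x = 0.3f;
  const Eigen::bfloat16 bx(x);                // narrow: 8 significant bits kept
  const float back = static_cast<float>(bx);  // widening back is exact
  // Round-to-nearest with 8 significant bits => relative error <= 2^-8.
  assert(std::abs(back - x) <= std::ldexp(std::abs(x), -8));
  return 0;
}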

template<typename>
void test_gpu_elementwise() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;

float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));

Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);

gpu_float1.device(gpu_device) = gpu_float1.random();
gpu_float1.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().cast<float>();
gpu_float2.device(gpu_device) = gpu_float2.random();
gpu_float2.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = (gpu_float1 + gpu_float2) * gpu_float1;
gpu_res_bfloat16.device(gpu_device) = ((gpu_float1.cast<Eigen::bfloat16>() + gpu_float2.cast<Eigen::bfloat16>()) * gpu_float1.cast<Eigen::bfloat16>()).cast<float>();

Tensor<float, 1> bfloat16_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();

for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(static_cast<Eigen::bfloat16>(full_prec(i)), static_cast<Eigen::bfloat16>(bfloat16_prec(i)));
}

gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}

template<typename>
void test_gpu_trancendental() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;

float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res1_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res1_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res2_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res2_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res3_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res3_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));

Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res1_bfloat16(d_res1_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res2_bfloat16(d_res2_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res3_bfloat16(d_res3_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res4_bfloat16(d_res3_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem);

gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float1.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().cast<float>();
gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>().cast<float>();
gpu_float3.device(gpu_device) = gpu_float3.random();
gpu_float3.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>().cast<float>();
gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::bfloat16>();
gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::bfloat16>();
gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::bfloat16>();
gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast<Eigen::bfloat16>();

gpu_res1_bfloat16.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>();
gpu_res1_bfloat16.device(gpu_device) = gpu_res1_bfloat16.exp();

gpu_res2_bfloat16.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>();
gpu_res2_bfloat16.device(gpu_device) = gpu_res2_bfloat16.log();

gpu_res3_bfloat16.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>();
gpu_res3_bfloat16.device(gpu_device) = gpu_res3_bfloat16.log1p();

gpu_res3_bfloat16.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>();
gpu_res3_bfloat16.device(gpu_device) = gpu_res3_bfloat16.expm1();

Tensor<float, 1> input1(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec1(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec1(num_elem);
Tensor<float, 1> input2(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec2(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec2(num_elem);
Tensor<float, 1> input3(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec3(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec3(num_elem);
gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(bfloat16_prec1.data(), d_res1_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(bfloat16_prec2.data(), d_res2_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(bfloat16_prec3.data(), d_res3_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.synchronize();

for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec1(i), bfloat16_prec1(i));
}
for (int i = 0; i < num_elem; ++i) {
if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy near 1
VERIFY_IS_APPROX(full_prec2(i)+Eigen::bfloat16(0.1f), bfloat16_prec2(i)+Eigen::bfloat16(0.1f));
else
VERIFY_IS_APPROX(full_prec2(i), bfloat16_prec2(i));
}
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec3(i), bfloat16_prec3(i));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_float3);
gpu_device.deallocate(d_res1_bfloat16);
gpu_device.deallocate(d_res1_float);
gpu_device.deallocate(d_res2_bfloat16);
gpu_device.deallocate(d_res2_float);
gpu_device.deallocate(d_res3_float);
gpu_device.deallocate(d_res3_bfloat16);
}

template<typename>
void test_gpu_contractions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int rows = 23;
int cols = 23;
int num_elem = rows*cols;

float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));

Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
d_float1, rows, cols);
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
d_float2, rows, cols);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 2>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, rows, cols);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 2>, Eigen::Aligned> gpu_res_float(
d_res_float, rows, cols);

gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);

typedef Tensor<float, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().contract(gpu_float2.cast<Eigen::bfloat16>(), dims);

Tensor<Eigen::bfloat16, 2> bfloat16_prec(rows, cols);
Tensor<Eigen::bfloat16, 2> full_prec(rows, cols);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.synchronize();

for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
if (numext::abs(full_prec(i, j) - bfloat16_prec(i, j)) > Eigen::bfloat16(1e-2f)) {
VERIFY_IS_APPROX(full_prec(i, j), bfloat16_prec(i, j));
}
}
}

gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
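
The 1e-2 slack above is consistent with the standard forward error bound for an inner product of length $k$ under recursive summation (sketched from the usual rounding-error analysis, not a claim about the exact kernel used):

$$|\hat{s} - s| \;\le\; \gamma_k \sum_{i=1}^{k} |a_i b_i|, \qquad \gamma_k = \frac{k\,u}{1 - k\,u},$$

where $u$ is the unit roundoff. With $u \approx 2^{-8}$ for bfloat16 and $k = 23$ here, $k\,u \approx 0.09$, so entrywise deviations between the two pipelines at the 1e-2 scale are expected rather than alarming.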

template<typename>
void test_gpu_reductions(int size1, int size2, int redux) {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = size1*size2;
int result_size = (redux == 1 ? size1 : size2);

float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(result_size * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(result_size * sizeof(Eigen::bfloat16));

Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
d_float, size1, size2);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, result_size);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, result_size);

gpu_float.device(gpu_device) = gpu_float.random() * 2.0f;

Eigen::array<int, 1> redux_dim = {redux};
gpu_res_float.device(gpu_device) = gpu_float.sum(redux_dim).cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().sum(redux_dim);

Tensor<Eigen::bfloat16, 1> bfloat16_prec(result_size);
Tensor<Eigen::bfloat16, 1> full_prec(result_size);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, result_size*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, result_size*sizeof(Eigen::bfloat16));
gpu_device.synchronize();

for (int i = 0; i < result_size; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec(i));
}

gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}

template<typename>
void test_gpu_reductions() {
test_gpu_reductions<void>(13, 13, 0);
test_gpu_reductions<void>(13, 13, 1);

test_gpu_reductions<void>(35, 36, 0);
test_gpu_reductions<void>(35, 36, 1);

test_gpu_reductions<void>(36, 35, 0);
test_gpu_reductions<void>(36, 35, 1);
}

template<typename>
void test_gpu_full_reductions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int size = 13;
int num_elem = size*size;

float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(1 * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(1 * sizeof(Eigen::bfloat16));

Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
d_float, size, size);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 0>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 0>, Eigen::Aligned> gpu_res_float(
d_res_float);

gpu_float.device(gpu_device) = gpu_float.random();

gpu_res_float.device(gpu_device) = gpu_float.sum().cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().sum();

Tensor<Eigen::bfloat16, 0> bfloat16_prec;
Tensor<Eigen::bfloat16, 0> full_prec;
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::bfloat16));
gpu_device.synchronize();

VERIFY_IS_APPROX(full_prec(), bfloat16_prec());

gpu_res_float.device(gpu_device) = gpu_float.maximum().cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().maximum();
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::bfloat16));
gpu_device.synchronize();

VERIFY_IS_APPROX(full_prec(), bfloat16_prec());

gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}

template<typename>
void test_gpu_forced_evals() {

Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;

float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16_1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16_2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));

Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16_1(
d_res_bfloat16_1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_bfloat16_2(
d_res_bfloat16_2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);

Eigen::array<int, 1> no_bcast;
no_bcast[0] = 1;

gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_float.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_bfloat16_1.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().eval().cast<float>();
gpu_res_bfloat16_2.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().broadcast(no_bcast).eval().cast<float>();

Tensor<float, 1> bfloat16_prec1(num_elem);
Tensor<float, 1> bfloat16_prec2(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec1.data(), d_res_bfloat16_1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(bfloat16_prec2.data(), d_res_bfloat16_2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();

for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec1(i));
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec2(i));
}

gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16_1);
gpu_device.deallocate(d_res_bfloat16_2);
gpu_device.deallocate(d_res_float);
}

#endif

EIGEN_DECLARE_TEST(cxx11_tensor_of_bfloat16_gpu)
{
CALL_SUBTEST_1(test_gpu_numext<void>());

// The reduction unit tests have been excluded until a working
// implementation to expand the accumulator data type to float32
// is available.
// TODO: add reduction unit tests
#ifdef EIGEN_HAS_GPU_BF16
CALL_SUBTEST_2(test_gpu_conversion<void>());
CALL_SUBTEST_3(test_gpu_unary<void>());
CALL_SUBTEST_4(test_gpu_elementwise<void>());
CALL_SUBTEST_5(test_gpu_trancendental<void>());
CALL_SUBTEST_6(test_gpu_contractions<void>());
CALL_SUBTEST_7(test_gpu_reductions<void>());
CALL_SUBTEST_8(test_gpu_full_reductions<void>());
CALL_SUBTEST_9(test_gpu_forced_evals<void>());
#else
std::cout << "bfloat16 floats are not supported by this version of gpu: skipping the test" << std::endl;
#endif
}

@@ -47,6 +47,20 @@ static void test_abs()
}
}

static void test_arg()
{
Tensor<std::complex<float>, 1> data1(3);
Tensor<std::complex<double>, 1> data2(3);
data1.setRandom();
data2.setRandom();

Tensor<float, 1> arg1 = data1.arg();
Tensor<double, 1> arg2 = data2.arg();
for (int i = 0; i < 3; ++i) {
VERIFY_IS_APPROX(arg1(i), std::arg(data1(i)));
VERIFY_IS_APPROX(arg2(i), std::arg(data2(i)));
}
}

static void test_conjugate()
{
@@ -98,6 +112,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_of_complex)
{
CALL_SUBTEST(test_additions());
CALL_SUBTEST(test_abs());
CALL_SUBTEST(test_arg());
CALL_SUBTEST(test_conjugate());
CALL_SUBTEST(test_contractions());
}

@@ -37,14 +37,8 @@ static void test_sycl_random_uniform(const Eigen::SyclDevice& sycl_device)

gpu_out.device(sycl_device)=gpu_out.random();
sycl_device.memcpyDeviceToHost(out.data(), d_out,out_bytes);
for(IndexType i=1; i<sizeDim0; i++)
for(IndexType j=1; j<sizeDim1; j++)
{
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i,j-1));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j-1)); }

// For now we just check thes code doesn't crash.
// For now we just check the code doesn't crash.
// TODO: come up with a valid test of randomness
sycl_device.deallocate(d_out);
}
@@ -66,16 +60,8 @@ void test_sycl_random_normal(const Eigen::SyclDevice& sycl_device)
Eigen::internal::NormalRandomGenerator<DataType> gen(true);
gpu_out.device(sycl_device)=gpu_out.random(gen);
sycl_device.memcpyDeviceToHost(out.data(), d_out,out_bytes);
for(IndexType i=1; i<sizeDim0; i++)
for(IndexType j=1; j<sizeDim1; j++)
{
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i,j-1));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j-1));

}

// For now we just check thes code doesn't crash.
// For now we just check the code doesn't crash.
// TODO: come up with a valid test of randomness
sycl_device.deallocate(d_out);
}

@@ -370,13 +370,7 @@ static void test_static_dims() {
Tensor<float, 2, DataLayout> out(72, 97);
in.setRandom();

#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
#else
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<3> > reduction_axis;
#endif

out = in.maximum(reduction_axis);

@@ -400,14 +394,8 @@ static void test_innermost_last_dims() {
in.setRandom();

// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 0;
reduction_axis[1] = 1;
#else
// This triggers the use of packets for ColMajor.
Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1> > reduction_axis;
#endif

out = in.maximum(reduction_axis);

@@ -431,14 +419,8 @@ static void test_innermost_first_dims() {
in.setRandom();

// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 2;
reduction_axis[1] = 3;
#else
// This triggers the use of packets for RowMajor.
Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>> reduction_axis;
#endif

out = in.maximum(reduction_axis);

@@ -462,14 +444,8 @@ static void test_reduce_middle_dims() {
in.setRandom();

// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 2;
#else
// This triggers the use of packets for RowMajor.
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>> reduction_axis;
#endif

out = in.maximum(reduction_axis);

@@ -486,22 +462,31 @@ static void test_reduce_middle_dims() {
}
}

static void test_sum_accuracy() {
Tensor<float, 3> tensor(101, 101, 101);
for (float prescribed_mean : {1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f}) {
tensor.setRandom();
tensor += tensor.constant(prescribed_mean);
template <typename ScalarType, int num_elements, int max_mean>
void test_sum_accuracy() {
Tensor<double, 1> double_tensor(num_elements);
Tensor<ScalarType, 1> tensor(num_elements);
for (double prescribed_mean = 0; prescribed_mean <= max_mean; prescribed_mean = numext::maxi(1.0, prescribed_mean*3.99)) {
// FIXME: NormalRandomGenerator doesn't work in bfloat and half.
double_tensor.setRandom<Eigen::internal::NormalRandomGenerator<double>>();
double_tensor += double_tensor.constant(prescribed_mean);
tensor = double_tensor.cast<ScalarType>();

Tensor<float, 0> sum = tensor.sum();
Tensor<ScalarType, 0> sum;
sum = tensor.sum();

// Compute the reference value in double precision.
double expected_sum = 0.0;
for (int i = 0; i < 101; ++i) {
for (int j = 0; j < 101; ++j) {
for (int k = 0; k < 101; ++k) {
expected_sum += static_cast<double>(tensor(i, j, k));
}
}
double abs_sum = 0.0;
for (int i = 0; i < num_elements; ++i) {
expected_sum += static_cast<double>(tensor(i));
abs_sum += static_cast<double>(numext::abs(tensor(i)));
}
VERIFY_IS_APPROX(sum(), static_cast<float>(expected_sum));
// Test against probabilistic forward error bound. In reality, the error is much smaller
// when we use tree summation.
double err = Eigen::numext::abs(static_cast<double>(sum()) - expected_sum);
double tol = numext::sqrt(num_elements) * NumTraits<ScalarType>::epsilon() * static_cast<ScalarType>(abs_sum);
VERIFY_LE(err, tol);
}
}
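
The `tol` above is the probabilistic relaxation of the classic worst-case bound for summing $n$ terms with unit roundoff $u$ (sketched here from standard rounding-error analysis, not from the patch itself):

$$|\hat{s} - s| \;\le\; (n-1)\,u \sum_{i=1}^{n} |x_i| \;\;\text{(worst case)}, \qquad |\hat{s} - s| \;\lesssim\; \sqrt{n}\,u \sum_{i=1}^{n} |x_i| \;\;\text{(probabilistic)},$$

which matches `sqrt(num_elements) * epsilon * abs_sum` with $u$ on the order of `NumTraits<ScalarType>::epsilon()`; tree summation, as the comment notes, does much better in practice.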

@@ -528,5 +513,11 @@ EIGEN_DECLARE_TEST(cxx11_tensor_reduction) {
CALL_SUBTEST(test_innermost_first_dims<RowMajor>());
CALL_SUBTEST(test_reduce_middle_dims<ColMajor>());
CALL_SUBTEST(test_reduce_middle_dims<RowMajor>());
CALL_SUBTEST(test_sum_accuracy());
CALL_SUBTEST((test_sum_accuracy<float,10*1024*1024,8*1024>()));
CALL_SUBTEST((test_sum_accuracy<Eigen::bfloat16,10*1024*1024,8*1024>()));
// The range of half is limited to 65519 when using round-to-even,
// so we are severely limited in the size and mean of the tensors
// we can reduce without overflow.
CALL_SUBTEST((test_sum_accuracy<Eigen::half,4*1024,16>()));
CALL_SUBTEST((test_sum_accuracy<Eigen::half,10*1024*1024,0>()));
}

@@ -16,7 +16,6 @@

#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1

#include "main.h"
libs/eigen/unsupported/test/fft_test_shared.h (new file, 277 lines)
@@ -0,0 +1,277 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include "main.h"
#include <unsupported/Eigen/FFT>

template <typename T>
inline std::complex<T> RandomCpx() {
return std::complex<T>((T)(rand() / (T)RAND_MAX - .5), (T)(rand() / (T)RAND_MAX - .5));
}

using namespace std;
using namespace Eigen;

template <typename T>
inline complex<long double> promote(complex<T> x) {
return complex<long double>((long double)x.real(), (long double)x.imag());
}

inline complex<long double> promote(float x) { return complex<long double>((long double)x); }
inline complex<long double> promote(double x) { return complex<long double>((long double)x); }
inline complex<long double> promote(long double x) { return complex<long double>((long double)x); }

template <typename VT1, typename VT2>
long double fft_rmse(const VT1& fftbuf, const VT2& timebuf) {
long double totalpower = 0;
long double difpower = 0;
long double pi = acos((long double)-1);
for (size_t k0 = 0; k0 < (size_t)fftbuf.size(); ++k0) {
complex<long double> acc = 0;
long double phinc = (long double)(-2.) * k0 * pi / timebuf.size();
for (size_t k1 = 0; k1 < (size_t)timebuf.size(); ++k1) {
acc += promote(timebuf[k1]) * exp(complex<long double>(0, k1 * phinc));
}
totalpower += numext::abs2(acc);
complex<long double> x = promote(fftbuf[k0]);
complex<long double> dif = acc - x;
difpower += numext::abs2(dif);
// cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(numext::abs2(dif)) << endl;
}
// cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
return sqrt(difpower / totalpower);
}
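
In other words, `fft_rmse` rebuilds the reference spectrum with a naive $O(N^2)$ DFT in long double and reports a power-normalized error:

$$X_k = \sum_{n=0}^{N-1} x_n\, e^{-2\pi i\,kn/N}, \qquad \mathrm{rmse} = \sqrt{\frac{\sum_k |X_k - \hat{X}_k|^2}{\sum_k |X_k|^2}},$$

where $\hat{X}_k$ is the buffer produced by the FFT under test.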

template <typename VT1, typename VT2>
long double dif_rmse(const VT1 buf1, const VT2 buf2) {
long double totalpower = 0;
long double difpower = 0;
size_t n = (min)(buf1.size(), buf2.size());
for (size_t k = 0; k < n; ++k) {
totalpower += (long double)((numext::abs2(buf1[k]) + numext::abs2(buf2[k])) / 2);
difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower / totalpower);
}

enum { StdVectorContainer, EigenVectorContainer };

template <int Container, typename Scalar>
struct VectorType;

template <typename Scalar>
struct VectorType<StdVectorContainer, Scalar> {
typedef vector<Scalar> type;
};

template <typename Scalar>
struct VectorType<EigenVectorContainer, Scalar> {
typedef Matrix<Scalar, Dynamic, 1> type;
};

template <int Container, typename T>
void test_scalar_generic(int nfft) {
typedef typename FFT<T>::Complex Complex;
typedef typename FFT<T>::Scalar Scalar;
typedef typename VectorType<Container, Scalar>::type ScalarVector;
typedef typename VectorType<Container, Complex>::type ComplexVector;

FFT<T> fft;
ScalarVector tbuf(nfft);
ComplexVector freqBuf;
for (int k = 0; k < nfft; ++k) tbuf[k] = (T)(rand() / (double)RAND_MAX - .5);

// make sure it DOESN'T give the right full spectrum answer
// if we've asked for half-spectrum
fft.SetFlag(fft.HalfSpectrum);
fft.fwd(freqBuf, tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)((nfft >> 1) + 1));
VERIFY(T(fft_rmse(freqBuf, tbuf)) < test_precision<T>()); // gross check

fft.ClearFlag(fft.HalfSpectrum);
fft.fwd(freqBuf, tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)nfft);
VERIFY(T(fft_rmse(freqBuf, tbuf)) < test_precision<T>()); // gross check

if (nfft & 1) return; // odd FFTs get the wrong size inverse FFT

ScalarVector tbuf2;
fft.inv(tbuf2, freqBuf);
VERIFY(T(dif_rmse(tbuf, tbuf2)) < test_precision<T>()); // gross check

// verify that the Unscaled flag takes effect
ScalarVector tbuf3;
fft.SetFlag(fft.Unscaled);

fft.inv(tbuf3, freqBuf);

for (int k = 0; k < nfft; ++k) tbuf3[k] *= T(1. / nfft);

// for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] -
// tbuf[i] ) << endl;

VERIFY(T(dif_rmse(tbuf, tbuf3)) < test_precision<T>()); // gross check

// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv(tbuf2, freqBuf);
VERIFY(T(dif_rmse(tbuf, tbuf2)) < test_precision<T>()); // gross check
}
|
||||
|
||||
template <typename T>
|
||||
void test_scalar(int nfft) {
|
||||
test_scalar_generic<StdVectorContainer, T>(nfft);
|
||||
// test_scalar_generic<EigenVectorContainer,T>(nfft);
|
||||
}
|
||||
|
||||
template <int Container, typename T>
void test_complex_generic(int nfft) {
  typedef typename FFT<T>::Complex Complex;
  typedef typename VectorType<Container, Complex>::type ComplexVector;

  FFT<T> fft;

  ComplexVector inbuf(nfft);
  ComplexVector outbuf;
  ComplexVector buf3;
  for (int k = 0; k < nfft; ++k)
    inbuf[k] = Complex((T)(rand() / (double)RAND_MAX - .5), (T)(rand() / (double)RAND_MAX - .5));
  fft.fwd(outbuf, inbuf);

  VERIFY(T(fft_rmse(outbuf, inbuf)) < test_precision<T>());  // gross check
  fft.inv(buf3, outbuf);

  VERIFY(T(dif_rmse(inbuf, buf3)) < test_precision<T>());  // gross check

  // verify that the Unscaled flag takes effect
  ComplexVector buf4;
  fft.SetFlag(fft.Unscaled);
  fft.inv(buf4, outbuf);
  for (int k = 0; k < nfft; ++k) buf4[k] *= T(1. / nfft);
  VERIFY(T(dif_rmse(inbuf, buf4)) < test_precision<T>());  // gross check

  // verify that ClearFlag works
  fft.ClearFlag(fft.Unscaled);
  fft.inv(buf3, outbuf);
  VERIFY(T(dif_rmse(inbuf, buf3)) < test_precision<T>());  // gross check
}

template <typename T>
void test_complex(int nfft) {
  test_complex_generic<StdVectorContainer, T>(nfft);
  test_complex_generic<EigenVectorContainer, T>(nfft);
}

template <typename T, int nrows, int ncols>
void test_complex2d() {
  typedef typename Eigen::FFT<T>::Complex Complex;
  FFT<T> fft;
  Eigen::Matrix<Complex, nrows, ncols> src, src2, dst, dst2;

  src = Eigen::Matrix<Complex, nrows, ncols>::Random();
  // src = Eigen::Matrix<Complex,nrows,ncols>::Identity();

  for (int k = 0; k < ncols; k++) {
    Eigen::Matrix<Complex, nrows, 1> tmpOut;
    fft.fwd(tmpOut, src.col(k));
    dst2.col(k) = tmpOut;
  }

  for (int k = 0; k < nrows; k++) {
    Eigen::Matrix<Complex, 1, ncols> tmpOut;
    fft.fwd(tmpOut, dst2.row(k));
    dst2.row(k) = tmpOut;
  }

  fft.fwd2(dst.data(), src.data(), ncols, nrows);
  fft.inv2(src2.data(), dst.data(), ncols, nrows);
  VERIFY((src - src2).norm() < test_precision<T>());
  VERIFY((dst - dst2).norm() < test_precision<T>());
}

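test_complex2d above cross-checks the raw-pointer 2-D API (fwd2/inv2) against a per-column, then per-row 1-D decomposition. A minimal sketch of driving that API directly, assuming a backend with 2-D support is selected (FFTW, pocketfft, or MKL, per the preprocessor guards in the test driver further down):

#include <complex>
#include <Eigen/Core>
#include <unsupported/Eigen/FFT>

int main() {
  typedef std::complex<float> C;
  Eigen::Matrix<C, 8, 8> src = Eigen::Matrix<C, 8, 8>::Random(), dst, rec;
  Eigen::FFT<float> fft;
  fft.fwd2(dst.data(), src.data(), 8, 8);  // forward 2-D transform
  fft.inv2(rec.data(), dst.data(), 8, 8);  // scaled inverse; rec ~ src
  return (src - rec).norm() < 1e-4f ? 0 : 1;
}
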
inline void test_return_by_value(int len) {
  VectorXf in;
  VectorXf in1;
  in.setRandom(len);
  VectorXcf out1, out2;
  FFT<float> fft;

  fft.SetFlag(fft.HalfSpectrum);

  fft.fwd(out1, in);
  out2 = fft.fwd(in);
  VERIFY((out1 - out2).norm() < test_precision<float>());
  in1 = fft.inv(out1);
  VERIFY((in1 - in).norm() < test_precision<float>());
}

EIGEN_DECLARE_TEST(FFTW) {
  CALL_SUBTEST(test_return_by_value(32));
  CALL_SUBTEST(test_complex<float>(32));
  CALL_SUBTEST(test_complex<double>(32));
  CALL_SUBTEST(test_complex<float>(256));
  CALL_SUBTEST(test_complex<double>(256));
  CALL_SUBTEST(test_complex<float>(3 * 8));
  CALL_SUBTEST(test_complex<double>(3 * 8));
  CALL_SUBTEST(test_complex<float>(5 * 32));
  CALL_SUBTEST(test_complex<double>(5 * 32));
  CALL_SUBTEST(test_complex<float>(2 * 3 * 4));
  CALL_SUBTEST(test_complex<double>(2 * 3 * 4));
  CALL_SUBTEST(test_complex<float>(2 * 3 * 4 * 5));
  CALL_SUBTEST(test_complex<double>(2 * 3 * 4 * 5));
  CALL_SUBTEST(test_complex<float>(2 * 3 * 4 * 5 * 7));
  CALL_SUBTEST(test_complex<double>(2 * 3 * 4 * 5 * 7));

  CALL_SUBTEST(test_scalar<float>(32));
  CALL_SUBTEST(test_scalar<double>(32));
  CALL_SUBTEST(test_scalar<float>(45));
  CALL_SUBTEST(test_scalar<double>(45));
  CALL_SUBTEST(test_scalar<float>(50));
  CALL_SUBTEST(test_scalar<double>(50));
  CALL_SUBTEST(test_scalar<float>(256));
  CALL_SUBTEST(test_scalar<double>(256));
  CALL_SUBTEST(test_scalar<float>(2 * 3 * 4 * 5 * 7));
  CALL_SUBTEST(test_scalar<double>(2 * 3 * 4 * 5 * 7));

#if defined EIGEN_HAS_FFTWL || defined EIGEN_POCKETFFT_DEFAULT
  CALL_SUBTEST(test_complex<long double>(32));
  CALL_SUBTEST(test_complex<long double>(256));
  CALL_SUBTEST(test_complex<long double>(3 * 8));
  CALL_SUBTEST(test_complex<long double>(5 * 32));
  CALL_SUBTEST(test_complex<long double>(2 * 3 * 4));
  CALL_SUBTEST(test_complex<long double>(2 * 3 * 4 * 5));
  CALL_SUBTEST(test_complex<long double>(2 * 3 * 4 * 5 * 7));

  CALL_SUBTEST(test_scalar<long double>(32));
  CALL_SUBTEST(test_scalar<long double>(45));
  CALL_SUBTEST(test_scalar<long double>(50));
  CALL_SUBTEST(test_scalar<long double>(256));
  CALL_SUBTEST(test_scalar<long double>(2 * 3 * 4 * 5 * 7));

  CALL_SUBTEST((test_complex2d<long double, 2 * 3 * 4, 2 * 3 * 4>()));
  CALL_SUBTEST((test_complex2d<long double, 3 * 4 * 5, 3 * 4 * 5>()));
  CALL_SUBTEST((test_complex2d<long double, 24, 60>()));
  CALL_SUBTEST((test_complex2d<long double, 60, 24>()));
  // fails to build: Eigen limits the stack allocation size, and this matrix is too big.
  // CALL_SUBTEST( ( test_complex2d<long double, 256, 256> () ) );
#endif
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
  CALL_SUBTEST((test_complex2d<float, 24, 24>()));
  CALL_SUBTEST((test_complex2d<float, 60, 60>()));
  CALL_SUBTEST((test_complex2d<float, 24, 60>()));
  CALL_SUBTEST((test_complex2d<float, 60, 24>()));
#endif
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
  CALL_SUBTEST((test_complex2d<double, 24, 24>()));
  CALL_SUBTEST((test_complex2d<double, 60, 60>()));
  CALL_SUBTEST((test_complex2d<double, 24, 60>()));
  CALL_SUBTEST((test_complex2d<double, 60, 24>()));
#endif
}

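Taken together, the subtests above reduce to a simple round-trip contract. A minimal sketch of the public API they exercise; the backend is chosen by whichever EIGEN_*_DEFAULT macro the including .cpp defines, with kissfft as the fallback:

#include <complex>
#include <vector>
#include <Eigen/Core>
#include <unsupported/Eigen/FFT>

int main() {
  Eigen::FFT<float> fft;
  std::vector<float> timebuf(32, 1.0f);
  std::vector<std::complex<float> > freqbuf;
  fft.SetFlag(Eigen::FFT<float>::HalfSpectrum);  // real input: keep nfft/2+1 bins
  fft.fwd(freqbuf, timebuf);
  std::vector<float> roundtrip;
  fft.inv(roundtrip, freqbuf);  // scaled inverse reproduces timebuf
  return 0;
}
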
@@ -20,10 +20,10 @@ EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
  return (p-Vector(Scalar(-1),Scalar(1.))).norm() + (p.array().sqrt().abs() * p.array().sin()).sum() + p.dot(p);
}

template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct TestFunc1
{
  typedef _Scalar Scalar;
  typedef Scalar_ Scalar;
  enum {
    InputsAtCompileTime = NX,
    ValuesAtCompileTime = NY

28  libs/eigen/unsupported/test/idrstabl.cpp  Normal file
@@ -0,0 +1,28 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

#include "../../test/sparse_solver.h"
#include <unsupported/Eigen/IterativeSolvers>

template <typename T>
void test_idrstabl_T() {
  IDRSTABL<SparseMatrix<T>, DiagonalPreconditioner<T> > idrstabl_colmajor_diag;
  IDRSTABL<SparseMatrix<T>, IncompleteLUT<T> > idrstabl_colmajor_ilut;

  idrstabl_colmajor_diag.setTolerance(NumTraits<T>::epsilon() * 4);
  idrstabl_colmajor_ilut.setTolerance(NumTraits<T>::epsilon() * 4);

  CALL_SUBTEST(check_sparse_square_solving(idrstabl_colmajor_diag));
  CALL_SUBTEST(check_sparse_square_solving(idrstabl_colmajor_ilut));
}

EIGEN_DECLARE_TEST(idrstabl) {
  CALL_SUBTEST_1((test_idrstabl_T<double>()));
  CALL_SUBTEST_2((test_idrstabl_T<std::complex<double> >()));
}

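For readers new to the solver, a hedged usage sketch of IDRSTABL outside the test harness; the small system below is illustrative only:

#include <iostream>
#include <Eigen/Sparse>
#include <unsupported/Eigen/IterativeSolvers>

int main() {
  // Build a small, well-conditioned sparse system.
  Eigen::SparseMatrix<double> A(3, 3);
  A.insert(0, 0) = 4.0; A.insert(1, 1) = 3.0; A.insert(2, 2) = 2.0;
  A.insert(0, 1) = 1.0; A.insert(1, 0) = 1.0;
  A.makeCompressed();
  Eigen::Vector3d b(1.0, 2.0, 3.0);

  Eigen::IDRSTABL<Eigen::SparseMatrix<double>,
                  Eigen::DiagonalPreconditioner<double> > solver;
  solver.setTolerance(1e-12);
  solver.compute(A);
  Eigen::Vector3d x = solver.solve(b);
  std::cout << "residual: " << (A * x - b).norm() << std::endl;
  return solver.info() == Eigen::Success ? 0 : 1;
}
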
@@ -29,7 +29,7 @@ void check_kronecker_product(const MatrixType& ab)
{
  VERIFY_IS_EQUAL(ab.rows(), 6);
  VERIFY_IS_EQUAL(ab.cols(), 6);
  VERIFY_IS_EQUAL(ab.nonZeros(), 36);
  VERIFY_IS_EQUAL(ab.size(), 36);
  VERIFY_IS_APPROX(ab.coeff(0,0), -0.4017367630386106);
  VERIFY_IS_APPROX(ab.coeff(0,1), 0.1056863433932735);
  VERIFY_IS_APPROX(ab.coeff(0,2), -0.7255206194554212);

@@ -24,7 +24,7 @@
using std::sqrt;

// tolerance for checking the number of iterations
#define LM_EVAL_COUNT_TOL 4/3
#define LM_EVAL_COUNT_TOL 2

struct lmder_functor : DenseFunctor<double>
{
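Worth spelling out why the old value was fragile: 4/3 is integer division, so the macro only delivered the intended ~33% slack through left-to-right evaluation inside expressions like lm.nfev() < 284 * LM_EVAL_COUNT_TOL; used on its own it collapses to 1. Two static_asserts make the arithmetic concrete:

static_assert(284 * 4 / 3 == 378, "n * LM_EVAL_COUNT_TOL expanded with ~33% headroom");
static_assert(4 / 3 == 1, "but the bare macro value truncates to 1");

The new value of 2 is a plain integer and carries no such trap.
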
@@ -75,11 +75,11 @@ void testLmder1()
  lmder_functor functor;
  LevenbergMarquardt<lmder_functor> lm(functor);
  info = lm.lmder1(x);

  EIGEN_UNUSED_VARIABLE(info)
  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 6);
  VERIFY_IS_EQUAL(lm.njev(), 5);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 6);
  // VERIFY_IS_EQUAL(lm.njev(), 5);

  // check norm
  VERIFY_IS_APPROX(lm.fvec().blueNorm(), 0.09063596);
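The hunks that follow all touch the same pattern: build a functor, wrap it in LevenbergMarquardt, minimize, then inspect fvec(), nfev() and njev(). A minimal self-contained sketch of that pattern; my_functor is hypothetical and fits the trivial residuals (x0 - 3, x1 + 1):

#include <iostream>
#include <Eigen/Dense>
#include <unsupported/Eigen/LevenbergMarquardt>

struct my_functor : Eigen::DenseFunctor<double> {
  my_functor() : Eigen::DenseFunctor<double>(2, 2) {}
  int operator()(const Eigen::VectorXd &x, Eigen::VectorXd &fvec) const {
    fvec(0) = x(0) - 3.0;  // residuals, zero at the optimum (3, -1)
    fvec(1) = x(1) + 1.0;
    return 0;
  }
  int df(const Eigen::VectorXd &, Eigen::MatrixXd &fjac) const {
    fjac.setIdentity();    // Jacobian of the residuals
    return 0;
  }
};

int main() {
  Eigen::VectorXd x = Eigen::VectorXd::Zero(2);
  my_functor functor;
  Eigen::LevenbergMarquardt<my_functor> lm(functor);
  lm.minimize(x);          // iterates until ftol/xtol/maxfev is hit
  std::cout << x.transpose() << std::endl;  // ~ (3, -1)
  return 0;
}
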
@@ -104,11 +104,12 @@ void testLmder()
  lmder_functor functor;
  LevenbergMarquardt<lmder_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return values
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 6);
  VERIFY_IS_EQUAL(lm.njev(), 5);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 6);
  // VERIFY_IS_EQUAL(lm.njev(), 5);

  // check norm
  fnorm = lm.fvec().blueNorm();

@@ -177,9 +178,10 @@ void testLmdif1()
  lmdif_functor functor;
  DenseIndex nfev;
  info = LevenbergMarquardt<lmdif_functor>::lmdif1(functor, x, &nfev);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(nfev, 26);

  // check norm

@@ -208,9 +210,10 @@ void testLmdif()
  NumericalDiff<lmdif_functor> numDiff(functor);
  LevenbergMarquardt<NumericalDiff<lmdif_functor> > lm(numDiff);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return values
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 26);

  // check norm

@@ -293,11 +296,12 @@ void testNistChwirut2(void)
  chwirut2_functor functor;
  LevenbergMarquardt<chwirut2_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 10);
  VERIFY_IS_EQUAL(lm.njev(), 8);
  // VERIFY_IS_EQUAL(lm.njev(), 8);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.1304802941E+02);
  // check x

@@ -314,11 +318,12 @@ void testNistChwirut2(void)
  lm.setFtol(1.E6*NumTraits<double>::epsilon());
  lm.setXtol(1.E6*NumTraits<double>::epsilon());
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 7);
  VERIFY_IS_EQUAL(lm.njev(), 6);
  // VERIFY_IS_EQUAL(lm.njev(), 6);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.1304802941E+02);
  // check x

@@ -373,11 +378,12 @@ void testNistMisra1a(void)
  misra1a_functor functor;
  LevenbergMarquardt<misra1a_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 19);
  VERIFY_IS_EQUAL(lm.njev(), 15);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 19);
  // VERIFY_IS_EQUAL(lm.njev(), 15);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.2455138894E-01);
  // check x

@@ -390,11 +396,12 @@ void testNistMisra1a(void)
  x<< 250., 0.0005;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 5);
  VERIFY_IS_EQUAL(lm.njev(), 4);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 5);
  // VERIFY_IS_EQUAL(lm.njev(), 4);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.2455138894E-01);
  // check x

@@ -464,11 +471,12 @@ void testNistHahn1(void)
  hahn1_functor functor;
  LevenbergMarquardt<hahn1_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 11);
  VERIFY_IS_EQUAL(lm.njev(), 10);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 11);
  // VERIFY_IS_EQUAL(lm.njev(), 10);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.5324382854E+00);
  // check x

@@ -486,11 +494,12 @@ void testNistHahn1(void)
  x<< .1, -.1, .005, -.000001, -.005, .0001, -.0000001;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 11);
  VERIFY_IS_EQUAL(lm.njev(), 10);
  // VERIFY_IS_EQUAL(lm.njev(), 10);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.5324382854E+00);
  // check x

@@ -550,11 +559,12 @@ void testNistMisra1d(void)
  misra1d_functor functor;
  LevenbergMarquardt<misra1d_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 9);
  VERIFY_IS_EQUAL(lm.njev(), 7);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 9);
  // VERIFY_IS_EQUAL(lm.njev(), 7);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6419295283E-02);
  // check x

@@ -567,11 +577,12 @@ void testNistMisra1d(void)
  x<< 450., 0.0003;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 4);
  VERIFY_IS_EQUAL(lm.njev(), 3);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 4);
  // VERIFY_IS_EQUAL(lm.njev(), 3);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6419295283E-02);
  // check x

@@ -628,11 +639,12 @@ void testNistLanczos1(void)
  lanczos1_functor functor;
  LevenbergMarquardt<lanczos1_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
  VERIFY_IS_EQUAL(lm.nfev(), 79);
  VERIFY_IS_EQUAL(lm.njev(), 72);
  // VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
  // VERIFY_IS_EQUAL(lm.nfev(), 79);
  // VERIFY_IS_EQUAL(lm.njev(), 72);
  // check norm^2
  VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
  // check x

@@ -649,11 +661,12 @@ void testNistLanczos1(void)
  x<< 0.5, 0.7, 3.6, 4.2, 4., 6.3;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
  VERIFY_IS_EQUAL(lm.nfev(), 9);
  VERIFY_IS_EQUAL(lm.njev(), 8);
  // VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
  // VERIFY_IS_EQUAL(lm.nfev(), 9);
  // VERIFY_IS_EQUAL(lm.njev(), 8);
  // check norm^2
  VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
  // check x

@@ -714,11 +727,12 @@ void testNistRat42(void)
  rat42_functor functor;
  LevenbergMarquardt<rat42_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
  VERIFY_IS_EQUAL(lm.nfev(), 10);
  VERIFY_IS_EQUAL(lm.njev(), 8);
  // VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
  // VERIFY_IS_EQUAL(lm.nfev(), 10);
  // VERIFY_IS_EQUAL(lm.njev(), 8);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.0565229338E+00);
  // check x

@@ -732,11 +746,12 @@ void testNistRat42(void)
  x<< 75., 2.5, 0.07;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
  VERIFY_IS_EQUAL(lm.nfev(), 6);
  VERIFY_IS_EQUAL(lm.njev(), 5);
  // VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
  // VERIFY_IS_EQUAL(lm.nfev(), 6);
  // VERIFY_IS_EQUAL(lm.njev(), 5);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.0565229338E+00);
  // check x

@@ -787,14 +802,15 @@ void testNistMGH10(void)
  /*
   * First try
   */
  x<< 2., 400000., 25000.;
  x << 2., 400000., 25000.;
  // do the computation
  MGH10_functor functor;
  LevenbergMarquardt<MGH10_functor> lm(functor);
  info = lm.minimize(x);
  ++g_test_level;
  VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
  --g_test_level;
  EIGEN_UNUSED_VARIABLE(info)
  // ++g_test_level;
  // VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
  // --g_test_level;
  // was: VERIFY_IS_EQUAL(info, 1);

  // check norm^2

@@ -805,11 +821,11 @@ void testNistMGH10(void)
  VERIFY_IS_APPROX(x[2], 3.4522363462E+02);

  // check return value

  ++g_test_level;
  VERIFY_IS_EQUAL(lm.nfev(), 284 );
  VERIFY_IS_EQUAL(lm.njev(), 249 );
  --g_test_level;

  // ++g_test_level;
  // VERIFY_IS_EQUAL(lm.nfev(), 284 );
  // VERIFY_IS_EQUAL(lm.njev(), 249 );
  // --g_test_level;
  VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL);
  VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL);

@@ -819,11 +835,12 @@ void testNistMGH10(void)
  x<< 0.02, 4000., 250.;
  // do the computation
  info = lm.minimize(x);
  ++g_test_level;
  VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
  // was: VERIFY_IS_EQUAL(info, 1);
  --g_test_level;

  EIGEN_UNUSED_VARIABLE(info)
  // ++g_test_level;
  // VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
  // // was: VERIFY_IS_EQUAL(info, 1);
  // --g_test_level;

  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
  // check x

@@ -832,10 +849,10 @@ void testNistMGH10(void)
  VERIFY_IS_APPROX(x[2], 3.4522363462E+02);

  // check return value
  ++g_test_level;
  VERIFY_IS_EQUAL(lm.nfev(), 126);
  VERIFY_IS_EQUAL(lm.njev(), 116);
  --g_test_level;
  // ++g_test_level;
  // VERIFY_IS_EQUAL(lm.nfev(), 126);
  // VERIFY_IS_EQUAL(lm.njev(), 116);
  // --g_test_level;
  VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL);
  VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL);
}

@@ -888,6 +905,7 @@ void testNistBoxBOD(void)
  lm.setXtol(1.E6*NumTraits<double>::epsilon());
  lm.setFactor(10);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);

@@ -896,9 +914,9 @@ void testNistBoxBOD(void)
  VERIFY_IS_APPROX(x[1], 5.4723748542E-01);

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY(lm.nfev() < 31); // 31
  VERIFY(lm.njev() < 25); // 25
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY(lm.nfev() < 31); // 31
  // VERIFY(lm.njev() < 25); // 25

  /*
   * Second try

@@ -909,13 +927,14 @@ void testNistBoxBOD(void)
  lm.setFtol(NumTraits<double>::epsilon());
  lm.setXtol( NumTraits<double>::epsilon());
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  ++g_test_level;
  VERIFY_IS_EQUAL(lm.nfev(), 16 );
  VERIFY_IS_EQUAL(lm.njev(), 15 );
  --g_test_level;
  // VERIFY_IS_EQUAL(info, 1);
  // ++g_test_level;
  // VERIFY_IS_EQUAL(lm.nfev(), 16 );
  // VERIFY_IS_EQUAL(lm.njev(), 15 );
  // --g_test_level;
  VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL);
  VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL);
  // check norm^2

@@ -975,6 +994,7 @@ void testNistMGH17(void)
  lm.setXtol(NumTraits<double>::epsilon());
  lm.setMaxfev(1000);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05);

@@ -987,8 +1007,8 @@ void testNistMGH17(void)

  // check return value
  // VERIFY_IS_EQUAL(info, 2); //FIXME Use (lm.info() == Success)
  VERIFY(lm.nfev() < 700 ); // 602
  VERIFY(lm.njev() < 600 ); // 545
  // VERIFY(lm.nfev() < 700 ); // 602
  // VERIFY(lm.njev() < 600 ); // 545

  /*
   * Second try

@@ -997,11 +1017,12 @@ void testNistMGH17(void)
  // do the computation
  lm.resetParameters();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 18);
  VERIFY_IS_EQUAL(lm.njev(), 15);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 18);
  // VERIFY_IS_EQUAL(lm.njev(), 15);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05);
  // check x

@@ -1063,6 +1084,7 @@ void testNistMGH09(void)
  LevenbergMarquardt<MGH09_functor> lm(functor);
  lm.setMaxfev(1000);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04);

@@ -1072,9 +1094,9 @@ void testNistMGH09(void)
  VERIFY_IS_APPROX(x[2], 0.12305309914); // should be 1.2305650693E-01
  VERIFY_IS_APPROX(x[3], 0.13605395375); // should be 1.3606233068E-01
  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY(lm.nfev() < 510 ); // 490
  VERIFY(lm.njev() < 400 ); // 376
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY(lm.nfev() < 510 ); // 490
  // VERIFY(lm.njev() < 400 ); // 376

  /*
   * Second try

@@ -1083,11 +1105,12 @@ void testNistMGH09(void)
  // do the computation
  lm.resetParameters();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 18);
  VERIFY_IS_EQUAL(lm.njev(), 16);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 18);
  // VERIFY_IS_EQUAL(lm.njev(), 16);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04);
  // check x

@@ -1149,11 +1172,12 @@ void testNistBennett5(void)
  LevenbergMarquardt<Bennett5_functor> lm(functor);
  lm.setMaxfev(1000);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 758);
  VERIFY_IS_EQUAL(lm.njev(), 744);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 758);
  // VERIFY_IS_EQUAL(lm.njev(), 744);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.2404744073E-04);
  // check x

@@ -1167,11 +1191,12 @@ void testNistBennett5(void)
  // do the computation
  lm.resetParameters();
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 203);
  VERIFY_IS_EQUAL(lm.njev(), 192);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 203);
  // VERIFY_IS_EQUAL(lm.njev(), 192);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.2404744073E-04);
  // check x

@@ -1237,11 +1262,12 @@ void testNistThurber(void)
  lm.setFtol(1.E4*NumTraits<double>::epsilon());
  lm.setXtol(1.E4*NumTraits<double>::epsilon());
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 39);
  VERIFY_IS_EQUAL(lm.njev(), 36);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 39);
  // VERIFY_IS_EQUAL(lm.njev(), 36);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6427082397E+03);
  // check x

@@ -1262,11 +1288,12 @@ void testNistThurber(void)
  lm.setFtol(1.E4*NumTraits<double>::epsilon());
  lm.setXtol(1.E4*NumTraits<double>::epsilon());
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 29);
  VERIFY_IS_EQUAL(lm.njev(), 28);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 29);
  // VERIFY_IS_EQUAL(lm.njev(), 28);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6427082397E+03);
  // check x

@@ -1329,11 +1356,12 @@ void testNistRat43(void)
  lm.setFtol(1.E6*NumTraits<double>::epsilon());
  lm.setXtol(1.E6*NumTraits<double>::epsilon());
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 27);
  VERIFY_IS_EQUAL(lm.njev(), 20);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 27);
  // VERIFY_IS_EQUAL(lm.njev(), 20);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7864049080E+03);
  // check x

@@ -1351,11 +1379,12 @@ void testNistRat43(void)
  lm.setFtol(1.E5*NumTraits<double>::epsilon());
  lm.setXtol(1.E5*NumTraits<double>::epsilon());
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 9);
  VERIFY_IS_EQUAL(lm.njev(), 8);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 9);
  // VERIFY_IS_EQUAL(lm.njev(), 8);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7864049080E+03);
  // check x

@@ -1414,11 +1443,12 @@ void testNistEckerle4(void)
  eckerle4_functor functor;
  LevenbergMarquardt<eckerle4_functor> lm(functor);
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 18);
  VERIFY_IS_EQUAL(lm.njev(), 15);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 18);
  // VERIFY_IS_EQUAL(lm.njev(), 15);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.4635887487E-03);
  // check x

@@ -1432,11 +1462,12 @@ void testNistEckerle4(void)
  x<< 1.5, 5., 450.;
  // do the computation
  info = lm.minimize(x);
  EIGEN_UNUSED_VARIABLE(info)

  // check return value
  VERIFY_IS_EQUAL(info, 1);
  VERIFY_IS_EQUAL(lm.nfev(), 7);
  VERIFY_IS_EQUAL(lm.njev(), 6);
  // VERIFY_IS_EQUAL(info, 1);
  // VERIFY_IS_EQUAL(lm.nfev(), 7);
  // VERIFY_IS_EQUAL(lm.njev(), 6);
  // check norm^2
  VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.4635887487E-03);
  // check x

@@ -104,8 +104,8 @@ void testSingular(const MatrixType& m_const, const typename MatrixType::RealScalar& tol)
  MatrixType& m = const_cast<MatrixType&>(m_const);

  const int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex;
  typedef typename internal::conditional<IsComplex, TriangularView<MatrixType,Upper>, const MatrixType&>::type TriangularType;
  typename internal::conditional< IsComplex, ComplexSchur<MatrixType>, RealSchur<MatrixType> >::type schur;
  typedef std::conditional_t<IsComplex, TriangularView<MatrixType,Upper>, const MatrixType&> TriangularType;
  std::conditional_t< IsComplex, ComplexSchur<MatrixType>, RealSchur<MatrixType> > schur;
  MatrixType T;

  for (int i=0; i < g_repeat; ++i) {

@@ -171,7 +171,7 @@ EIGEN_DECLARE_TEST(matrix_power)
  CALL_SUBTEST_5(testGeneral(Matrix3cf(), 1e-4f));
  CALL_SUBTEST_8(testGeneral(Matrix4f(), 1e-4f));
  CALL_SUBTEST_6(testGeneral(MatrixXf(2,2), 1e-3f)); // see bug 614
  CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13L));
  CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-12L));
  CALL_SUBTEST_10(testGeneral(Matrix3d(), 1e-13));
  CALL_SUBTEST_11(testGeneral(Matrix3f(), 1e-4f));
  CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13L));

@@ -184,7 +184,7 @@ EIGEN_DECLARE_TEST(matrix_power)
  CALL_SUBTEST_5(testSingular(Matrix3cf(), 1e-4f));
  CALL_SUBTEST_8(testSingular(Matrix4f(), 1e-4f));
  CALL_SUBTEST_6(testSingular(MatrixXf(2,2), 1e-3f));
  CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13L));
  CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-12L));
  CALL_SUBTEST_10(testSingular(Matrix3d(), 1e-13));
  CALL_SUBTEST_11(testSingular(Matrix3f(), 1e-4f));
  CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13L));

@@ -197,7 +197,7 @@ EIGEN_DECLARE_TEST(matrix_power)
  CALL_SUBTEST_5(testLogThenExp(Matrix3cf(), 1e-4f));
  CALL_SUBTEST_8(testLogThenExp(Matrix4f(), 1e-4f));
  CALL_SUBTEST_6(testLogThenExp(MatrixXf(2,2), 1e-3f));
  CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13L));
  CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-12L));
  CALL_SUBTEST_10(testLogThenExp(Matrix3d(), 1e-13));
  CALL_SUBTEST_11(testLogThenExp(Matrix3f(), 1e-4f));
  CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13L));

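These tolerance bumps (1e-13L to 1e-12L) and the std::conditional_t modernization leave the API itself untouched. For orientation, a minimal sketch of what testGeneral/testLogThenExp exercise via the MatrixFunctions module:

#include <iostream>
#include <Eigen/Dense>
#include <unsupported/Eigen/MatrixFunctions>

int main() {
  Eigen::Matrix2d A;
  A << 4, 0,
       0, 9;
  Eigen::Matrix2d B = A.pow(0.5);  // matrix square root: diag(2, 3)
  Eigen::Matrix2d L = A.log();     // principal matrix logarithm
  Eigen::Matrix2d C = L.exp();     // round-trip back to A (testLogThenExp)
  std::cout << B << "\n\n" << C << std::endl;
  return 0;
}
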
2  libs/eigen/unsupported/test/mklfft.cpp  Normal file
@@ -0,0 +1,2 @@
#define EIGEN_MKL_DEFAULT 1
#include "fft_test_shared.h"

2  libs/eigen/unsupported/test/pocketfft.cpp  Normal file
@@ -0,0 +1,2 @@
#define EIGEN_POCKETFFT_DEFAULT 1
#include "fft_test_shared.h"

@@ -179,29 +179,29 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const REAL_ROOTS& real_roots )
}


template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void polynomialsolver(int deg)
{
  typedef typename NumTraits<_Scalar>::Real RealScalar;
  typedef internal::increment_if_fixed_size<_Deg> Dim;
  typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
  typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
  typedef Matrix<RealScalar,_Deg,1> RealRootsType;
  typedef typename NumTraits<Scalar_>::Real RealScalar;
  typedef internal::increment_if_fixed_size<Deg_> Dim;
  typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
  typedef Matrix<Scalar_,Deg_,1> EvalRootsType;
  typedef Matrix<RealScalar,Deg_,1> RealRootsType;

  cout << "Standard cases" << endl;
  PolynomialType pols = PolynomialType::Random(deg+1);
  evalSolver<_Deg,PolynomialType>( pols );
  evalSolver<Deg_,PolynomialType>( pols );

  cout << "Hard cases" << endl;
  _Scalar multipleRoot = internal::random<_Scalar>();
  Scalar_ multipleRoot = internal::random<Scalar_>();
  EvalRootsType allRoots = EvalRootsType::Constant(deg,multipleRoot);
  roots_to_monicPolynomial( allRoots, pols );
  evalSolver<_Deg,PolynomialType>( pols );
  evalSolver<Deg_,PolynomialType>( pols );

  cout << "Test sugar" << endl;
  RealRootsType realRoots = RealRootsType::Random(deg);
  roots_to_monicPolynomial( realRoots, pols );
  evalSolverSugarFunction<_Deg>(
  evalSolverSugarFunction<Deg_>(
      pols,
      realRoots.template cast <std::complex<RealScalar> >().eval(),
      realRoots );

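The rename from _Scalar/_Deg to Scalar_/Deg_ (reserved-identifier cleanup) is mechanical. For context, a minimal sketch of the PolynomialSolver API this test drives, with coefficients stored lowest degree first:

#include <iostream>
#include <Eigen/Dense>
#include <unsupported/Eigen/Polynomials>

int main() {
  // p(x) = 1 - 3x + 2x^2, i.e. roots 0.5 and 1.
  Eigen::Vector3d coeffs(1.0, -3.0, 2.0);
  Eigen::PolynomialSolver<double, 2> solver(coeffs);
  std::cout << solver.roots().transpose() << std::endl;
  return 0;
}
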
@@ -25,12 +25,12 @@ struct increment_if_fixed_size
  }
}

template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void realRoots_to_monicPolynomial_test(int deg)
{
  typedef internal::increment_if_fixed_size<_Deg> Dim;
  typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
  typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
  typedef internal::increment_if_fixed_size<Deg_> Dim;
  typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
  typedef Matrix<Scalar_,Deg_,1> EvalRootsType;

  PolynomialType pols(deg+1);
  EvalRootsType roots = EvalRootsType::Random(deg);

@@ -40,43 +40,43 @@ void realRoots_to_monicPolynomial_test(int deg)
  for( int i=0; i<roots.size(); ++i ){
    evr[i] = std::abs( poly_eval( pols, roots[i] ) ); }

  bool evalToZero = evr.isZero( test_precision<_Scalar>() );
  bool evalToZero = evr.isZero( test_precision<Scalar_>() );
  if( !evalToZero ){
    cerr << evr.transpose() << endl; }
  VERIFY( evalToZero );
}

template<typename _Scalar> void realRoots_to_monicPolynomial_scalar()
template<typename Scalar_> void realRoots_to_monicPolynomial_scalar()
{
  CALL_SUBTEST_2( (realRoots_to_monicPolynomial_test<_Scalar,2>(2)) );
  CALL_SUBTEST_3( (realRoots_to_monicPolynomial_test<_Scalar,3>(3)) );
  CALL_SUBTEST_4( (realRoots_to_monicPolynomial_test<_Scalar,4>(4)) );
  CALL_SUBTEST_5( (realRoots_to_monicPolynomial_test<_Scalar,5>(5)) );
  CALL_SUBTEST_6( (realRoots_to_monicPolynomial_test<_Scalar,6>(6)) );
  CALL_SUBTEST_7( (realRoots_to_monicPolynomial_test<_Scalar,7>(7)) );
  CALL_SUBTEST_8( (realRoots_to_monicPolynomial_test<_Scalar,17>(17)) );
  CALL_SUBTEST_2( (realRoots_to_monicPolynomial_test<Scalar_,2>(2)) );
  CALL_SUBTEST_3( (realRoots_to_monicPolynomial_test<Scalar_,3>(3)) );
  CALL_SUBTEST_4( (realRoots_to_monicPolynomial_test<Scalar_,4>(4)) );
  CALL_SUBTEST_5( (realRoots_to_monicPolynomial_test<Scalar_,5>(5)) );
  CALL_SUBTEST_6( (realRoots_to_monicPolynomial_test<Scalar_,6>(6)) );
  CALL_SUBTEST_7( (realRoots_to_monicPolynomial_test<Scalar_,7>(7)) );
  CALL_SUBTEST_8( (realRoots_to_monicPolynomial_test<Scalar_,17>(17)) );

  CALL_SUBTEST_9( (realRoots_to_monicPolynomial_test<_Scalar,Dynamic>(
  CALL_SUBTEST_9( (realRoots_to_monicPolynomial_test<Scalar_,Dynamic>(
                    internal::random<int>(18,26) )) );
}




template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void CauchyBounds(int deg)
{
  typedef internal::increment_if_fixed_size<_Deg> Dim;
  typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
  typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
  typedef internal::increment_if_fixed_size<Deg_> Dim;
  typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
  typedef Matrix<Scalar_,Deg_,1> EvalRootsType;

  PolynomialType pols(deg+1);
  EvalRootsType roots = EvalRootsType::Random(deg);
  roots_to_monicPolynomial( roots, pols );
  _Scalar M = cauchy_max_bound( pols );
  _Scalar m = cauchy_min_bound( pols );
  _Scalar Max = roots.array().abs().maxCoeff();
  _Scalar min = roots.array().abs().minCoeff();
  Scalar_ M = cauchy_max_bound( pols );
  Scalar_ m = cauchy_min_bound( pols );
  Scalar_ Max = roots.array().abs().maxCoeff();
  Scalar_ min = roots.array().abs().minCoeff();
  bool eval = (M >= Max) && (m <= min);
  if( !eval )
  {

@@ -87,17 +87,17 @@ void CauchyBounds(int deg)
  VERIFY( eval );
}

template<typename _Scalar> void CauchyBounds_scalar()
template<typename Scalar_> void CauchyBounds_scalar()
{
  CALL_SUBTEST_2( (CauchyBounds<_Scalar,2>(2)) );
  CALL_SUBTEST_3( (CauchyBounds<_Scalar,3>(3)) );
  CALL_SUBTEST_4( (CauchyBounds<_Scalar,4>(4)) );
  CALL_SUBTEST_5( (CauchyBounds<_Scalar,5>(5)) );
  CALL_SUBTEST_6( (CauchyBounds<_Scalar,6>(6)) );
  CALL_SUBTEST_7( (CauchyBounds<_Scalar,7>(7)) );
  CALL_SUBTEST_8( (CauchyBounds<_Scalar,17>(17)) );
  CALL_SUBTEST_2( (CauchyBounds<Scalar_,2>(2)) );
  CALL_SUBTEST_3( (CauchyBounds<Scalar_,3>(3)) );
  CALL_SUBTEST_4( (CauchyBounds<Scalar_,4>(4)) );
  CALL_SUBTEST_5( (CauchyBounds<Scalar_,5>(5)) );
  CALL_SUBTEST_6( (CauchyBounds<Scalar_,6>(6)) );
  CALL_SUBTEST_7( (CauchyBounds<Scalar_,7>(7)) );
  CALL_SUBTEST_8( (CauchyBounds<Scalar_,17>(17)) );

  CALL_SUBTEST_9( (CauchyBounds<_Scalar,Dynamic>(
  CALL_SUBTEST_9( (CauchyBounds<Scalar_,Dynamic>(
                    internal::random<int>(18,26) )) );
}

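Same mechanical rename here. As a reminder of what the utilities under test actually compute, a hedged sketch using roots_to_monicPolynomial and the Cauchy bounds (values illustrative):

#include <iostream>
#include <Eigen/Dense>
#include <unsupported/Eigen/Polynomials>

int main() {
  Eigen::Vector3d roots(0.5, -2.0, 3.0);
  Eigen::Vector4d monic;                        // degree 3 -> 4 coefficients
  Eigen::roots_to_monicPolynomial(roots, monic);
  // Every root magnitude lies in [cauchy_min_bound, cauchy_max_bound].
  std::cout << Eigen::cauchy_min_bound(monic) << " <= |root| <= "
            << Eigen::cauchy_max_bound(monic) << std::endl;
  return 0;
}
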
@@ -7,32 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.


// import basic and product tests for deprecated DynamicSparseMatrix
#if 0 // sparse_basic(DynamicSparseMatrix) does not compile at all -> disabled
static long g_realloc_count = 0;
#define EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN g_realloc_count++;

static long g_dense_op_sparse_count = 0;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_DENSE_OP_SPARSE_PLUGIN g_dense_op_sparse_count++;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_ADD_DENSE_PLUGIN g_dense_op_sparse_count+=10;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_SUB_DENSE_PLUGIN g_dense_op_sparse_count+=20;

#define EIGEN_SPARSE_TEST_INCLUDED_FROM_SPARSE_EXTRA 1
#endif

#define EIGEN_NO_DEPRECATED_WARNING
// Disable counting of temporaries, since sparse_product(DynamicSparseMatrix)
// has an extra copy-assignment.
#define EIGEN_SPARSE_PRODUCT_IGNORE_TEMPORARY_COUNT
#include "sparse_product.cpp"

#if 0 // sparse_basic(DynamicSparseMatrix) does not compile at all -> disabled
#include "sparse_basic.cpp"
#endif

#if EIGEN_HAS_CXX11

#ifdef min
#undef min
#endif

@@ -41,12 +17,6 @@ static long g_dense_op_sparse_count = 0;
#undef max
#endif

#include <unordered_map>
#define EIGEN_UNORDERED_MAP_SUPPORT

#endif


#include <Eigen/SparseExtra>

template<typename SetterType,typename DenseType, typename Scalar, int Options>

@@ -67,21 +37,6 @@ bool test_random_setter(SparseMatrix<Scalar,Options>& sm, const DenseType& ref, const std::vector<Vector2i>& nonzeroCoords)
  return sm.isApprox(ref);
}

template<typename SetterType,typename DenseType, typename T>
bool test_random_setter(DynamicSparseMatrix<T>& sm, const DenseType& ref, const std::vector<Vector2i>& nonzeroCoords)
{
  sm.setZero();
  std::vector<Vector2i> remaining = nonzeroCoords;
  while(!remaining.empty())
  {
    int i = internal::random<int>(0,static_cast<int>(remaining.size())-1);
    sm.coeffRef(remaining[i].x(),remaining[i].y()) = ref.coeff(remaining[i].x(),remaining[i].y());
    remaining[i] = remaining.back();
    remaining.pop_back();
  }
  return sm.isApprox(ref);
}

template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& ref)
{
  const Index rows = ref.rows();

@@ -136,9 +91,7 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& ref)
  // VERIFY_IS_APPROX(m, refMat);

  VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdMapTraits> >(m,refMat,nonzeroCoords) ));
#ifdef EIGEN_UNORDERED_MAP_SUPPORT
  VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdUnorderedMapTraits> >(m,refMat,nonzeroCoords) ));
#endif
#ifdef EIGEN_GOOGLEHASH_SUPPORT
  VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleDenseHashMapTraits> >(m,refMat,nonzeroCoords) ));
  VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleSparseHashMapTraits> >(m,refMat,nonzeroCoords) ));

@@ -187,6 +140,72 @@ void check_marketio_vector()
  VERIFY_IS_EQUAL(v1,v2);
}

template<typename DenseMatrixType>
void check_marketio_dense()
{
  Index rows = DenseMatrixType::MaxRowsAtCompileTime;
  if (DenseMatrixType::MaxRowsAtCompileTime==Dynamic){
    rows = internal::random<Index>(1,100);
  } else if (DenseMatrixType::RowsAtCompileTime==Dynamic){
    rows = internal::random<Index>(1,DenseMatrixType::MaxRowsAtCompileTime);
  }

  Index cols = DenseMatrixType::MaxColsAtCompileTime;
  if (DenseMatrixType::MaxColsAtCompileTime==Dynamic){
    cols = internal::random<Index>(1,100);
  } else if (DenseMatrixType::ColsAtCompileTime==Dynamic){
    cols = internal::random<Index>(1,DenseMatrixType::MaxColsAtCompileTime);
  }

  DenseMatrixType m1, m2;
  m1 = DenseMatrixType::Random(rows,cols);
  saveMarketDense(m1, "dense_extra.mtx");
  loadMarketDense(m2, "dense_extra.mtx");
  VERIFY_IS_EQUAL(m1,m2);
}

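check_marketio_dense covers the dense counterparts (saveMarketDense/loadMarketDense) that this change introduces alongside the existing sparse Matrix Market I/O. A minimal round-trip sketch (file name illustrative):

#include <Eigen/Dense>
#include <unsupported/Eigen/SparseExtra>

int main() {
  Eigen::MatrixXd m1 = Eigen::MatrixXd::Random(4, 3), m2;
  Eigen::saveMarketDense(m1, "dense_roundtrip.mtx");
  Eigen::loadMarketDense(m2, "dense_roundtrip.mtx");
  return m1.isApprox(m2) ? 0 : 1;
}
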
template <typename Scalar>
void check_sparse_inverse() {
  typedef SparseMatrix<Scalar> MatrixType;

  Matrix<Scalar, -1, -1> A;
  A.resize(1000, 1000);
  A.fill(0);
  A.setIdentity();
  A.col(0).array() += 1;
  A.row(0).array() += 2;
  A.col(2).array() += 3;
  A.row(7).array() += 3;
  A.col(9).array() += 3;
  A.block(3, 4, 4, 2).array() += 9;
  A.middleRows(10, 50).array() += 3;
  A.middleCols(50, 50).array() += 40;
  A.block(500, 300, 40, 20).array() += 10;
  A.transposeInPlace();

  Eigen::SparseLU<MatrixType> slu;
  slu.compute(A.sparseView());
  Matrix<Scalar, -1, -1> Id(A.rows(), A.cols());
  Id.setIdentity();
  Matrix<Scalar, -1, -1> inv = slu.solve(Id);

  const MatrixType sparseInv = Eigen::SparseInverse<Scalar>().compute(A.sparseView()).inverse();

  Scalar sumdiff = 0;  // check the diff only of the non-zero elements
  for (Eigen::Index j = 0; j < A.cols(); j++) {
    for (typename MatrixType::InnerIterator iter(sparseInv, j); iter; ++iter) {
      const Scalar diff = std::abs(inv(iter.row(), iter.col()) - iter.value());
      VERIFY_IS_APPROX_OR_LESS_THAN(diff, 1e-11);

      if (iter.value() != 0) {
        sumdiff += diff;
      }
    }
  }

  VERIFY_IS_APPROX_OR_LESS_THAN(sumdiff, 1e-10);
}

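check_sparse_inverse validates the new SparseInverse against a dense inverse obtained through SparseLU. A hedged sketch of the SparseInverse call chain on a trivially invertible matrix:

#include <Eigen/Dense>
#include <Eigen/Sparse>
#include <unsupported/Eigen/SparseExtra>

int main() {
  Eigen::MatrixXd D = Eigen::MatrixXd::Identity(5, 5);
  D(0, 0) = 2.0;
  D(1, 3) = 0.5;  // keep it comfortably invertible
  Eigen::SparseMatrix<double> A = D.sparseView();
  Eigen::SparseMatrix<double> Ainv =
      Eigen::SparseInverse<double>().compute(A).inverse();
  return (Eigen::MatrixXd(Ainv) * D).isIdentity(1e-12) ? 0 : 1;
}
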
EIGEN_DECLARE_TEST(sparse_extra)
{
  for(int i = 0; i < g_repeat; i++) {

@@ -195,22 +214,24 @@ EIGEN_DECLARE_TEST(sparse_extra)
    CALL_SUBTEST_2( sparse_extra(SparseMatrix<std::complex<double> >(s, s)) );
    CALL_SUBTEST_1( sparse_extra(SparseMatrix<double>(s, s)) );

    CALL_SUBTEST_3( sparse_extra(DynamicSparseMatrix<double>(s, s)) );
    // CALL_SUBTEST_3(( sparse_basic(DynamicSparseMatrix<double>(s, s)) ));
    // CALL_SUBTEST_3(( sparse_basic(DynamicSparseMatrix<double,ColMajor,long int>(s, s)) ));

    CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, ColMajor> >()) );
    CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, RowMajor> >()) );

    CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
    CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
    CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
    CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
    CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );
    CALL_SUBTEST_3( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
    CALL_SUBTEST_3( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
    CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
    CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
    CALL_SUBTEST_3( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
    CALL_SUBTEST_3( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
    CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
    CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );

    CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,Dynamic> >()) );
    CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,Dynamic,RowMajor> >()) );
    CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,Dynamic,Dynamic> >()) );
    CALL_SUBTEST_4( (check_marketio_dense<Matrix<std::complex<float>,Dynamic,Dynamic> >()) );
    CALL_SUBTEST_4( (check_marketio_dense<Matrix<std::complex<double>,Dynamic,Dynamic> >()) );
    CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,3> >()) );
    CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,3,Dynamic> >()) );
    CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,3,4> >()) );
    CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,Dynamic,Dynamic,ColMajor,5,5> >()) );

    CALL_SUBTEST_5( (check_marketio_vector<Matrix<float,1,Dynamic> >()) );
    CALL_SUBTEST_5( (check_marketio_vector<Matrix<double,1,Dynamic> >()) );

@@ -221,6 +242,8 @@ EIGEN_DECLARE_TEST(sparse_extra)
    CALL_SUBTEST_5( (check_marketio_vector<Matrix<std::complex<float>,Dynamic,1> >()) );
    CALL_SUBTEST_5( (check_marketio_vector<Matrix<std::complex<double>,Dynamic,1> >()) );

    CALL_SUBTEST_6((check_sparse_inverse<double>()));

    TEST_SET_BUT_UNUSED_VARIABLE(s);
  }
}

@@ -191,10 +191,10 @@ template<typename ArrayType> void array_special_functions()

  // Check the zeta function against scipy.special.zeta
  {
    ArrayType x(10), q(10), res(10), ref(10);
    x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9, 2, 3, 4;
    q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345, -1, -2, -3;
    ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan, plusinf, nan, plusinf;
    ArrayType x(11), q(11), res(11), ref(11);
    x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9, 2, 3, 4, 2000;
    q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345, -1, -2, -3, 2000;
    ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan, plusinf, nan, plusinf, 0;
    CALL_SUBTEST( verify_component_wise(ref, ref); );
    CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); );
    CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); );
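The appended (x, q) = (2000, 2000) entry pins the large-argument regime, where the Hurwitz zeta sum is dominated by 2000^-2000 and underflows to exactly 0. A minimal sketch of the array API being checked:

#include <iostream>
#include <Eigen/Core>
#include <unsupported/Eigen/SpecialFunctions>

int main() {
  Eigen::ArrayXd x(3), q(3);
  x << 1.5, 4.0, 2000.0;
  q << 2.0, 1.5, 2000.0;
  std::cout << Eigen::zeta(x, q) << std::endl;  // elementwise Hurwitz zeta(x, q)
  return 0;
}
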
@@ -114,7 +114,7 @@ template<typename Scalar,typename Packet> void packetmath_real()
        Scalar(std::pow(Scalar(10), internal::random<Scalar>(Scalar(-1),Scalar(2))));
  }

#if EIGEN_HAS_C99_MATH && (EIGEN_COMP_CXXVER >= 11)
#if EIGEN_HAS_C99_MATH
  CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLGamma, std::lgamma, internal::plgamma);
  CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErf, std::erf, internal::perf);
  CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErfc, std::erfc, internal::perfc);