ADD: added other eigen lib

This commit is contained in:
Henry Winkel
2022-12-21 16:19:04 +01:00
parent a570766dc6
commit 9e56c7f2c0
832 changed files with 36586 additions and 20006 deletions

View File

@@ -26,11 +26,7 @@ find_package(Adolc)
if(Adolc_FOUND)
include_directories(${ADOLC_INCLUDES})
ei_add_property(EIGEN_TESTED_BACKENDS "Adolc, ")
if(EIGEN_TEST_CXX11)
ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
else()
message(STATUS "Adolc found, but tests require C++11 mode")
endif()
ei_add_test(forward_adolc "" ${ADOLC_LIBRARIES})
else()
ei_add_property(EIGEN_MISSING_BACKENDS "Adolc, ")
endif()
@@ -56,14 +52,16 @@ ei_add_test(FFT)
ei_add_test(EulerAngles)
find_package(MPREAL)
if(MPREAL_FOUND AND EIGEN_COMPILER_SUPPORT_CPP11)
if(MPREAL_FOUND)
ei_add_property(EIGEN_TESTED_BACKENDS "MPFR C++, ")
include_directories(${MPREAL_INCLUDES})
ei_add_test(mpreal_support "-std=c++11" "${MPREAL_LIBRARIES}" )
ei_add_test(mpreal_support "" "${MPREAL_LIBRARIES}" )
else()
ei_add_property(EIGEN_MISSING_BACKENDS "MPFR C++, ")
endif()
ei_add_test(NNLS)
ei_add_test(sparse_extra "" "")
find_package(FFTW)
@@ -79,6 +77,17 @@ else()
ei_add_property(EIGEN_MISSING_BACKENDS "fftw, ")
endif()
find_path(POCKETFFT pocketfft_hdronly.h)
if(POCKETFFT)
if(EIGEN_TEST_CXX11)
ei_add_property(EIGEN_TESTED_BACKENDS "pocketfft, ")
include_directories( ${POCKETFFT} )
ei_add_test(pocketfft "-pthread" "${CMAKE_THREAD_LIBS_INIT}" "-DEIGEN_POCKETFFT_DEFAULT" )
endif()
else()
ei_add_property(EIGEN_MISSING_BACKENDS "pocketfft, ")
endif()
option(EIGEN_TEST_OPENGL "Enable OpenGL support in unit tests" OFF)
if(EIGEN_TEST_OPENGL)
find_package(OpenGL)
@@ -103,229 +112,222 @@ ei_add_test(gmres)
ei_add_test(dgmres)
ei_add_test(minres)
ei_add_test(idrs)
ei_add_test(bicgstabl)
ei_add_test(idrstabl)
ei_add_test(levenberg_marquardt)
ei_add_test(kronecker_product)
ei_add_test(bessel_functions)
ei_add_test(special_functions)
ei_add_test(special_packetmath "-DEIGEN_FAST_MATH=1")
if(EIGEN_TEST_CXX11)
if(EIGEN_TEST_SYCL)
set(EIGEN_SYCL ON)
# Forward CMake options as preprocessor definitions
if(EIGEN_SYCL_USE_DEFAULT_SELECTOR)
add_definitions(-DEIGEN_SYCL_USE_DEFAULT_SELECTOR=${EIGEN_SYCL_USE_DEFAULT_SELECTOR})
endif()
if(EIGEN_SYCL_NO_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_NO_LOCAL_MEM=${EIGEN_SYCL_NO_LOCAL_MEM})
endif()
if(EIGEN_SYCL_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_LOCAL_MEM=${EIGEN_SYCL_LOCAL_MEM})
endif()
if(EIGEN_SYCL_MAX_GLOBAL_RANGE)
add_definitions(-DEIGEN_SYCL_MAX_GLOBAL_RANGE=${EIGEN_SYCL_MAX_GLOBAL_RANGE})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM0)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM0=${EIGEN_SYCL_LOCAL_THREAD_DIM0})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM1)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM1=${EIGEN_SYCL_LOCAL_THREAD_DIM1})
endif()
if(EIGEN_SYCL_REG_M)
add_definitions(-DEIGEN_SYCL_REG_M=${EIGEN_SYCL_REG_M})
endif()
if(EIGEN_SYCL_REG_N)
add_definitions(-DEIGEN_SYCL_REG_N=${EIGEN_SYCL_REG_N})
endif()
if(EIGEN_SYCL_USE_PROGRAM_CLASS)
add_definitions(-DEIGEN_SYCL_USE_PROGRAM_CLASS=${EIGEN_SYCL_USE_PROGRAM_CLASS})
endif()
if(EIGEN_SYCL_ASYNC_EXECUTION)
add_definitions(-DEIGEN_SYCL_ASYNC_EXECUTION=${EIGEN_SYCL_ASYNC_EXECUTION})
endif()
if(EIGEN_SYCL_DISABLE_SKINNY)
add_definitions(-DEIGEN_SYCL_DISABLE_SKINNY=${EIGEN_SYCL_DISABLE_SKINNY})
endif()
if(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER)
if(EIGEN_TEST_SYCL)
set(EIGEN_SYCL ON)
# Forward CMake options as preprocessor definitions
if(EIGEN_SYCL_USE_DEFAULT_SELECTOR)
add_definitions(-DEIGEN_SYCL_USE_DEFAULT_SELECTOR=${EIGEN_SYCL_USE_DEFAULT_SELECTOR})
endif()
if(EIGEN_SYCL_NO_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_NO_LOCAL_MEM=${EIGEN_SYCL_NO_LOCAL_MEM})
endif()
if(EIGEN_SYCL_LOCAL_MEM)
add_definitions(-DEIGEN_SYCL_LOCAL_MEM=${EIGEN_SYCL_LOCAL_MEM})
endif()
if(EIGEN_SYCL_MAX_GLOBAL_RANGE)
add_definitions(-DEIGEN_SYCL_MAX_GLOBAL_RANGE=${EIGEN_SYCL_MAX_GLOBAL_RANGE})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM0)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM0=${EIGEN_SYCL_LOCAL_THREAD_DIM0})
endif()
if(EIGEN_SYCL_LOCAL_THREAD_DIM1)
add_definitions(-DEIGEN_SYCL_LOCAL_THREAD_DIM1=${EIGEN_SYCL_LOCAL_THREAD_DIM1})
endif()
if(EIGEN_SYCL_REG_M)
add_definitions(-DEIGEN_SYCL_REG_M=${EIGEN_SYCL_REG_M})
endif()
if(EIGEN_SYCL_REG_N)
add_definitions(-DEIGEN_SYCL_REG_N=${EIGEN_SYCL_REG_N})
endif()
if(EIGEN_SYCL_USE_PROGRAM_CLASS)
add_definitions(-DEIGEN_SYCL_USE_PROGRAM_CLASS=${EIGEN_SYCL_USE_PROGRAM_CLASS})
endif()
if(EIGEN_SYCL_ASYNC_EXECUTION)
add_definitions(-DEIGEN_SYCL_ASYNC_EXECUTION=${EIGEN_SYCL_ASYNC_EXECUTION})
endif()
if(EIGEN_SYCL_DISABLE_SKINNY)
add_definitions(-DEIGEN_SYCL_DISABLE_SKINNY=${EIGEN_SYCL_DISABLE_SKINNY})
endif()
if(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER)
add_definitions(-DEIGEN_SYCL_DISABLE_DOUBLE_BUFFER=${EIGEN_SYCL_DISABLE_DOUBLE_BUFFER})
endif()
if(EIGEN_SYCL_DISABLE_RANK1)
add_definitions(-DEIGEN_SYCL_DISABLE_RANK1=${EIGEN_SYCL_DISABLE_RANK1})
endif()
if(EIGEN_SYCL_DISABLE_SCALAR)
add_definitions(-DEIGEN_SYCL_DISABLE_SCALAR=${EIGEN_SYCL_DISABLE_SCALAR})
endif()
if(EIGEN_SYCL_DISABLE_GEMV)
add_definitions(-DEIGEN_SYCL_DISABLE_GEMV=${EIGEN_SYCL_DISABLE_GEMV})
endif()
if(EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION)
add_definitions(-DEIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=${EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION})
endif()
if(EIGEN_SYCL_TRISYCL)
# triSYCL now requires c++17.
set(CMAKE_CXX_STANDARD 17)
else()
if(MSVC)
# Set the host and device compilers C++ standard to C++14. On Windows setting this to C++11
# can cause issues with the ComputeCpp device compiler parsing Visual Studio Headers.
set(CMAKE_CXX_STANDARD 14)
list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
else()
set(CMAKE_CXX_STANDARD 11)
list(APPEND COMPUTECPP_USER_FLAGS -Wall)
endif()
# The following flags are not supported by Clang and can cause warnings
# if used with -Werror so they are removed here.
if(COMPUTECPP_USE_COMPILER_DRIVER)
set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
endif()
list(APPEND COMPUTECPP_USER_FLAGS
-DEIGEN_NO_ASSERTION_CHECKING=1
-no-serial-memop
-Xclang
-cl-mad-enable)
endif()
ei_add_test(cxx11_tensor_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_image_op_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_math_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_forced_eval_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_broadcast_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_device_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_reduction_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_morphing_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_shuffling_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_padding_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_builtins_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_contract_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_concatenation_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_reverse_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_convolution_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_striding_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_chipping_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_layout_swap_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_inflation_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_random_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_generator_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_image_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_volume_patch_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_argmax_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_custom_op_sycl ${STD_CXX_FLAG})
ei_add_test(cxx11_tensor_scan_sycl ${STD_CXX_FLAG})
set(EIGEN_SYCL OFF)
if(EIGEN_SYCL_DISABLE_RANK1)
add_definitions(-DEIGEN_SYCL_DISABLE_RANK1=${EIGEN_SYCL_DISABLE_RANK1})
endif()
if(EIGEN_SYCL_DISABLE_SCALAR)
add_definitions(-DEIGEN_SYCL_DISABLE_SCALAR=${EIGEN_SYCL_DISABLE_SCALAR})
endif()
if(EIGEN_SYCL_DISABLE_GEMV)
add_definitions(-DEIGEN_SYCL_DISABLE_GEMV=${EIGEN_SYCL_DISABLE_GEMV})
endif()
if(EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION)
add_definitions(-DEIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION=${EIGEN_SYCL_DISABLE_ARM_GPU_CACHE_OPTIMISATION})
endif()
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
if(EIGEN_SYCL_TRISYCL)
# triSYCL now requires c++17.
set(CMAKE_CXX_STANDARD 17)
else()
if(MSVC)
list(APPEND COMPUTECPP_USER_FLAGS -DWIN32)
else()
list(APPEND COMPUTECPP_USER_FLAGS -Wall)
endif()
# The following flags are not supported by Clang and can cause warnings
# if used with -Werror so they are removed here.
if(COMPUTECPP_USE_COMPILER_DRIVER)
set(CMAKE_CXX_COMPILER ${ComputeCpp_DEVICE_COMPILER_EXECUTABLE})
string(REPLACE "-Wlogical-op" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
string(REPLACE "-Wno-psabi" "" CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS})
endif()
list(APPEND COMPUTECPP_USER_FLAGS
-DEIGEN_NO_ASSERTION_CHECKING=1
-no-serial-memop
-Xclang
-cl-mad-enable)
endif()
ei_add_test(cxx11_meta)
ei_add_test(cxx11_maxsizevector)
ei_add_test(cxx11_tensor_argmax)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_block_access)
ei_add_test(cxx11_tensor_block_eval)
ei_add_test(cxx11_tensor_block_io)
ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_concatenation)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_empty)
ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_expr)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_fixed_size)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_ifft)
ei_add_test(cxx11_tensor_image_patch)
ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_inflation)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_lvalue)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_mixed_indices)
ei_add_test(cxx11_tensor_morphing)
ei_add_test(cxx11_tensor_move)
ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_strings)
ei_add_test(cxx11_tensor_padding)
ei_add_test(cxx11_tensor_patch)
ei_add_test(cxx11_tensor_random)
ei_add_test(cxx11_tensor_reduction)
ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_scan)
ei_add_test(cxx11_tensor_shuffling)
ei_add_test(cxx11_tensor_simple)
ei_add_test(cxx11_tensor_striding)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_thread_local "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_trace)
ei_add_test(cxx11_tensor_volume_patch)
ei_add_test(cxx11_tensor_sycl)
ei_add_test(cxx11_tensor_image_op_sycl)
ei_add_test(cxx11_tensor_math_sycl)
ei_add_test(cxx11_tensor_forced_eval_sycl)
ei_add_test(cxx11_tensor_broadcast_sycl)
ei_add_test(cxx11_tensor_device_sycl)
ei_add_test(cxx11_tensor_reduction_sycl)
ei_add_test(cxx11_tensor_morphing_sycl)
ei_add_test(cxx11_tensor_shuffling_sycl)
ei_add_test(cxx11_tensor_padding_sycl)
ei_add_test(cxx11_tensor_builtins_sycl)
ei_add_test(cxx11_tensor_contract_sycl)
ei_add_test(cxx11_tensor_concatenation_sycl)
ei_add_test(cxx11_tensor_reverse_sycl)
ei_add_test(cxx11_tensor_convolution_sycl)
ei_add_test(cxx11_tensor_striding_sycl)
ei_add_test(cxx11_tensor_chipping_sycl)
ei_add_test(cxx11_tensor_layout_swap_sycl)
ei_add_test(cxx11_tensor_inflation_sycl)
ei_add_test(cxx11_tensor_random_sycl)
ei_add_test(cxx11_tensor_generator_sycl)
ei_add_test(cxx11_tensor_patch_sycl)
ei_add_test(cxx11_tensor_image_patch_sycl)
ei_add_test(cxx11_tensor_volume_patch_sycl)
ei_add_test(cxx11_tensor_argmax_sycl)
ei_add_test(cxx11_tensor_custom_op_sycl)
ei_add_test(cxx11_tensor_scan_sycl)
set(EIGEN_SYCL OFF)
endif()
ei_add_test(cxx11_eventcount "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_runqueue "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_non_blocking_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_meta)
ei_add_test(cxx11_maxsizevector)
ei_add_test(cxx11_tensor_argmax)
ei_add_test(cxx11_tensor_assign)
ei_add_test(cxx11_tensor_block_access)
ei_add_test(cxx11_tensor_block_eval)
ei_add_test(cxx11_tensor_block_io)
ei_add_test(cxx11_tensor_broadcasting)
ei_add_test(cxx11_tensor_casts)
ei_add_test(cxx11_tensor_chipping)
ei_add_test(cxx11_tensor_comparisons)
ei_add_test(cxx11_tensor_concatenation)
ei_add_test(cxx11_tensor_const)
ei_add_test(cxx11_tensor_contraction)
ei_add_test(cxx11_tensor_convolution)
ei_add_test(cxx11_tensor_custom_index)
ei_add_test(cxx11_tensor_custom_op)
ei_add_test(cxx11_tensor_dimension)
ei_add_test(cxx11_tensor_empty)
ei_add_test(cxx11_tensor_executor "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_expr)
ei_add_test(cxx11_tensor_fft)
ei_add_test(cxx11_tensor_fixed_size)
ei_add_test(cxx11_tensor_forced_eval)
ei_add_test(cxx11_tensor_generator)
ei_add_test(cxx11_tensor_ifft)
ei_add_test(cxx11_tensor_image_patch)
ei_add_test(cxx11_tensor_index_list)
ei_add_test(cxx11_tensor_inflation)
ei_add_test(cxx11_tensor_intdiv)
ei_add_test(cxx11_tensor_io)
ei_add_test(cxx11_tensor_layout_swap)
ei_add_test(cxx11_tensor_lvalue)
ei_add_test(cxx11_tensor_map)
ei_add_test(cxx11_tensor_math)
ei_add_test(cxx11_tensor_mixed_indices)
ei_add_test(cxx11_tensor_morphing)
ei_add_test(cxx11_tensor_move)
ei_add_test(cxx11_tensor_notification "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_of_complex)
ei_add_test(cxx11_tensor_of_const_values)
ei_add_test(cxx11_tensor_of_strings)
ei_add_test(cxx11_tensor_padding)
ei_add_test(cxx11_tensor_patch)
ei_add_test(cxx11_tensor_random)
ei_add_test(cxx11_tensor_reduction)
ei_add_test(cxx11_tensor_ref)
ei_add_test(cxx11_tensor_roundings)
ei_add_test(cxx11_tensor_scan)
ei_add_test(cxx11_tensor_shuffling)
ei_add_test(cxx11_tensor_simple)
ei_add_test(cxx11_tensor_striding)
ei_add_test(cxx11_tensor_sugar)
ei_add_test(cxx11_tensor_thread_local "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_thread_pool "-pthread" "${CMAKE_THREAD_LIBS_INIT}")
ei_add_test(cxx11_tensor_trace)
ei_add_test(cxx11_tensor_volume_patch)
# ei_add_test(cxx11_tensor_symmetry)
if("${CMAKE_SIZEOF_VOID_P}" EQUAL "8" AND NOT CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
# This test requires __uint128_t which is only available on 64bit systems
ei_add_test(cxx11_tensor_uint128)
endif()
endif()
# These tests need nvcc
find_package(CUDA 7.0)
find_package(CUDA 9.0)
if(CUDA_FOUND AND EIGEN_TEST_CUDA)
# Make sure to compile without the -pedantic, -Wundef, -Wnon-virtual-dtor
# and -fno-check-new flags since they trigger thousands of compilation warnings
# in the CUDA runtime
# Also remove -ansi that is incompatible with std=c++11.
string(REPLACE "-pedantic" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wundef" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-Wnon-virtual-dtor" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-fno-check-new" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
string(REPLACE "-ansi" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
message(STATUS "Flags used to compile cuda code: " ${CMAKE_CXX_FLAGS})
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
set(CUDA_NVCC_FLAGS "-ccbin ${CMAKE_C_COMPILER}" CACHE STRING "nvcc flags" FORCE)
endif()
if(EIGEN_TEST_CUDA_CLANG)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
string(APPEND CMAKE_CXX_FLAGS " --cuda-path=${CUDA_TOOLKIT_ROOT_DIR}")
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND CMAKE_CXX_FLAGS " --cuda-gpu-arch=sm_${ARCH}")
endforeach()
string(APPEND CMAKE_CXX_FLAGS " ${EIGEN_CUDA_CXX_FLAGS}")
else()
set(CUDA_PROPAGATE_HOST_FLAGS OFF)
set(NVCC_ARCH_FLAGS)
# Define an -arch=sm_<arch>, otherwise if GPU does not exactly match one of
# those in the arch list for -gencode, the kernels will fail to run with
# cudaErrorNoKernelImageForDevice
# This can happen with newer cards (e.g. sm_75) and compiling with older
# versions of nvcc (e.g. 9.2) that do not support their specific arch.
list(LENGTH EIGEN_CUDA_COMPUTE_ARCH EIGEN_CUDA_COMPUTE_ARCH_SIZE)
if(EIGEN_CUDA_COMPUTE_ARCH_SIZE)
list(GET EIGEN_CUDA_COMPUTE_ARCH 0 EIGEN_CUDA_COMPUTE_DEFAULT)
set(NVCC_ARCH_FLAGS " -arch=sm_${EIGEN_CUDA_COMPUTE_DEFAULT}")
endif()
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()
set(CUDA_NVCC_FLAGS "--expt-relaxed-constexpr -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS} ${EIGEN_CUDA_CXX_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
endif()
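# A rough illustration of what the branch above produces (comment only, assuming
# EIGEN_CUDA_COMPUTE_ARCH holds the list "60;75"): the first entry supplies the default
# -arch flag and every entry gets its own -gencode pair, so CUDA_NVCC_FLAGS ends up roughly as
#   --expt-relaxed-constexpr -Xcudafe "--display_error_number" -arch=sm_60
#   -gencode arch=compute_60,code=sm_60 -gencode arch=compute_75,code=sm_75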
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--expt-relaxed-constexpr")
if (${CUDA_VERSION} STREQUAL "7.0")
set(EIGEN_CUDA_RELAXED_CONSTEXPR "--relaxed-constexpr")
endif()
set(NVCC_ARCH_FLAGS)
foreach(ARCH IN LISTS EIGEN_CUDA_COMPUTE_ARCH)
string(APPEND NVCC_ARCH_FLAGS " -gencode arch=compute_${ARCH},code=sm_${ARCH}")
endforeach()
set(CUDA_NVCC_FLAGS "${EIGEN_CUDA_RELAXED_CONSTEXPR} -Xcudafe \"--display_error_number\" ${NVCC_ARCH_FLAGS} ${CUDA_NVCC_FLAGS}")
cuda_include_directories("${CMAKE_CURRENT_BINARY_DIR}" "${CUDA_TOOLKIT_ROOT_DIR}/include")
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
ei_add_test(cxx11_tensor_complex_gpu)
@@ -355,7 +357,6 @@ if(CUDA_FOUND AND EIGEN_TEST_CUDA)
ei_add_test(cxx11_tensor_random_gpu)
endif()
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
endif()
@@ -365,52 +366,46 @@ if (EIGEN_TEST_HIP)
set(HIP_PATH "/opt/rocm/hip" CACHE STRING "Path to the HIP installation.")
if (EXISTS ${HIP_PATH})
list(APPEND CMAKE_MODULE_PATH ${HIP_PATH}/cmake)
find_package(HIP REQUIRED)
if (HIP_FOUND)
execute_process(COMMAND ${HIP_PATH}/bin/hipconfig --platform OUTPUT_VARIABLE HIP_PLATFORM)
if ((${HIP_PLATFORM} STREQUAL "hcc") OR (${HIP_PLATFORM} STREQUAL "amd"))
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${HIP_PATH}/include)
include_directories(${CMAKE_CURRENT_BINARY_DIR})
include_directories(${HIP_PATH}/include)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
#
# complex datatype is not yet supported by HIP
# so leaving out those tests for now
#
# ei_add_test(cxx11_tensor_complex_gpu)
# ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
#
ei_add_test(cxx11_tensor_reduction_gpu)
ei_add_test(cxx11_tensor_argmax_gpu)
ei_add_test(cxx11_tensor_cast_float16_gpu)
ei_add_test(cxx11_tensor_scan_gpu)
ei_add_test(cxx11_tensor_device)
set(EIGEN_ADD_TEST_FILENAME_EXTENSION "cu")
#
# complex datatype is not yet supported by HIP
# so leaving out those tests for now
#
# ei_add_test(cxx11_tensor_complex_gpu)
# ei_add_test(cxx11_tensor_complex_cwise_ops_gpu)
#
ei_add_test(cxx11_tensor_reduction_gpu)
ei_add_test(cxx11_tensor_argmax_gpu)
ei_add_test(cxx11_tensor_cast_float16_gpu)
ei_add_test(cxx11_tensor_scan_gpu)
ei_add_test(cxx11_tensor_device)
ei_add_test(cxx11_tensor_gpu)
ei_add_test(cxx11_tensor_contract_gpu)
ei_add_test(cxx11_tensor_of_float16_gpu)
ei_add_test(cxx11_tensor_of_bfloat16_gpu)
ei_add_test(cxx11_tensor_random_gpu)
ei_add_test(cxx11_tensor_gpu)
ei_add_test(cxx11_tensor_contract_gpu)
ei_add_test(cxx11_tensor_of_float16_gpu)
ei_add_test(cxx11_tensor_random_gpu)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
unset(EIGEN_ADD_TEST_FILENAME_EXTENSION)
elseif ((${HIP_PLATFORM} STREQUAL "nvcc") OR (${HIP_PLATFORM} STREQUAL "nvidia"))
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
message(FATAL_ERROR "HIP_PLATFORM = nvcc is not supported within Eigen")
else ()
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
message(FATAL_ERROR "Unknown HIP_PLATFORM = ${HIP_PLATFORM}")
endif()
endif()
else ()
message(FATAL_ERROR "EIGEN_TEST_HIP is ON, but the specified HIP_PATH (${HIP_PATH}) does not exist")
endif()
endif()

View File

@@ -1,2 +1,2 @@
#define test_FFTW test_FFT
#include "FFTW.cpp"
#define EIGEN_FFT_DEFAULT 1
#include "fft_test_shared.h"

View File

@@ -1,262 +1,2 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/FFT>
template <typename T>
std::complex<T> RandomCpx() { return std::complex<T>( (T)(rand()/(T)RAND_MAX - .5), (T)(rand()/(T)RAND_MAX - .5) ); }
using namespace std;
using namespace Eigen;
template < typename T>
complex<long double> promote(complex<T> x) { return complex<long double>((long double)x.real(),(long double)x.imag()); }
complex<long double> promote(float x) { return complex<long double>((long double)x); }
complex<long double> promote(double x) { return complex<long double>((long double)x); }
complex<long double> promote(long double x) { return complex<long double>((long double)x); }
template <typename VT1,typename VT2>
long double fft_rmse( const VT1 & fftbuf,const VT2 & timebuf)
{
long double totalpower=0;
long double difpower=0;
long double pi = acos((long double)-1 );
for (size_t k0=0;k0<(size_t)fftbuf.size();++k0) {
complex<long double> acc = 0;
long double phinc = (long double)(-2.)*k0* pi / timebuf.size();
for (size_t k1=0;k1<(size_t)timebuf.size();++k1) {
acc += promote( timebuf[k1] ) * exp( complex<long double>(0,k1*phinc) );
}
totalpower += numext::abs2(acc);
complex<long double> x = promote(fftbuf[k0]);
complex<long double> dif = acc - x;
difpower += numext::abs2(dif);
//cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(numext::abs2(dif)) << endl;
}
cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
return sqrt(difpower/totalpower);
}
template <typename VT1,typename VT2>
long double dif_rmse( const VT1 buf1,const VT2 buf2)
{
long double totalpower=0;
long double difpower=0;
size_t n = (min)( buf1.size(),buf2.size() );
for (size_t k=0;k<n;++k) {
totalpower += (long double)((numext::abs2( buf1[k] ) + numext::abs2(buf2[k]) )/2);
difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower/totalpower);
}
enum { StdVectorContainer, EigenVectorContainer };
template<int Container, typename Scalar> struct VectorType;
template<typename Scalar> struct VectorType<StdVectorContainer,Scalar>
{
typedef vector<Scalar> type;
};
template<typename Scalar> struct VectorType<EigenVectorContainer,Scalar>
{
typedef Matrix<Scalar,Dynamic,1> type;
};
template <int Container, typename T>
void test_scalar_generic(int nfft)
{
typedef typename FFT<T>::Complex Complex;
typedef typename FFT<T>::Scalar Scalar;
typedef typename VectorType<Container,Scalar>::type ScalarVector;
typedef typename VectorType<Container,Complex>::type ComplexVector;
FFT<T> fft;
ScalarVector tbuf(nfft);
ComplexVector freqBuf;
for (int k=0;k<nfft;++k)
tbuf[k]= (T)( rand()/(double)RAND_MAX - .5);
// make sure it DOESN'T give the right full spectrum answer
// if we've asked for half-spectrum
fft.SetFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)( (nfft>>1)+1) );
VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
fft.ClearFlag(fft.HalfSpectrum );
fft.fwd( freqBuf,tbuf);
VERIFY( (size_t)freqBuf.size() == (size_t)nfft);
VERIFY( T(fft_rmse(freqBuf,tbuf)) < test_precision<T>() );// gross check
if (nfft&1)
return; // odd FFTs get the wrong size inverse FFT
ScalarVector tbuf2;
fft.inv( tbuf2 , freqBuf);
VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ScalarVector tbuf3;
fft.SetFlag(fft.Unscaled);
fft.inv( tbuf3 , freqBuf);
for (int k=0;k<nfft;++k)
tbuf3[k] *= T(1./nfft);
//for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] - tbuf[i] ) << endl;
VERIFY( T(dif_rmse(tbuf,tbuf3)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( tbuf2 , freqBuf);
VERIFY( T(dif_rmse(tbuf,tbuf2)) < test_precision<T>() );// gross check
}
template <typename T>
void test_scalar(int nfft)
{
test_scalar_generic<StdVectorContainer,T>(nfft);
//test_scalar_generic<EigenVectorContainer,T>(nfft);
}
template <int Container, typename T>
void test_complex_generic(int nfft)
{
typedef typename FFT<T>::Complex Complex;
typedef typename VectorType<Container,Complex>::type ComplexVector;
FFT<T> fft;
ComplexVector inbuf(nfft);
ComplexVector outbuf;
ComplexVector buf3;
for (int k=0;k<nfft;++k)
inbuf[k]= Complex( (T)(rand()/(double)RAND_MAX - .5), (T)(rand()/(double)RAND_MAX - .5) );
fft.fwd( outbuf , inbuf);
VERIFY( T(fft_rmse(outbuf,inbuf)) < test_precision<T>() );// gross check
fft.inv( buf3 , outbuf);
VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
fft.SetFlag(fft.Unscaled);
fft.inv( buf4 , outbuf);
for (int k=0;k<nfft;++k)
buf4[k] *= T(1./nfft);
VERIFY( T(dif_rmse(inbuf,buf4)) < test_precision<T>() );// gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv( buf3 , outbuf);
VERIFY( T(dif_rmse(inbuf,buf3)) < test_precision<T>() );// gross check
}
template <typename T>
void test_complex(int nfft)
{
test_complex_generic<StdVectorContainer,T>(nfft);
test_complex_generic<EigenVectorContainer,T>(nfft);
}
/*
template <typename T,int nrows,int ncols>
void test_complex2d()
{
typedef typename Eigen::FFT<T>::Complex Complex;
FFT<T> fft;
Eigen::Matrix<Complex,nrows,ncols> src,src2,dst,dst2;
src = Eigen::Matrix<Complex,nrows,ncols>::Random();
//src = Eigen::Matrix<Complex,nrows,ncols>::Identity();
for (int k=0;k<ncols;k++) {
Eigen::Matrix<Complex,nrows,1> tmpOut;
fft.fwd( tmpOut,src.col(k) );
dst2.col(k) = tmpOut;
}
for (int k=0;k<nrows;k++) {
Eigen::Matrix<Complex,1,ncols> tmpOut;
fft.fwd( tmpOut, dst2.row(k) );
dst2.row(k) = tmpOut;
}
fft.fwd2(dst.data(),src.data(),ncols,nrows);
fft.inv2(src2.data(),dst.data(),ncols,nrows);
VERIFY( (src-src2).norm() < test_precision<T>() );
VERIFY( (dst-dst2).norm() < test_precision<T>() );
}
*/
void test_return_by_value(int len)
{
VectorXf in;
VectorXf in1;
in.setRandom( len );
VectorXcf out1,out2;
FFT<float> fft;
fft.SetFlag(fft.HalfSpectrum );
fft.fwd(out1,in);
out2 = fft.fwd(in);
VERIFY( (out1-out2).norm() < test_precision<float>() );
in1 = fft.inv(out1);
VERIFY( (in1-in).norm() < test_precision<float>() );
}
EIGEN_DECLARE_TEST(FFTW)
{
CALL_SUBTEST( test_return_by_value(32) );
//CALL_SUBTEST( ( test_complex2d<float,4,8> () ) ); CALL_SUBTEST( ( test_complex2d<double,4,8> () ) );
//CALL_SUBTEST( ( test_complex2d<long double,4,8> () ) );
CALL_SUBTEST( test_complex<float>(32) ); CALL_SUBTEST( test_complex<double>(32) );
CALL_SUBTEST( test_complex<float>(256) ); CALL_SUBTEST( test_complex<double>(256) );
CALL_SUBTEST( test_complex<float>(3*8) ); CALL_SUBTEST( test_complex<double>(3*8) );
CALL_SUBTEST( test_complex<float>(5*32) ); CALL_SUBTEST( test_complex<double>(5*32) );
CALL_SUBTEST( test_complex<float>(2*3*4) ); CALL_SUBTEST( test_complex<double>(2*3*4) );
CALL_SUBTEST( test_complex<float>(2*3*4*5) ); CALL_SUBTEST( test_complex<double>(2*3*4*5) );
CALL_SUBTEST( test_complex<float>(2*3*4*5*7) ); CALL_SUBTEST( test_complex<double>(2*3*4*5*7) );
CALL_SUBTEST( test_scalar<float>(32) ); CALL_SUBTEST( test_scalar<double>(32) );
CALL_SUBTEST( test_scalar<float>(45) ); CALL_SUBTEST( test_scalar<double>(45) );
CALL_SUBTEST( test_scalar<float>(50) ); CALL_SUBTEST( test_scalar<double>(50) );
CALL_SUBTEST( test_scalar<float>(256) ); CALL_SUBTEST( test_scalar<double>(256) );
CALL_SUBTEST( test_scalar<float>(2*3*4*5*7) ); CALL_SUBTEST( test_scalar<double>(2*3*4*5*7) );
#ifdef EIGEN_HAS_FFTWL
CALL_SUBTEST( test_complex<long double>(32) );
CALL_SUBTEST( test_complex<long double>(256) );
CALL_SUBTEST( test_complex<long double>(3*8) );
CALL_SUBTEST( test_complex<long double>(5*32) );
CALL_SUBTEST( test_complex<long double>(2*3*4) );
CALL_SUBTEST( test_complex<long double>(2*3*4*5) );
CALL_SUBTEST( test_complex<long double>(2*3*4*5*7) );
CALL_SUBTEST( test_scalar<long double>(32) );
CALL_SUBTEST( test_scalar<long double>(45) );
CALL_SUBTEST( test_scalar<long double>(50) );
CALL_SUBTEST( test_scalar<long double>(256) );
CALL_SUBTEST( test_scalar<long double>(2*3*4*5*7) );
#endif
}
#define EIGEN_FFTW_DEFAULT 1
#include "fft_test_shared.h"

View File

@@ -0,0 +1,472 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) Essex Edwards <essex.edwards@gmail.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_RUNTIME_NO_MALLOC
#include "main.h"
#include <unsupported/Eigen/NNLS>
/// Check that 'x' solves the NNLS optimization problem `min ||A*x-b|| s.t. 0 <= x`.
/// The \p tolerance parameter is the absolute tolerance on the gradient, A'*(A*x-b).
template <typename MatrixType, typename VectorB, typename VectorX, typename Scalar>
static void verify_nnls_optimality(const MatrixType &A, const VectorB &b, const VectorX &x, const Scalar tolerance) {
// The NNLS optimality conditions are:
//
// * 0 = A'*A*x - A'*b - lambda
// * 0 <= x[i] \forall i
// * 0 <= lambda[i] \forall i
// * 0 = x[i]*lambda[i] \forall i
//
// we don't know lambda, but by assuming the first optimality condition is true,
// we can derive it and then check the other conditions.
const VectorX lambda = A.transpose() * (A * x - b);
// NNLS solutions are EXACTLY not negative.
VERIFY_LE(0, x.minCoeff());
// Exact lambda would be non-negative, but computed lambda might leak a little
VERIFY_LE(-tolerance, lambda.minCoeff());
// x[i]*lambda[i] == 0 <~~> (x[i]==0) || (lambda[i] is small)
VERIFY(((x.array() == Scalar(0)) || (lambda.array() <= tolerance)).all());
}
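// A tiny worked instance of these conditions (illustration only, not part of the test suite):
// take A = [1; 2] and b = [-1; -2]. The unconstrained least-squares solution is x = -1, so the
// NNLS solution clamps to x = 0. There lambda = A'*(A*x - b) = -A'*b = 5 >= 0 and x*lambda = 0,
// so the sign and complementarity conditions checked above hold exactly.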
template <typename MatrixType, typename VectorB, typename VectorX>
static void test_nnls_known_solution(const MatrixType &A, const VectorB &b, const VectorX &x_expected) {
using Scalar = typename MatrixType::Scalar;
using std::sqrt;
const Scalar tolerance = sqrt(Eigen::GenericNumTraits<Scalar>::epsilon());
Index max_iter = 5 * A.cols(); // A heuristic guess.
NNLS<MatrixType> nnls(A, max_iter, tolerance);
const VectorX x = nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_IS_APPROX(x, x_expected);
verify_nnls_optimality(A, b, x, tolerance);
}
template <typename MatrixType>
static void test_nnls_random_problem() {
//
// SETUP
//
Index cols = MatrixType::ColsAtCompileTime;
if (cols == Dynamic) cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
Index rows = MatrixType::RowsAtCompileTime;
if (rows == Dynamic) rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
VERIFY_LE(cols, rows); // To have a unique LS solution: cols <= rows.
// Make some sort of random test problem from a wide range of scales and condition numbers.
using std::pow;
using Scalar = typename MatrixType::Scalar;
const Scalar sqrtConditionNumber = pow(Scalar(10), internal::random<Scalar>(Scalar(0), Scalar(2)));
const Scalar scaleA = pow(Scalar(10), internal::random<Scalar>(Scalar(-3), Scalar(3)));
const Scalar minSingularValue = scaleA / sqrtConditionNumber;
const Scalar maxSingularValue = scaleA * sqrtConditionNumber;
MatrixType A(rows, cols);
generateRandomMatrixSvs(setupRangeSvs<Matrix<Scalar, Dynamic, 1>>(cols, minSingularValue, maxSingularValue), rows,
cols, A);
// Make a random RHS also with a random scaling.
using VectorB = decltype(A.col(0).eval());
const Scalar scaleB = pow(Scalar(10), internal::random<Scalar>(Scalar(-3), Scalar(3)));
const VectorB b = scaleB * VectorB::Random(A.rows());
//
// ACT
//
using Scalar = typename MatrixType::Scalar;
using std::sqrt;
const Scalar tolerance =
sqrt(Eigen::GenericNumTraits<Scalar>::epsilon()) * b.cwiseAbs().maxCoeff() * A.cwiseAbs().maxCoeff();
Index max_iter = 5 * A.cols(); // A heuristic guess.
NNLS<MatrixType> nnls(A, max_iter, tolerance);
const typename NNLS<MatrixType>::SolutionVectorType &x = nnls.solve(b);
//
// VERIFY
//
// In fact, NNLS can fail on some problems, but they are rare in practice.
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, tolerance);
}
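// A quick check of the scaling used above (comment only): with sqrtConditionNumber = 10^r for
// r in [0, 2] and scaleA = 10^s for s in [-3, 3], the generated matrix has
// maxSingularValue / minSingularValue = sqrtConditionNumber^2 = 10^(2r), i.e. condition numbers
// between 1 and 10^4, while the overall scale 10^s varies over six orders of magnitude.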
static void test_nnls_handles_zero_rhs() {
//
// SETUP
//
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Zero(rows);
//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);
//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 1); // 0 or 1 would be fine for an edge case like this.
VERIFY_IS_EQUAL(x, VectorXd::Zero(cols));
}
static void test_nnls_handles_Mx0_matrix() {
//
// SETUP
//
const Index rows = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const MatrixXd A(rows, 0);
const VectorXd b = VectorXd::Random(rows);
//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);
//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 0);
VERIFY_IS_EQUAL(x.size(), 0);
}
static void test_nnls_handles_0x0_matrix() {
//
// SETUP
//
const MatrixXd A(0, 0);
const VectorXd b(0);
//
// ACT
//
NNLS<MatrixXd> nnls(A);
const VectorXd x = nnls.solve(b);
//
// VERIFY
//
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY_LE(nnls.iterations(), 0);
VERIFY_IS_EQUAL(x.size(), 0);
}
static void test_nnls_handles_dependent_columns() {
//
// SETUP
//
const Index rank = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE / 2);
const Index cols = 2 * rank;
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, rank) * MatrixXd::Random(rank, cols);
const VectorXd b = VectorXd::Random(rows);
//
// ACT
//
const double tolerance = 1e-8;
NNLS<MatrixXd> nnls(A);
const VectorXd &x = nnls.solve(b);
//
// VERIFY
//
// What should happen when the input 'A' has dependent columns?
// We might still succeed. Or we might not converge.
// Either outcome is fine. If Success is indicated,
// then 'x' must actually be a solution vector.
if (nnls.info() == ComputationInfo::Success) {
verify_nnls_optimality(A, b, x, tolerance);
}
}
static void test_nnls_handles_wide_matrix() {
//
// SETUP
//
const Index cols = internal::random<Index>(2, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(2, cols - 1);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Random(rows);
//
// ACT
//
const double tolerance = 1e-8;
NNLS<MatrixXd> nnls(A);
const VectorXd &x = nnls.solve(b);
//
// VERIFY
//
// What should happen when the input 'A' is wide?
// The unconstrained least-squares problem has infinitely many solutions.
// Subject to the non-negativity constraints,
// the solution might actually be unique (e.g. it is [0,0,..,0]).
// So, NNLS might succeed or it might fail.
// Either outcome is fine. If Success is indicated,
// then 'x' must actually be a solution vector.
if (nnls.info() == ComputationInfo::Success) {
verify_nnls_optimality(A, b, x, tolerance);
}
}
// 4x2 problem, unconstrained solution positive
static void test_nnls_known_1() {
Matrix<double, 4, 2> A(4, 2);
Matrix<double, 4, 1> b(4);
Matrix<double, 2, 1> x(2);
A << 1, 1, 2, 4, 3, 9, 4, 16;
b << 0.6, 2.2, 4.8, 8.4;
x << 0.1, 0.5;
return test_nnls_known_solution(A, b, x);
}
// 4x3 problem, unconstrained solution positive
static void test_nnls_known_2() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);
A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.73, 3.24, 8.31, 16.72;
x << 0.1, 0.5, 0.13;
test_nnls_known_solution(A, b, x);
}
// Simple 4x4 problem, unconstrained solution non-negative
static void test_nnls_known_3() {
Matrix<double, 4, 4> A(4, 4);
Matrix<double, 4, 1> b(4);
Matrix<double, 4, 1> x(4);
A << 1, 1, 1, 1, 2, 4, 8, 16, 3, 9, 27, 81, 4, 16, 64, 256;
b << 0.73, 3.24, 8.31, 16.72;
x << 0.1, 0.5, 0.13, 0;
test_nnls_known_solution(A, b, x);
}
// Simple 4x3 problem, unconstrained solution non-negative
static void test_nnls_known_4() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);
A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.23, 1.24, 3.81, 8.72;
x << 0.1, 0, 0.13;
test_nnls_known_solution(A, b, x);
}
// Simple 4x3 problem, unconstrained solution indefinite
static void test_nnls_known_5() {
Matrix<double, 4, 3> A(4, 3);
Matrix<double, 4, 1> b(4);
Matrix<double, 3, 1> x(3);
A << 1, 1, 1, 2, 4, 8, 3, 9, 27, 4, 16, 64;
b << 0.13, 0.84, 2.91, 7.12;
// Solution obtained by original nnls() implementation in Fortran
x << 0.0, 0.0, 0.1106544;
test_nnls_known_solution(A, b, x);
}
static void test_nnls_small_reference_problems() {
test_nnls_known_1();
test_nnls_known_2();
test_nnls_known_3();
test_nnls_known_4();
test_nnls_known_5();
}
static void test_nnls_with_half_precision() {
// The random matrix generation tools don't work with `half`,
// so here's a simpler setup mostly just to check that NNLS compiles & runs with custom scalar types.
using Mat = Matrix<half, 8, 2>;
using VecB = Matrix<half, 8, 1>;
using VecX = Matrix<half, 2, 1>;
Mat A = Mat::Random(); // full-column rank with high probability.
VecB b = VecB::Random();
NNLS<Mat> nnls(A, 20, half(1e-2f));
const VecX x = nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, half(1e-1));
}
static void test_nnls_special_case_solves_in_zero_iterations() {
// The particular NNLS algorithm that is implemented starts with all variables
// in the active set.
// This test builds a system where all constraints are active at the solution,
// so that the initial guess is already correct.
//
// If the implementation changes to another algorithm that does not have this property,
// then this test will need to change (e.g. starting from all constraints inactive,
// or using ADMM, or an interior point solver).
const Index n = 10;
const Index m = 3 * n;
const VectorXd b = VectorXd::Random(m);
// With high probability, this is full column rank, which we need for uniqueness.
MatrixXd A = MatrixXd::Random(m, n);
// Make every column of `A` such that adding it to the active set only /increases/ the objective,
// thus ensuring the NNLS solution is all zeros.
const VectorXd alignment = -(A.transpose() * b).cwiseSign();
A = A * alignment.asDiagonal();
NNLS<MatrixXd> nnls(A);
nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY(nnls.iterations() == 0);
}
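// Why the initial guess is already optimal (a sketch of the reasoning): at x = 0 the gradient is
// lambda = A'*(A*0 - b) = -A'*b. After the sign flip above, every column a_i of A satisfies
// a_i'*b = -|a_i_original'*b| <= 0, hence lambda = -A'*b >= 0 componentwise. Together with x = 0
// this satisfies exactly the conditions checked by verify_nnls_optimality, so the active-set
// solver accepts the all-zero starting point without moving any variable out of the active set.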
static void test_nnls_special_case_solves_in_n_iterations() {
// The particular NNLS algorithm that is implemented starts with all variables
// in the active set and then adds one variable to the inactive set each iteration.
// This test builds a system where all variables are inactive at the solution,
// so it should take 'n' iterations to get there.
//
// If the implementation changes to another algorithm that does not have this property,
// then this test will need to change (e.g. starting from all constraints inactive,
// or using ADMM, or an interior point solver).
const Index n = 10;
const Index m = 3 * n;
// With high probability, this is full column rank, which we need for uniqueness.
const MatrixXd A = MatrixXd::Random(m, n);
const VectorXd x = VectorXd::Random(n).cwiseAbs().array() + 1; // all positive.
const VectorXd b = A * x;
NNLS<MatrixXd> nnls(A);
nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
VERIFY(nnls.iterations() == n);
}
static void test_nnls_returns_NoConvergence_when_maxIterations_is_too_low() {
// Using the special case that takes `n` iterations,
// from `test_nnls_special_case_solves_in_n_iterations`,
// we can set max iterations too low and that should cause the solve to fail.
const Index n = 10;
const Index m = 3 * n;
// With high probability, this is full column rank, which we need for uniqueness.
const MatrixXd A = MatrixXd::Random(m, n);
const VectorXd x = VectorXd::Random(n).cwiseAbs().array() + 1; // all positive.
const VectorXd b = A * x;
NNLS<MatrixXd> nnls(A);
const Index max_iters = n - 1;
nnls.setMaxIterations(max_iters);
nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::NoConvergence);
VERIFY(nnls.iterations() == max_iters);
}
static void test_nnls_default_maxIterations_is_twice_column_count() {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
NNLS<MatrixXd> nnls(A);
VERIFY_IS_EQUAL(nnls.maxIterations(), 2 * cols);
}
static void test_nnls_does_not_allocate_during_solve() {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
const VectorXd b = VectorXd::Random(rows);
NNLS<MatrixXd> nnls(A);
internal::set_is_malloc_allowed(false);
nnls.solve(b);
internal::set_is_malloc_allowed(true);
}
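// Note on the mechanism used above: defining EIGEN_RUNTIME_NO_MALLOC before including "main.h"
// (as done at the top of this file) builds Eigen with allocation tracking, and
// internal::set_is_malloc_allowed(false) then turns any Eigen-internal heap allocation into an
// assertion failure, so the solve() call above fails loudly if it ever allocates.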
static void test_nnls_repeated_calls_to_compute_and_solve() {
const Index cols2 = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows2 = internal::random<Index>(cols2, EIGEN_TEST_MAX_SIZE);
const MatrixXd A2 = MatrixXd::Random(rows2, cols2);
const VectorXd b2 = VectorXd::Random(rows2);
NNLS<MatrixXd> nnls;
for (int i = 0; i < 4; ++i) {
const Index cols = internal::random<Index>(1, EIGEN_TEST_MAX_SIZE);
const Index rows = internal::random<Index>(cols, EIGEN_TEST_MAX_SIZE);
const MatrixXd A = MatrixXd::Random(rows, cols);
nnls.compute(A);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
for (int j = 0; j < 3; ++j) {
const VectorXd b = VectorXd::Random(rows);
const VectorXd x = nnls.solve(b);
VERIFY_IS_EQUAL(nnls.info(), ComputationInfo::Success);
verify_nnls_optimality(A, b, x, 1e-4);
}
}
}
EIGEN_DECLARE_TEST(NNLS) {
// Small matrices with known solutions:
CALL_SUBTEST_1(test_nnls_small_reference_problems());
CALL_SUBTEST_1(test_nnls_handles_Mx0_matrix());
CALL_SUBTEST_1(test_nnls_handles_0x0_matrix());
for (int i = 0; i < g_repeat; i++) {
// Essential NNLS properties, across different types.
CALL_SUBTEST_2(test_nnls_random_problem<MatrixXf>());
CALL_SUBTEST_3(test_nnls_random_problem<MatrixXd>());
using MatFixed = Matrix<double, 12, 5>;
CALL_SUBTEST_4(test_nnls_random_problem<MatFixed>());
CALL_SUBTEST_5(test_nnls_with_half_precision());
// Robustness tests:
CALL_SUBTEST_6(test_nnls_handles_zero_rhs());
CALL_SUBTEST_6(test_nnls_handles_dependent_columns());
CALL_SUBTEST_6(test_nnls_handles_wide_matrix());
// Properties specific to the implementation,
// not NNLS in general.
CALL_SUBTEST_7(test_nnls_special_case_solves_in_zero_iterations());
CALL_SUBTEST_7(test_nnls_special_case_solves_in_n_iterations());
CALL_SUBTEST_7(test_nnls_returns_NoConvergence_when_maxIterations_is_too_low());
CALL_SUBTEST_7(test_nnls_default_maxIterations_is_twice_column_count());
CALL_SUBTEST_8(test_nnls_repeated_calls_to_compute_and_solve());
// This test fails. It hits allocations in HouseholderSequence.h
// test_nnls_does_not_allocate_during_solve();
}
}

View File

@@ -12,14 +12,10 @@
// It is intended to be done for this test only.
#include <Eigen/src/Core/util/DisableStupidWarnings.h>
// tolerance for chekcing number of iterations
#define LM_EVAL_COUNT_TOL 4/3
// tolerance for checking number of iterations
#define LM_EVAL_COUNT_TOL 2
#define LM_CHECK_N_ITERS(SOLVER,NFEV,NJEV) { \
++g_test_level; \
VERIFY_IS_EQUAL(SOLVER.nfev, NFEV); \
VERIFY_IS_EQUAL(SOLVER.njev, NJEV); \
--g_test_level; \
VERIFY(SOLVER.nfev <= NFEV * LM_EVAL_COUNT_TOL); \
VERIFY(SOLVER.njev <= NJEV * LM_EVAL_COUNT_TOL); \
}
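// For illustration (comment only): with the relaxed macro, LM_CHECK_N_ITERS(lm, 6, 5) expands to
// VERIFY(lm.nfev <= 6 * LM_EVAL_COUNT_TOL) and VERIFY(lm.njev <= 5 * LM_EVAL_COUNT_TOL), i.e. it
// accepts up to 12 function and 10 Jacobian evaluations rather than requiring the exact
// reference counts.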
@@ -113,10 +109,10 @@ void testChkder()
}
// Generic functor
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct Functor
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
@@ -186,9 +182,10 @@ void testLmder1()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.lmder1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm
@@ -214,9 +211,10 @@ void testLmder()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm
@@ -298,9 +296,10 @@ void testHybrj1()
hybrj_functor functor;
HybridNonLinearSolver<hybrj_functor> solver(functor);
info = solver.hybrj1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(solver, 11, 1);
// check norm
@@ -332,9 +331,10 @@ void testHybrj()
solver.diag.setConstant(n, 1.);
solver.useExternalScaling = true;
info = solver.solve(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(solver, 11, 1);
// check norm
@@ -385,10 +385,11 @@ void testHybrd1()
hybrd_functor functor;
HybridNonLinearSolver<hybrd_functor> solver(functor);
info = solver.hybrd1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(solver.nfev, 20);
// VERIFY_IS_EQUAL(info, 1);
VERIFY(solver.nfev <= 20*LM_EVAL_COUNT_TOL);
// check norm
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
@@ -416,10 +417,11 @@ void testHybrd()
solver.diag.setConstant(n, 1.);
solver.useExternalScaling = true;
info = solver.solveNumericalDiff(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(solver.nfev, 14);
// VERIFY_IS_EQUAL(info, 1);
VERIFY(solver.nfev <= 14*LM_EVAL_COUNT_TOL);
// check norm
VERIFY_IS_APPROX(solver.fvec.blueNorm(), 1.192636e-08);
@@ -487,9 +489,10 @@ void testLmstr1()
lmstr_functor functor;
LevenbergMarquardt<lmstr_functor> lm(functor);
info = lm.lmstr1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm
@@ -515,9 +518,10 @@ void testLmstr()
lmstr_functor functor;
LevenbergMarquardt<lmstr_functor> lm(functor);
info = lm.minimizeOptimumStorage(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm
@@ -570,10 +574,11 @@ void testLmdif1()
lmdif_functor functor;
DenseIndex nfev = -1; // initialize to avoid maybe-uninitialized warning
info = LevenbergMarquardt<lmdif_functor>::lmdif1(functor, x, &nfev);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(nfev, 26);
// VERIFY_IS_EQUAL(info, 1);
VERIFY( nfev <= 26*LM_EVAL_COUNT_TOL);
// check norm
functor(x, fvec);
@@ -601,10 +606,11 @@ void testLmdif()
NumericalDiff<lmdif_functor> numDiff(functor);
LevenbergMarquardt<NumericalDiff<lmdif_functor> > lm(numDiff);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev, 26);
// VERIFY_IS_EQUAL(info, 1);
VERIFY(lm.nfev <= 26*LM_EVAL_COUNT_TOL);
// check norm
fnorm = lm.fvec.blueNorm();
@@ -686,9 +692,10 @@ void testNistChwirut2(void)
chwirut2_functor functor;
LevenbergMarquardt<chwirut2_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 10, 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02);
@@ -706,9 +713,10 @@ void testNistChwirut2(void)
lm.parameters.ftol = 1.E6*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 7, 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.1304802941E+02);
@@ -764,9 +772,10 @@ void testNistMisra1a(void)
misra1a_functor functor;
LevenbergMarquardt<misra1a_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 19, 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01);
@@ -780,9 +789,10 @@ void testNistMisra1a(void)
x<< 250., 0.0005;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 5, 4);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.2455138894E-01);
@@ -852,9 +862,10 @@ void testNistHahn1(void)
hahn1_functor functor;
LevenbergMarquardt<hahn1_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 11, 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00);
@@ -873,9 +884,10 @@ void testNistHahn1(void)
x<< .1, -.1, .005, -.000001, -.005, .0001, -.0000001;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 11, 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.5324382854E+00);
@@ -936,9 +948,10 @@ void testNistMisra1d(void)
misra1d_functor functor;
LevenbergMarquardt<misra1d_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 3);
// VERIFY_IS_EQUAL(info, 3);
LM_CHECK_N_ITERS(lm, 9, 7);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02);
@@ -952,9 +965,10 @@ void testNistMisra1d(void)
x<< 450., 0.0003;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 4, 3);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6419295283E-02);
@@ -1012,13 +1026,14 @@ void testNistLanczos1(void)
lanczos1_functor functor;
LevenbergMarquardt<lanczos1_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 2);
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 79, 72);
// check norm^2
std::cout.precision(30);
std::cout << lm.fvec.squaredNorm() << "\n";
// std::cout.precision(30);
// std::cout << lm.fvec.squaredNorm() << "\n";
VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
// check x
VERIFY_IS_APPROX(x[0], 9.5100000027E-02);
@@ -1034,9 +1049,10 @@ void testNistLanczos1(void)
x<< 0.5, 0.7, 3.6, 4.2, 4., 6.3;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 2);
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 9, 8);
// check norm^2
VERIFY(lm.fvec.squaredNorm() <= 1.4307867721E-25);
@@ -1098,9 +1114,10 @@ void testNistRat42(void)
rat42_functor functor;
LevenbergMarquardt<rat42_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 10, 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00);
@@ -1115,9 +1132,10 @@ void testNistRat42(void)
x<< 75., 2.5, 0.07;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 6, 5);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.0565229338E+00);
@@ -1174,9 +1192,10 @@ void testNistMGH10(void)
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 2);
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 284, 249);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01);
@@ -1191,9 +1210,10 @@ void testNistMGH10(void)
x<< 0.02, 4000., 250.;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 3);
// VERIFY_IS_EQUAL(info, 3);
LM_CHECK_N_ITERS(lm, 126, 116);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7945855171E+01);
@@ -1251,9 +1271,10 @@ void testNistBoxBOD(void)
lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
lm.parameters.factor = 10.;
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 31, 25);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
@@ -1270,10 +1291,11 @@ void testNistBoxBOD(void)
lm.parameters.ftol = NumTraits<double>::epsilon();
lm.parameters.xtol = NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 15, 14);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 20, 14);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.1680088766E+03);
// check x
@@ -1331,6 +1353,7 @@ void testNistMGH17(void)
lm.parameters.xtol = NumTraits<double>::epsilon();
lm.parameters.maxfev = 1000;
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
@@ -1342,7 +1365,7 @@ void testNistMGH17(void)
VERIFY_IS_APPROX(x[4], 2.2122699662E-02);
// check return value
VERIFY_IS_EQUAL(info, 2);
// VERIFY_IS_EQUAL(info, 2);
LM_CHECK_N_ITERS(lm, 602, 545);
/*
@@ -1352,9 +1375,10 @@ void testNistMGH17(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 18, 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.4648946975E-05);
@@ -1417,9 +1441,10 @@ void testNistMGH09(void)
LevenbergMarquardt<MGH09_functor> lm(functor);
lm.parameters.maxfev = 1000;
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 490, 376);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04);
@@ -1436,9 +1461,10 @@ void testNistMGH09(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 18, 16);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 3.0750560385E-04);
@@ -1501,9 +1527,10 @@ void testNistBennett5(void)
LevenbergMarquardt<Bennett5_functor> lm(functor);
lm.parameters.maxfev = 1000;
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 758, 744);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04);
@@ -1518,9 +1545,10 @@ void testNistBennett5(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 203, 192);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.2404744073E-04);
@@ -1587,9 +1615,10 @@ void testNistThurber(void)
lm.parameters.ftol = 1.E4*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E4*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 39,36);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03);
@@ -1611,9 +1640,10 @@ void testNistThurber(void)
lm.parameters.ftol = 1.E4*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E4*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 29, 28);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 5.6427082397E+03);
@@ -1677,9 +1707,10 @@ void testNistRat43(void)
lm.parameters.ftol = 1.E6*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E6*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 27, 20);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03);
@@ -1698,9 +1729,10 @@ void testNistRat43(void)
lm.parameters.ftol = 1.E5*NumTraits<double>::epsilon();
lm.parameters.xtol = 1.E5*NumTraits<double>::epsilon();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 9, 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 8.7864049080E+03);
@@ -1760,9 +1792,10 @@ void testNistEckerle4(void)
eckerle4_functor functor;
LevenbergMarquardt<eckerle4_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 18, 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03);
@@ -1777,9 +1810,10 @@ void testNistEckerle4(void)
x<< 1.5, 5., 450.;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
LM_CHECK_N_ITERS(lm, 7, 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec.squaredNorm(), 1.4635887487E-03);
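Editor's note on the recurring pattern in the hunks above: the exact return code of minimize() is presumably too fragile to assert on across platforms, so the VERIFY_IS_EQUAL(info, ...) checks are commented out, the value is silenced with EIGEN_UNUSED_VARIABLE, and the tests lean on the residual norm plus LM_CHECK_N_ITERS instead. A hypothetical sketch of what that macro is assumed to check (the real definition lives in the test file; the nfev/njev accessor names are assumptions):

    // Hypothetical sketch only, not the actual Eigen macro. Assumes the solver
    // exposes counts of function (nfev) and Jacobian (njev) evaluations.
    #define LM_CHECK_N_ITERS_SKETCH(lm, ref_nfev, ref_njev) \
      do {                                                  \
        VERIFY_IS_EQUAL((lm).nfev(), (ref_nfev));           \
        VERIFY_IS_EQUAL((lm).njev(), (ref_njev));           \
      } while (0)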

View File

@@ -9,10 +9,10 @@
#include <unsupported/Eigen/NumericalDiff>
// Generic functor
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct Functor
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
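Editor's note on the _Scalar to Scalar_ renames here and in the autodiff files below: identifiers that begin with an underscore followed by an uppercase letter are reserved for the implementation in C++, so the template parameter moves the underscore to the end while the public Scalar typedef keeps its old name. A minimal sketch of the new spelling (GenericFunctor is a placeholder name):

    #include <Eigen/Core>

    template <typename Scalar_, int NX = Eigen::Dynamic, int NY = Eigen::Dynamic>
    struct GenericFunctor {
      typedef Scalar_ Scalar;   // public typedef keeps the unreserved name
      enum { InputsAtCompileTime = NX, ValuesAtCompileTime = NY };
    };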

View File

@@ -7,6 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_NO_STATIC_ASSERT
#include "main.h"
#include <unsupported/Eigen/AlignedVector3>

View File

@@ -29,10 +29,10 @@ EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
return (p-Vector(Scalar(-1),Scalar(1.))).norm() + (p.array() * p.array()).sum() + p.dot(p);
}
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct TestFunc1
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY
@@ -106,7 +106,6 @@ struct TestFunc1
};
#if EIGEN_HAS_VARIADIC_TEMPLATES
/* Test functor for the C++11 features. */
template <typename Scalar>
struct integratorFunctor
@@ -186,7 +185,6 @@ template<typename Func> void forward_jacobian_cpp11(const Func& f)
VERIFY_IS_APPROX(y, yref);
VERIFY_IS_APPROX(j, jref);
}
#endif
template<typename Func> void forward_jacobian(const Func& f)
{
@@ -247,9 +245,7 @@ void test_autodiff_jacobian()
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,2>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double,3,3>()) ));
CALL_SUBTEST(( forward_jacobian(TestFunc1<double>(3,3)) ));
#if EIGEN_HAS_VARIADIC_TEMPLATES
CALL_SUBTEST(( forward_jacobian_cpp11(integratorFunctor<double>(10)) ));
#endif
}

View File

@@ -84,9 +84,7 @@ void check_limits_specialization()
// workaround "unused typedef" warning:
VERIFY(!bool(internal::is_same<B, A>::value));
#if EIGEN_HAS_CXX11
VERIFY(bool(std::is_base_of<B, A>::value));
#endif
}
EIGEN_DECLARE_TEST(autodiff_scalar)

View File

@@ -0,0 +1,31 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
// Copyright (C) 2012 Kolja Brix <brix@igpm.rwth-aaachen.de>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "../../test/sparse_solver.h"
#include <Eigen/IterativeSolvers>
template<typename T> void test_bicgstabl_T()
{
BiCGSTABL<SparseMatrix<T>, DiagonalPreconditioner<T> > bicgstabl_colmajor_diag;
BiCGSTABL<SparseMatrix<T>, IncompleteLUT<T> > bicgstabl_colmajor_ilut;
// This does not change the tolerance of the test, only the tolerance of the solver.
bicgstabl_colmajor_diag.setTolerance(NumTraits<T>::epsilon()*20);
bicgstabl_colmajor_ilut.setTolerance(NumTraits<T>::epsilon()*20);
CALL_SUBTEST( check_sparse_square_solving(bicgstabl_colmajor_diag) );
CALL_SUBTEST( check_sparse_square_solving(bicgstabl_colmajor_ilut) );
}
EIGEN_DECLARE_TEST(bicgstabl)
{
CALL_SUBTEST_1(test_bicgstabl_T<double>());
CALL_SUBTEST_2(test_bicgstabl_T<std::complex<double> >());
}
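Editor's note: bicgstabl is one of the new solver tests wired up in the CMake changes. Below is a hedged usage sketch of the BiCGSTAB(L) solver it exercises, assuming it follows Eigen's usual IterativeSolverBase interface (compute/solve/info/iterations/error); the tridiagonal test matrix is only for illustration.

    #include <Eigen/Dense>
    #include <Eigen/Sparse>
    #include <unsupported/Eigen/IterativeSolvers>
    #include <iostream>
    #include <vector>

    int main() {
      const int n = 100;
      // Simple diagonally dominant tridiagonal matrix.
      std::vector<Eigen::Triplet<double> > trips;
      for (int i = 0; i < n; ++i) {
        trips.emplace_back(i, i, 4.0);
        if (i + 1 < n) {
          trips.emplace_back(i, i + 1, -1.0);
          trips.emplace_back(i + 1, i, -1.0);
        }
      }
      Eigen::SparseMatrix<double> A(n, n);
      A.setFromTriplets(trips.begin(), trips.end());
      Eigen::VectorXd b = Eigen::VectorXd::Random(n);

      Eigen::BiCGSTABL<Eigen::SparseMatrix<double>, Eigen::DiagonalPreconditioner<double> > solver;
      solver.setTolerance(1e-12);   // solver tolerance, as in the test above
      solver.compute(A);
      Eigen::VectorXd x = solver.solve(b);
      std::cout << "iterations: " << solver.iterations()
                << ", estimated error: " << solver.error() << "\n";
      return solver.info() == Eigen::Success ? 0 : 1;
    }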

View File

@@ -14,57 +14,57 @@
using Eigen::Tensor;
using Eigen::array;
using Eigen::Tuple;
using Eigen::Pair;
template <int DataLayout>
static void test_simple_index_tuples()
static void test_simple_index_pairs()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
index_pairs = tensor.index_pairs();
for (DenseIndex n = 0; n < 2*3*5*7; ++n) {
const Tuple<DenseIndex, float>& v = index_tuples.coeff(n);
const Pair<DenseIndex, float>& v = index_pairs.coeff(n);
VERIFY_IS_EQUAL(v.first, n);
VERIFY_IS_EQUAL(v.second, tensor.coeff(n));
}
}
template <int DataLayout>
static void test_index_tuples_dim()
static void test_index_pairs_dim()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
index_tuples = tensor.index_tuples();
index_pairs = tensor.index_pairs();
for (Eigen::DenseIndex n = 0; n < tensor.size(); ++n) {
const Tuple<DenseIndex, float>& v = index_tuples(n); //(i, j, k, l);
const Pair<DenseIndex, float>& v = index_pairs(n); //(i, j, k, l);
VERIFY_IS_EQUAL(v.first, n);
VERIFY_IS_EQUAL(v.second, tensor(n));
}
}
template <int DataLayout>
static void test_argmax_tuple_reducer()
static void test_argmax_pair_reducer()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
index_pairs = tensor.index_pairs();
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
Tensor<Pair<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce(
dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
reduced = index_pairs.reduce(
dims, internal::ArgMaxPairReducer<Pair<DenseIndex, float> >());
Tensor<float, 0, DataLayout> maxi = tensor.maximum();
@@ -72,9 +72,9 @@ static void test_argmax_tuple_reducer()
array<DenseIndex, 3> reduce_dims;
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce(
reduce_dims, internal::ArgMaxTupleReducer<Tuple<DenseIndex, float> >());
Tensor<Pair<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_pairs.reduce(
reduce_dims, internal::ArgMaxPairReducer<Pair<DenseIndex, float> >());
Tensor<float, 1, DataLayout> max_by_dims = tensor.maximum(reduce_dims);
@@ -84,19 +84,19 @@ static void test_argmax_tuple_reducer()
}
template <int DataLayout>
static void test_argmin_tuple_reducer()
static void test_argmin_pair_reducer()
{
Tensor<float, 4, DataLayout> tensor(2,3,5,7);
tensor.setRandom();
tensor = (tensor + tensor.constant(0.5)).log();
Tensor<Tuple<DenseIndex, float>, 4, DataLayout> index_tuples(2,3,5,7);
index_tuples = tensor.index_tuples();
Tensor<Pair<DenseIndex, float>, 4, DataLayout> index_pairs(2,3,5,7);
index_pairs = tensor.index_pairs();
Tensor<Tuple<DenseIndex, float>, 0, DataLayout> reduced;
Tensor<Pair<DenseIndex, float>, 0, DataLayout> reduced;
DimensionList<DenseIndex, 4> dims;
reduced = index_tuples.reduce(
dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
reduced = index_pairs.reduce(
dims, internal::ArgMinPairReducer<Pair<DenseIndex, float> >());
Tensor<float, 0, DataLayout> mini = tensor.minimum();
@@ -104,9 +104,9 @@ static void test_argmin_tuple_reducer()
array<DenseIndex, 3> reduce_dims;
for (int d = 0; d < 3; ++d) reduce_dims[d] = d;
Tensor<Tuple<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_tuples.reduce(
reduce_dims, internal::ArgMinTupleReducer<Tuple<DenseIndex, float> >());
Tensor<Pair<DenseIndex, float>, 1, DataLayout> reduced_by_dims(7);
reduced_by_dims = index_pairs.reduce(
reduce_dims, internal::ArgMinPairReducer<Pair<DenseIndex, float> >());
Tensor<float, 1, DataLayout> min_by_dims = tensor.minimum(reduce_dims);
@@ -275,14 +275,14 @@ static void test_argmin_dim()
EIGEN_DECLARE_TEST(cxx11_tensor_argmax)
{
CALL_SUBTEST(test_simple_index_tuples<RowMajor>());
CALL_SUBTEST(test_simple_index_tuples<ColMajor>());
CALL_SUBTEST(test_index_tuples_dim<RowMajor>());
CALL_SUBTEST(test_index_tuples_dim<ColMajor>());
CALL_SUBTEST(test_argmax_tuple_reducer<RowMajor>());
CALL_SUBTEST(test_argmax_tuple_reducer<ColMajor>());
CALL_SUBTEST(test_argmin_tuple_reducer<RowMajor>());
CALL_SUBTEST(test_argmin_tuple_reducer<ColMajor>());
CALL_SUBTEST(test_simple_index_pairs<RowMajor>());
CALL_SUBTEST(test_simple_index_pairs<ColMajor>());
CALL_SUBTEST(test_index_pairs_dim<RowMajor>());
CALL_SUBTEST(test_index_pairs_dim<ColMajor>());
CALL_SUBTEST(test_argmax_pair_reducer<RowMajor>());
CALL_SUBTEST(test_argmax_pair_reducer<ColMajor>());
CALL_SUBTEST(test_argmin_pair_reducer<RowMajor>());
CALL_SUBTEST(test_argmin_pair_reducer<ColMajor>());
CALL_SUBTEST(test_simple_argmax<RowMajor>());
CALL_SUBTEST(test_simple_argmax<ColMajor>());
CALL_SUBTEST(test_simple_argmin<RowMajor>());
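Editor's note on the Tuple to Pair renames in this test: the (index, value) helper type and its reducers keep their behaviour under the new names. A hedged usage sketch, assuming Eigen::Pair, index_pairs() and ArgMaxPairReducer have the same semantics as the old Tuple-based API they replace:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      using Eigen::DenseIndex;
      Eigen::Tensor<float, 2> t(3, 4);
      t.setRandom();

      // One (linear index, value) pair per element.
      Eigen::Tensor<Eigen::Pair<DenseIndex, float>, 2> pairs(3, 4);
      pairs = t.index_pairs();

      // Reducing the pairs over all dimensions yields the argmax as a single pair.
      Eigen::DimensionList<DenseIndex, 2> all_dims;
      Eigen::Tensor<Eigen::Pair<DenseIndex, float>, 0> best;
      best = pairs.reduce(all_dims,
                          Eigen::internal::ArgMaxPairReducer<Eigen::Pair<DenseIndex, float> >());
      std::cout << "max " << best().second << " at linear index " << best().first << "\n";
      return 0;
    }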

View File

@@ -16,7 +16,6 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1
#include "main.h"

View File

@@ -25,10 +25,8 @@ static void test_1d()
vec1(4) = 23; vec2(4) = 4;
vec1(5) = 42; vec2(5) = 5;
int col_major[6];
int row_major[6];
memset(col_major, 0, 6*sizeof(int));
memset(row_major, 0, 6*sizeof(int));
int col_major[6] = {0};
int row_major[6] = {0};
TensorMap<Tensor<int, 1> > vec3(col_major, 6);
TensorMap<Tensor<int, 1, RowMajor> > vec4(row_major, 6);
@@ -88,10 +86,8 @@ static void test_2d()
mat2(1,1) = 4;
mat2(1,2) = 5;
int col_major[6];
int row_major[6];
memset(col_major, 0, 6*sizeof(int));
memset(row_major, 0, 6*sizeof(int));
int col_major[6] = {0};
int row_major[6] = {0};
TensorMap<Tensor<int, 2> > mat3(row_major, 2, 3);
TensorMap<Tensor<int, 2, RowMajor> > mat4(col_major, 2, 3);
@@ -148,10 +144,8 @@ static void test_3d()
}
}
int col_major[2*3*7];
int row_major[2*3*7];
memset(col_major, 0, 2*3*7*sizeof(int));
memset(row_major, 0, 2*3*7*sizeof(int));
int col_major[2*3*7] = {0};
int row_major[2*3*7] = {0};
TensorMap<Tensor<int, 3> > mat3(col_major, 2, 3, 7);
TensorMap<Tensor<int, 3, RowMajor> > mat4(row_major, 2, 3, 7);
@@ -286,7 +280,6 @@ static void test_compound_assign()
}
static void test_std_initializers_tensor() {
#if EIGEN_HAS_VARIADIC_TEMPLATES
Tensor<int, 1> a(3);
a.setValues({0, 1, 2});
VERIFY_IS_EQUAL(a(0), 0);
@@ -355,7 +348,6 @@ static void test_std_initializers_tensor() {
VERIFY_IS_EQUAL(c(2, 1, 1), 25);
VERIFY_IS_EQUAL(c(2, 1, 2), 26);
VERIFY_IS_EQUAL(c(2, 1, 3), 27);
#endif // EIGEN_HAS_VARIADIC_TEMPLATES
}
EIGEN_DECLARE_TEST(cxx11_tensor_assign)
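Editor's note on the `= {0}` replacements above: an aggregate initializer with fewer initializers than elements value-initializes the remainder, so the arrays start out fully zeroed and the memset() calls become redundant:

    int col_major[6] = {0};   // all six elements are 0
    int row_major[6] = {0};   // equivalent to memset(row_major, 0, sizeof row_major)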

View File

@@ -244,7 +244,7 @@ static void test_eval_tensor_binary_with_unary_expr_block() {
rhs.setRandom();
VerifyBlockEvaluator<T, NumDims, Layout>(
(lhs.square() + rhs.square()).sqrt(),
(lhs.abs() + rhs.abs()).sqrt(),
[&dims]() { return RandomBlock<Layout>(dims, 1, 10); });
}

View File

@@ -91,15 +91,7 @@ static void test_vectorized_broadcasting()
}
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
tensor.resize(11,3,5);
#else
array<Index, 3> new_dims;
new_dims[0] = 11;
new_dims[1] = 3;
new_dims[2] = 5;
tensor.resize(new_dims);
#endif
tensor.setRandom();
broadcast = tensor.broadcast(broadcasts);
@@ -124,15 +116,7 @@ static void test_static_broadcasting()
Tensor<float, 3, DataLayout> tensor(8,3,5);
tensor.setRandom();
#if defined(EIGEN_HAS_INDEX_LIST)
Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>, Eigen::type2index<4>> broadcasts;
#else
Eigen::array<int, 3> broadcasts;
broadcasts[0] = 2;
broadcasts[1] = 3;
broadcasts[2] = 4;
#endif
Tensor<float, 3, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
@@ -148,15 +132,7 @@ static void test_static_broadcasting()
}
}
#if EIGEN_HAS_VARIADIC_TEMPLATES
tensor.resize(11,3,5);
#else
array<Index, 3> new_dims;
new_dims[0] = 11;
new_dims[1] = 3;
new_dims[2] = 5;
tensor.resize(new_dims);
#endif
tensor.setRandom();
broadcast = tensor.broadcast(broadcasts);
@@ -256,6 +232,22 @@ static void test_simple_broadcasting_n_by_one()
}
}
template <int DataLayout>
static void test_size_one_broadcasting()
{
Tensor<float, 1, DataLayout> tensor(1);
tensor.setRandom();
array<ptrdiff_t, 1> broadcasts = {64};
Tensor<float, 1, DataLayout> broadcast;
broadcast = tensor.broadcast(broadcasts);
VERIFY_IS_EQUAL(broadcast.dimension(0), broadcasts[0]);
for (int i = 0; i < broadcasts[0]; ++i) {
VERIFY_IS_EQUAL(tensor(0), broadcast(i));
}
}
template <int DataLayout>
static void test_simple_broadcasting_one_by_n_by_one_1d()
{
@@ -328,4 +320,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_broadcasting)
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<ColMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_1d<RowMajor>());
CALL_SUBTEST(test_simple_broadcasting_one_by_n_by_one_2d<RowMajor>());
CALL_SUBTEST(test_size_one_broadcasting<ColMajor>());
CALL_SUBTEST(test_size_one_broadcasting<RowMajor>());
}
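Editor's note: the new test_size_one_broadcasting subtest covers broadcasting a one-element tensor. A small sketch of the behaviour it checks, where each broadcast factor multiplies the corresponding dimension:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<float, 1> t(1);
      t.setConstant(3.f);
      Eigen::array<ptrdiff_t, 1> bcast = {{4}};
      Eigen::Tensor<float, 1> out = t.broadcast(bcast);   // dimension 0 becomes 1 * 4
      for (int i = 0; i < 4; ++i) std::cout << out(i) << " ";   // prints: 3 3 3 3
      std::cout << "\n";
      return 0;
    }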

View File

@@ -38,24 +38,24 @@ template <typename T> T cwiseMin(T x, T y) { return cl::sycl::min(x, y); }
}
}
struct EqualAssignement {
struct EqualAssignment {
template <typename Lhs, typename Rhs>
void operator()(Lhs& lhs, const Rhs& rhs) { lhs = rhs; }
};
struct PlusEqualAssignement {
struct PlusEqualAssignment {
template <typename Lhs, typename Rhs>
void operator()(Lhs& lhs, const Rhs& rhs) { lhs += rhs; }
};
template <typename DataType, int DataLayout,
typename Assignement, typename Operator>
typename Assignment, typename Operator>
void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
Operator op;
Assignement asgn;
Assignment asgn;
{
/* Assignement(out, Operator(in)) */
/* Assignment(out, Operator(in)) */
Tensor<DataType, 3, DataLayout, int64_t> in(tensor_range);
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
in = in.random() + DataType(0.01);
@@ -84,9 +84,10 @@ void test_unary_builtins_for_scalar(const Eigen::SyclDevice& sycl_device,
sycl_device.deallocate(gpu_data_out);
}
{
/* Assignement(out, Operator(out)) */
/* Assignment(out, Operator(out)) */
Tensor<DataType, 3, DataLayout, int64_t> out(tensor_range);
out = out.random() + DataType(0.01);
// Offset by 1 to avoid tiny outputs (< 1e-6), as they can easily fail.
out = out.random() + DataType(1);
Tensor<DataType, 3, DataLayout, int64_t> reference(out);
DataType *gpu_data_out = static_cast<DataType *>(
sycl_device.allocate(out.size() * sizeof(DataType)));
@@ -137,11 +138,11 @@ DECLARE_UNARY_STRUCT(isnan)
DECLARE_UNARY_STRUCT(isfinite)
DECLARE_UNARY_STRUCT(isinf)
template <typename DataType, int DataLayout, typename Assignement>
template <typename DataType, int DataLayout, typename Assignment>
void test_unary_builtins_for_assignement(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
#define RUN_UNARY_TEST(FUNC) \
test_unary_builtins_for_scalar<DataType, DataLayout, Assignement, \
test_unary_builtins_for_scalar<DataType, DataLayout, Assignment, \
op_##FUNC>(sycl_device, tensor_range)
RUN_UNARY_TEST(abs);
RUN_UNARY_TEST(sqrt);
@@ -190,9 +191,9 @@ template <typename DataType, int DataLayout>
void test_unary_builtins(const Eigen::SyclDevice& sycl_device,
const array<int64_t, 3>& tensor_range) {
test_unary_builtins_for_assignement<DataType, DataLayout,
PlusEqualAssignement>(sycl_device, tensor_range);
PlusEqualAssignment>(sycl_device, tensor_range);
test_unary_builtins_for_assignement<DataType, DataLayout,
EqualAssignement>(sycl_device, tensor_range);
EqualAssignment>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout,
op_isnan>(sycl_device, tensor_range);
test_unary_builtins_return_bool<DataType, DataLayout,

View File

@@ -149,7 +149,7 @@ struct test_cast_runner {
// Only certain types allow cast from std::complex<>.
template<typename Scalar>
struct test_cast_runner<Scalar, typename internal::enable_if<NumTraits<Scalar>::IsComplex>::type> {
struct test_cast_runner<Scalar, std::enable_if_t<NumTraits<Scalar>::IsComplex>> {
static void run() {
test_type_cast<Scalar, half>();
test_type_cast<Scalar, bfloat16>();
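Editor's note on the trait change above (mirrored by the internal::conditional to std::conditional_t change in the FFT test further down): with C++14 as the baseline, Eigen's internal trait helpers give way to the standard ones. A minimal sketch of the SFINAE-specialization idiom, using hypothetical names:

    #include <type_traits>
    #include <Eigen/Core>

    template <typename Scalar, typename Enable = void>
    struct cast_runner_sketch {            // generic case
      static void run() {}
    };

    template <typename Scalar>
    struct cast_runner_sketch<Scalar, std::enable_if_t<Eigen::NumTraits<Scalar>::IsComplex>> {
      static void run() {}                 // selected only when Scalar is complex
    };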

View File

@@ -25,10 +25,6 @@ typedef Tensor<float, 1>::DimensionPair DimPair;
template<int DataLayout>
void test_gpu_contraction(int m_size, int k_size, int n_size)
{
std::cout << "Testing for (" << m_size << "," << k_size << "," << n_size << ")" << std::endl;
// with these dimensions, the output has 300 * 140 elements, which is
// more than 30 * 1024, which is the number of threads in blocks on
// a 15 SM GK110 GPU
Tensor<float, 2, DataLayout> t_left(m_size, k_size);
Tensor<float, 2, DataLayout> t_right(k_size, n_size);
Tensor<float, 2, DataLayout> t_result(m_size, n_size);
@@ -171,25 +167,45 @@ void test_gpu_contraction_n() {
template<int DataLayout>
void test_gpu_contraction_sizes() {
int m_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257 , 511,
512, 513, 1023, 1024, 1025};
int m_sizes[3][5] = {{ 31, 39, 63, 64, 65},
{127, 129, 255, 257 , 511},
{512, 513, 1023, 1024, 1025}};
int n_sizes[] = { 31, 39, 63, 64, 65,
127, 129, 255, 257, 511,
512, 513, 1023, 1024, 1025};
int n_sizes[3][5] = {{ 31, 39, 63, 64, 65},
{127, 129, 255, 257, 511},
{512, 513, 1023, 1024, 1025}};
int k_sizes[] = { 31, 39, 63, 64, 65,
95, 96, 127, 129, 255,
257, 511, 512, 513, 1023,
1024, 1025};
int k_sizes[3][6] = {{ 31, 39, 63, 64, 65, 95},
{ 96, 127, 129, 255, 257, 511},
{512, 513, 725, 1023, 1024, 1025}};
for (int i = 0; i < 15; i++) {
for (int j = 0; j < 15; j++) {
for (int k = 0; k < 17; k++) {
test_gpu_contraction<DataLayout>(m_sizes[i], n_sizes[j], k_sizes[k]);
// Some selection of specific cases.
// - m changes rows each iteration
// - n changes rows every 3 iterations
// - k changes rows every 9 iterations
// - within a row, advance one column each iteration
const int m_cols = 5;
const int n_cols = 5;
const int k_cols = 6;
int m_offset = 0;
int n_offset = 1;
int k_offset = 2;
for (int i = 0; i < 3; ++i) {
for (int j = 0; j < 3; ++j) {
for (int l = 0; l < 3; ++l) {
int m = m_sizes[l][m_offset];
int n = n_sizes[j][n_offset];
int k = k_sizes[i][k_offset];
test_gpu_contraction<DataLayout>(m, n, k);
n_offset = (n_offset + 1) % n_cols;
k_offset = (k_offset + 1) % k_cols;
}
m_offset = (m_offset + 1) % m_cols;
if (j < 2) {
n_offset = (n_offset + n_cols - 3) % n_cols; // Rewind 3.
}
}
k_offset = (k_offset + 2 * k_cols - 9) % k_cols; // Rewind 9.
}
}
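Editor's note: to make the row/column walk described by the comments above concrete, here is a host-only sketch (no GPU required) that runs the same offset arithmetic and merely prints the 27 (m, n, k) triples the rewritten loop selects:

    #include <cstdio>

    int main() {
      int m_sizes[3][5] = {{ 31,  39,   63,   64,   65},
                           {127, 129,  255,  257,  511},
                           {512, 513, 1023, 1024, 1025}};
      int n_sizes[3][5] = {{ 31,  39,   63,   64,   65},
                           {127, 129,  255,  257,  511},
                           {512, 513, 1023, 1024, 1025}};
      int k_sizes[3][6] = {{ 31,  39,   63,   64,   65,   95},
                           { 96, 127,  129,  255,  257,  511},
                           {512, 513,  725, 1023, 1024, 1025}};
      const int m_cols = 5, n_cols = 5, k_cols = 6;
      int m_offset = 0, n_offset = 1, k_offset = 2;
      for (int i = 0; i < 3; ++i) {
        for (int j = 0; j < 3; ++j) {
          for (int l = 0; l < 3; ++l) {
            std::printf("m=%4d n=%4d k=%4d\n",
                        m_sizes[l][m_offset], n_sizes[j][n_offset], k_sizes[i][k_offset]);
            n_offset = (n_offset + 1) % n_cols;
            k_offset = (k_offset + 1) % k_cols;
          }
          m_offset = (m_offset + 1) % m_cols;
          if (j < 2) n_offset = (n_offset + n_cols - 3) % n_cols;  // rewind 3
        }
        k_offset = (k_offset + 2 * k_cols - 9) % k_cols;           // rewind 9
      }
      return 0;
    }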

View File

@@ -20,7 +20,6 @@ using Eigen::Tensor;
template <int DataLayout>
static void test_map_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
@@ -35,14 +34,12 @@ static void test_map_as_index()
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_matrix_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
@@ -53,14 +50,12 @@ static void test_matrix_as_index()
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_varlist_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
@@ -68,14 +63,12 @@ static void test_varlist_as_index()
VERIFY_IS_EQUAL(tensor.coeff({1,2,4,1}), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef({1,2,4,1}), tensor.coeffRef(coeff));
#endif
}
template <int DataLayout>
static void test_sizes_as_index()
{
#ifdef EIGEN_HAS_SFINAE
Tensor<float, 4, DataLayout> tensor(2, 3, 5, 7);
tensor.setRandom();
@@ -84,7 +77,6 @@ static void test_sizes_as_index()
VERIFY_IS_EQUAL(tensor.coeff(coeffC), tensor.coeff(coeff));
VERIFY_IS_EQUAL(tensor.coeffRef(coeffC), tensor.coeffRef(coeff));
#endif
}

View File

@@ -14,6 +14,7 @@
#define EIGEN_USE_GPU
#include "main.h"
#include "OffByOneScalar.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
@@ -175,6 +176,44 @@ void test_3d_convolution(Context* context)
context->out().slice(indices, sizes).device(context->device()) = context->in1().convolve(context->kernel3d(), dims);
}
// Helper method to synchronize device.
template<typename Device>
void synchronize(Device& device) { /*nothing*/ }
template<>
void synchronize(Eigen::GpuDevice& device) {
device.synchronize();
}
template <typename DataType, typename TensorDevice>
void test_device_memory(const TensorDevice& device) {
int count = 100;
Eigen::array<int, 1> tensorRange = {{count}};
Eigen::Tensor<DataType, 1> host(tensorRange);
Eigen::Tensor<DataType, 1> expected(tensorRange);
DataType* device_data = static_cast<DataType*>(device.allocate(count * sizeof(DataType)));
// memset
const char byte_value = static_cast<char>(0xAB);
device.memset(device_data, byte_value, count * sizeof(DataType));
device.memcpyDeviceToHost(host.data(), device_data, count * sizeof(DataType));
synchronize(device);
memset(expected.data(), byte_value, count * sizeof(DataType));
for (size_t i=0; i<count; i++) {
VERIFY_IS_EQUAL(host(i), expected(i));
}
// fill
DataType fill_value = DataType(7);
std::fill_n(expected.data(), count, fill_value);
device.fill(device_data, device_data + count, fill_value);
device.memcpyDeviceToHost(host.data(), device_data, count * sizeof(DataType));
synchronize(device);
for (int i=0; i<count; i++) {
VERIFY_IS_EQUAL(host(i), expected(i));
}
device.deallocate(device_data);
}
void test_cpu() {
Eigen::Tensor<float, 3> in1(40,50,70);
@@ -266,6 +305,9 @@ void test_cpu() {
}
}
}
test_device_memory<float>(context.device());
test_device_memory<OffByOneScalar<int>>(context.device());
}
void test_gpu() {
@@ -386,6 +428,8 @@ void test_gpu() {
#endif
test_device_memory<float>(context.device());
test_device_memory<OffByOneScalar<int>>(context.device());
}

View File

@@ -18,26 +18,36 @@
#define EIGEN_USE_SYCL
#include "main.h"
#include "OffByOneScalar.h"
#include <unsupported/Eigen/CXX11/Tensor>
#include <stdint.h>
#include <iostream>
template <typename DataType, int DataLayout, typename IndexType>
void test_device_memory(const Eigen::SyclDevice &sycl_device) {
std::cout << "Running on : "
<< sycl_device.sycl_queue().get_device(). template get_info<cl::sycl::info::device::name>()
<<std::endl;
IndexType sizeDim1 = 100;
array<IndexType, 1> tensorRange = {{sizeDim1}};
Tensor<DataType, 1, DataLayout,IndexType> in(tensorRange);
Tensor<DataType, 1, DataLayout,IndexType> in1(tensorRange);
memset(in1.data(), 1, in1.size() * sizeof(DataType));
DataType* gpu_in_data = static_cast<DataType*>(sycl_device.allocate(in.size()*sizeof(DataType)));
// memset
memset(in1.data(), 1, in1.size() * sizeof(DataType));
sycl_device.memset(gpu_in_data, 1, in.size()*sizeof(DataType));
sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
for (IndexType i=0; i<in.size(); i++) {
VERIFY_IS_EQUAL(in(i), in1(i));
}
// fill
DataType value = DataType(7);
std::fill_n(in1.data(), in1.size(), value);
sycl_device.fill(gpu_in_data, gpu_in_data + in.size(), value);
sycl_device.memcpyDeviceToHost(in.data(), gpu_in_data, in.size()*sizeof(DataType));
for (IndexType i=0; i<in.size(); i++) {
VERIFY_IS_EQUAL(in(i), in1(i));
}
sycl_device.deallocate(gpu_in_data);
}
@@ -58,6 +68,31 @@ void test_device_exceptions(const Eigen::SyclDevice &sycl_device) {
sycl_device.deallocate(gpu_data);
}
template<typename DataType, int DataLayout, typename IndexType>
void test_device_attach_buffer(const Eigen::SyclDevice &sycl_device) {
IndexType sizeDim1 = 100;
array<IndexType, 1> tensorRange = {{sizeDim1}};
Tensor<DataType, 1, DataLayout, IndexType> in(tensorRange);
cl::sycl::buffer<buffer_scalar_t, 1> buffer(cl::sycl::range<1>(sizeDim1 * sizeof(DataType)));
DataType* gpu_in_data = static_cast<DataType*>(sycl_device.attach_buffer(buffer));
// fill
DataType value = DataType(7);
std::fill_n(in.data(), in.size(), value);
sycl_device.fill(gpu_in_data, gpu_in_data + in.size(), value);
// Check that buffer is filled with the correct value.
auto reint = buffer.reinterpret<DataType>(cl::sycl::range<1>(sizeDim1));
auto access = reint.template get_access<cl::sycl::access::mode::read>();
for (IndexType i=0; i<in.size(); i++) {
VERIFY_IS_EQUAL(in(i), access[i]);
}
sycl_device.detach_buffer(gpu_in_data);
}
template<typename DataType> void sycl_device_test_per_device(const cl::sycl::device& d){
std::cout << "Running on " << d.template get_info<cl::sycl::info::device::name>() << std::endl;
QueueInterface queueInterface(d);
@@ -68,10 +103,12 @@ template<typename DataType> void sycl_device_test_per_device(const cl::sycl::dev
//test_device_exceptions<DataType, RowMajor>(sycl_device);
/// this test throw an exception. enable it if you want to see the exception
//test_device_exceptions<DataType, ColMajor>(sycl_device);
test_device_attach_buffer<DataType, ColMajor, int64_t>(sycl_device);
}
EIGEN_DECLARE_TEST(cxx11_tensor_device_sycl) {
for (const auto& device :Eigen::get_sycl_supported_devices()) {
CALL_SUBTEST(sycl_device_test_per_device<float>(device));
CALL_SUBTEST(sycl_device_test_per_device<OffByOneScalar<int>>(device));
}
}

View File

@@ -612,43 +612,42 @@ static void test_async_execute_binary_expr(Device d)
}
}
#ifdef EIGEN_DONT_VECTORIZE
#define VECTORIZABLE(VAL) !EIGEN_DONT_VECTORIZE && VAL
#else
#define VECTORIZABLE(VAL) VAL
#ifndef EIGEN_DONT_VECTORIZE
#define EIGEN_DONT_VECTORIZE 0
#endif
#define VECTORIZABLE(T, VAL) !EIGEN_DONT_VECTORIZE && Eigen::internal::packet_traits<T>::Vectorizable && VAL
#define CALL_SUBTEST_PART(PART) \
CALL_SUBTEST_##PART
#define CALL_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, false, TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, DefaultDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(default_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))
// NOTE: Currently only ThreadPoolDevice supports async expression evaluation.
#define CALL_ASYNC_SUBTEST_COMBINATIONS(PART, NAME, T, NUM_DIMS) \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(true), TiledEvaluation::On, RowMajor>(tp_device)))
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, ColMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, false, TiledEvaluation::On, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::Off, RowMajor>(tp_device))); \
CALL_SUBTEST_PART(PART)((NAME<T, NUM_DIMS, ThreadPoolDevice, VECTORIZABLE(T, true), TiledEvaluation::On, RowMajor>(tp_device)))
EIGEN_DECLARE_TEST(cxx11_tensor_executor) {
Eigen::DefaultDevice default_device;
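Editor's note on the VECTORIZABLE rework above: the macro now also takes the scalar type, so vectorized subtests are only generated when Eigen actually provides packets for T. A small host-side illustration (MyScalar is a hypothetical, non-vectorizable type):

    #include <Eigen/Core>
    #include <iostream>

    struct MyScalar { float v; };

    int main() {
      // 1 on SIMD targets, 0 when vectorization is disabled.
      std::cout << "float vectorizable: "
                << bool(Eigen::internal::packet_traits<float>::Vectorizable) << "\n";
      // Falls back to the generic packet_traits, which reports 0.
      std::cout << "MyScalar vectorizable: "
                << bool(Eigen::internal::packet_traits<MyScalar>::Vectorizable) << "\n";
      return 0;
    }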

View File

@@ -130,7 +130,7 @@ static void test_3d()
Tensor<float, 3, RowMajor> mat4(2,3,7);
mat4 = mat2 * 3.14f;
Tensor<float, 3> mat5(2,3,7);
mat5 = mat1.inverse().log();
mat5 = (mat1 + mat1.constant(1)).inverse().log();
Tensor<float, 3, RowMajor> mat6(2,3,7);
mat6 = mat2.pow(0.5f) * 3.14f;
Tensor<float, 3> mat7(2,3,7);
@@ -150,7 +150,7 @@ static void test_3d()
for (int k = 0; k < 7; ++k) {
VERIFY_IS_APPROX(mat3(i,j,k), val + val);
VERIFY_IS_APPROX(mat4(i,j,k), val * 3.14f);
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/val));
VERIFY_IS_APPROX(mat5(i,j,k), logf(1.0f/(val + 1)));
VERIFY_IS_APPROX(mat6(i,j,k), sqrtf(val) * 3.14f);
VERIFY_IS_APPROX(mat7(i,j,k), expf((std::max)(val, mat5(i,j,k) * 2.0f)));
VERIFY_IS_APPROX(mat8(i,j,k), expf(-val) * 3.14f);
@@ -305,10 +305,10 @@ void test_minmax_nan_propagation_templ() {
const Scalar kNaN = std::numeric_limits<Scalar>::quiet_NaN();
const Scalar kInf = std::numeric_limits<Scalar>::infinity();
const Scalar kZero(0);
Tensor<Scalar, 1> vec_all_nan(size);
Tensor<Scalar, 1> vec_full_nan(size);
Tensor<Scalar, 1> vec_one_nan(size);
Tensor<Scalar, 1> vec_zero(size);
vec_all_nan.setConstant(kNaN);
vec_full_nan.setConstant(kNaN);
vec_zero.setZero();
vec_one_nan.setZero();
vec_one_nan(size/2) = kNaN;
@@ -330,12 +330,12 @@ void test_minmax_nan_propagation_templ() {
// max(nan, 0) = nan
// max(0, nan) = nan
// max(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(kZero));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNaN>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(vec_full_nan));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(kZero));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNaN>(vec_zero));
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(kNaN));
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_zero.template cwiseMax<PropagateNaN>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNaN>(kZero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNaN>(vec_zero));
@@ -344,12 +344,12 @@ void test_minmax_nan_propagation_templ() {
// max(nan, 0) = 0
// max(0, nan) = 0
// max(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMax<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_all_nan.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_all_nan.template cwiseMax<PropagateNumbers>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMax<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_full_nan.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_full_nan.template cwiseMax<PropagateNumbers>(vec_zero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(kNaN));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(kZero));
verify_all_zero(vec_zero.template cwiseMax<PropagateNumbers>(vec_zero));
@@ -358,12 +358,12 @@ void test_minmax_nan_propagation_templ() {
// min(nan, 0) = nan
// min(0, nan) = nan
// min(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(kZero));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNaN>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(vec_full_nan));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(kZero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNaN>(vec_zero));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(kNaN));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(vec_all_nan));
verify_all_nan(vec_zero.template cwiseMin<PropagateNaN>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNaN>(kZero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNaN>(vec_zero));
@@ -372,12 +372,12 @@ void test_minmax_nan_propagation_templ() {
// min(nan, 0) = 0
// min(0, nan) = 0
// min(0, 0) = 0
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_nan(vec_all_nan.template cwiseMin<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_all_nan.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_all_nan.template cwiseMin<PropagateNumbers>(vec_zero));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_nan(vec_full_nan.template cwiseMin<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_full_nan.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_full_nan.template cwiseMin<PropagateNumbers>(vec_zero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(kNaN));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_all_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_full_nan));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(kZero));
verify_all_zero(vec_zero.template cwiseMin<PropagateNumbers>(vec_zero));
@@ -397,13 +397,13 @@ void test_minmax_nan_propagation_templ() {
VERIFY_IS_EQUAL(val(), kZero);
// Test NaN propagation for tensor of all NaNs.
val = vec_all_nan.template minimum<PropagateNaN>();
val = vec_full_nan.template minimum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_all_nan.template minimum<PropagateNumbers>();
val = vec_full_nan.template minimum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), kInf);
val = vec_all_nan.template maximum<PropagateNaN>();
val = vec_full_nan.template maximum<PropagateNaN>();
VERIFY((numext::isnan)(val()));
val = vec_all_nan.template maximum<PropagateNumbers>();
val = vec_full_nan.template maximum<PropagateNumbers>();
VERIFY_IS_EQUAL(val(), -kInf);
// Test NaN propagation for tensor with a single NaN.

View File

@@ -186,7 +186,7 @@ static void test_fft_real_input_energy() {
}
const DSizes<ptrdiff_t, TensorRank> arr = dimensions;
typedef typename internal::conditional<isComplexInput == true, std::complex<RealScalar>, RealScalar>::type InputScalar;
typedef std::conditional_t<isComplexInput == true, std::complex<RealScalar>, RealScalar> InputScalar;
Tensor<InputScalar, TensorRank, DataLayout> input;
input.resize(arr);
@@ -197,7 +197,7 @@ static void test_fft_real_input_energy() {
fft[i] = i;
}
typedef typename internal::conditional<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar>::type OutputScalar;
typedef std::conditional_t<FFTResultType == Eigen::BothParts, std::complex<RealScalar>, RealScalar> OutputScalar;
Tensor<OutputScalar, TensorRank, DataLayout> output;
output = input.template fft<FFTResultType, FFTDirection>(fft);

View File

@@ -17,8 +17,6 @@
#include <unsupported/Eigen/CXX11/src/Tensor/TensorGpuHipCudaDefines.h>
#define EIGEN_GPU_TEST_C99_MATH EIGEN_HAS_CXX11
using Eigen::Tensor;
void test_gpu_nullary() {
@@ -66,6 +64,47 @@ void test_gpu_nullary() {
gpuFree(d_in2);
}
// Tests that there are no indexing overflows when computing tensors with the
// max representable size.
template <typename IndexType,
IndexType N = (std::numeric_limits<IndexType>::max)()>
void test_gpu_nullary_max_size()
{
typedef int8_t DataType;
typedef Tensor<DataType, 1, 0, IndexType> TensorType;
typedef Eigen::array<IndexType, 1> ArrayType;
const IndexType n = N;
TensorType in1((ArrayType(n)));
in1.setZero();
std::size_t in1_bytes = in1.size() * sizeof(DataType);
DataType* d_in1;
gpuMalloc((void**)(&d_in1), in1_bytes);
gpuMemcpy(d_in1, in1.data(), in1_bytes, gpuMemcpyHostToDevice);
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
Eigen::TensorMap<TensorType> gpu_in1(d_in1, ArrayType(n));
gpu_in1.device(gpu_device) = gpu_in1.constant(123);
TensorType new1((ArrayType(n)));
assert(gpuMemcpyAsync(new1.data(), d_in1, in1_bytes, gpuMemcpyDeviceToHost,
gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (IndexType i = 0; i < n; ++i) {
VERIFY_IS_EQUAL(new1(ArrayType(i)), 123);
}
gpuFree(d_in1);
}
void test_gpu_elementwise_small() {
Tensor<float, 1> in1(Eigen::array<Eigen::DenseIndex, 1>(2));
Tensor<float, 1> in2(Eigen::array<Eigen::DenseIndex, 1>(2));
@@ -619,7 +658,6 @@ void test_gpu_convolution_3d()
}
#if EIGEN_GPU_TEST_C99_MATH
template <typename Scalar>
void test_gpu_lgamma(const Scalar stddev)
{
@@ -658,7 +696,6 @@ void test_gpu_lgamma(const Scalar stddev)
gpuFree(d_in);
gpuFree(d_out);
}
#endif
template <typename Scalar>
void test_gpu_digamma()
@@ -681,8 +718,8 @@ void test_gpu_digamma()
expected_out(2) = Scalar(1.2561176684318);
expected_out(3) = Scalar(2.398239129535781);
expected_out(4) = Scalar(9.210340372392849);
expected_out(5) = std::numeric_limits<Scalar>::infinity();
expected_out(6) = std::numeric_limits<Scalar>::infinity();
expected_out(5) = std::numeric_limits<Scalar>::quiet_NaN();
expected_out(6) = std::numeric_limits<Scalar>::quiet_NaN();
std::size_t bytes = in.size() * sizeof(Scalar);
@@ -704,11 +741,8 @@ void test_gpu_digamma()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (int i = 0; i < 5; ++i) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 5; i < 7; ++i) {
VERIFY_IS_EQUAL(out(i), expected_out(i));
for (int i = 0; i < 7; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}
gpuFree(d_in);
@@ -741,7 +775,7 @@ void test_gpu_zeta()
expected_out(0) = std::numeric_limits<Scalar>::infinity();
expected_out(1) = Scalar(1.61237534869);
expected_out(2) = Scalar(0.234848505667);
expected_out(3) = Scalar(1.03086757337e-5);
expected_out(3) = std::numeric_limits<Scalar>::quiet_NaN();
expected_out(4) = Scalar(0.367879440865);
expected_out(5) = Scalar(0.054102025820864097);
@@ -769,13 +803,8 @@ void test_gpu_zeta()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(out(0), expected_out(0));
VERIFY((std::isnan)(out(3)));
for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 6; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}
gpuFree(d_in_x);
@@ -990,7 +1019,6 @@ void test_gpu_igammac()
gpuFree(d_out);
}
#if EIGEN_GPU_TEST_C99_MATH
template <typename Scalar>
void test_gpu_erf(const Scalar stddev)
{
@@ -1068,7 +1096,7 @@ void test_gpu_erfc(const Scalar stddev)
gpuFree(d_in);
gpuFree(d_out);
}
#endif
template <typename Scalar>
void test_gpu_ndtri()
{
@@ -1117,13 +1145,8 @@ void test_gpu_ndtri()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
VERIFY_IS_EQUAL(out(0), expected_out(0));
VERIFY((std::isnan)(out(3)));
for (int i = 1; i < 6; ++i) {
if (i != 3) {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 6; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}
gpuFree(d_in_x);
@@ -1262,12 +1285,8 @@ void test_gpu_betainc()
assert(gpuMemcpyAsync(out.data(), d_out, bytes, gpuMemcpyDeviceToHost, gpu_device.stream()) == gpuSuccess);
assert(gpuStreamSynchronize(gpu_device.stream()) == gpuSuccess);
for (int i = 1; i < 125; ++i) {
if ((std::isnan)(expected_out(i))) {
VERIFY((std::isnan)(out(i)));
} else {
VERIFY_IS_APPROX(out(i), expected_out(i));
}
for (int i = 0; i < 125; ++i) {
VERIFY_IS_CWISE_APPROX(out(i), expected_out(i));
}
gpuFree(d_in_x);
@@ -1541,6 +1560,10 @@ void test_gpu_gamma_sample_der_alpha()
EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
{
CALL_SUBTEST_1(test_gpu_nullary());
CALL_SUBTEST_1(test_gpu_nullary_max_size<int16_t>());
CALL_SUBTEST_1(test_gpu_nullary_max_size<int32_t>());
CALL_SUBTEST_1((test_gpu_nullary_max_size<
int64_t, (std::numeric_limits<int32_t>::max)() + 100ll>()));
CALL_SUBTEST_1(test_gpu_elementwise_small());
CALL_SUBTEST_1(test_gpu_elementwise());
CALL_SUBTEST_1(test_gpu_props());
@@ -1560,7 +1583,6 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
CALL_SUBTEST_3(test_gpu_convolution_3d<RowMajor>());
#endif
#if EIGEN_GPU_TEST_C99_MATH
// std::erf, std::erfc, and so on were only added in c++11. We use them
// as a golden reference to validate the results produced by Eigen. Therefore
// we can only run these tests if we use a c++11 compiler.
@@ -1638,6 +1660,4 @@ EIGEN_DECLARE_TEST(cxx11_tensor_gpu)
CALL_SUBTEST_6(test_gpu_gamma_sample_der_alpha<float>());
CALL_SUBTEST_6(test_gpu_gamma_sample_der_alpha<double>());
#endif
#endif
}
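Editor's note on the loops collapsed above: the NaN special cases in the digamma, zeta, ndtri and betainc checks are now folded into VERIFY_IS_CWISE_APPROX, which is assumed to treat two NaNs as a match. A stand-alone helper illustrating that comparison rule (hypothetical, not the harness macro):

    #include <algorithm>
    #include <cmath>

    // Hypothetical helper, not Eigen's macro: values match if both are NaN or
    // if they agree to within a relative/absolute tolerance.
    template <typename T>
    bool cwise_approx(T a, T b, T tol) {
      if (std::isnan(a) || std::isnan(b)) return std::isnan(a) && std::isnan(b);
      return std::abs(a - b) <= tol * std::max<T>(std::abs(a), std::abs(b)) ||
             std::abs(a - b) <= tol;
    }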

View File

@@ -11,8 +11,6 @@
#include <Eigen/CXX11/Tensor>
#ifdef EIGEN_HAS_INDEX_LIST
static void test_static_index_list()
{
Tensor<float, 4> tensor(2,3,5,7);
@@ -26,6 +24,8 @@ static void test_static_index_list()
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[1]), 1);
VERIFY_IS_EQUAL(static_cast<Index>(reduction_axis[2]), 2);
VERIFY_IS_EQUAL(reduction_axis.size(), std::size_t(3));
EIGEN_STATIC_ASSERT((internal::array_get<0>(reduction_axis) == 0), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<1>(reduction_axis) == 1), YOU_MADE_A_PROGRAMMING_MISTAKE);
EIGEN_STATIC_ASSERT((internal::array_get<2>(reduction_axis) == 2), YOU_MADE_A_PROGRAMMING_MISTAKE);
@@ -370,16 +370,12 @@ static void test_dim_check()
}
#endif
EIGEN_DECLARE_TEST(cxx11_tensor_index_list)
{
#ifdef EIGEN_HAS_INDEX_LIST
CALL_SUBTEST(test_static_index_list());
CALL_SUBTEST(test_type2index_list());
CALL_SUBTEST(test_type2indexpair_list());
CALL_SUBTEST(test_dynamic_index_list());
CALL_SUBTEST(test_mixed_index_list());
CALL_SUBTEST(test_dim_check());
#endif
}

View File

@@ -6,131 +6,137 @@
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <sstream>
#include <string>
#include <Eigen/CXX11/Tensor>
template <typename Scalar, int rank, int Layout>
struct test_tensor_ostream_impl {};
template<int DataLayout>
static void test_output_0d()
{
Tensor<int, 0, DataLayout> tensor;
tensor() = 123;
std::stringstream os;
os << tensor;
std::string expected("123");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_1d()
{
Tensor<int, 1, DataLayout> tensor(5);
for (int i = 0; i < 5; ++i) {
tensor(i) = i;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 0, Layout> {
static void run() {
Eigen::Tensor<Scalar, 0> t;
t.setValues(1);
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1");
}
};
std::stringstream os;
os << tensor;
std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
Eigen::Tensor<double,1,DataLayout> empty_tensor(0);
std::stringstream empty_os;
empty_os << empty_tensor;
std::string empty_string;
VERIFY_IS_EQUAL(std::string(empty_os.str()), empty_string);
}
template<int DataLayout>
static void test_output_2d()
{
Tensor<int, 2, DataLayout> tensor(5, 3);
for (int i = 0; i < 5; ++i) {
for (int j = 0; j < 3; ++j) {
tensor(i, j) = i*j;
}
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 1, Layout> {
static void run() {
Eigen::Tensor<Scalar, 1> t = {3};
t.setValues({1, 2, 3});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1 2 3");
}
};
std::stringstream os;
os << tensor;
std::string expected("0 0 0\n0 1 2\n0 2 4\n0 3 6\n0 4 8");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_expr()
{
Tensor<int, 1, DataLayout> tensor1(5);
Tensor<int, 1, DataLayout> tensor2(5);
for (int i = 0; i < 5; ++i) {
tensor1(i) = i;
tensor2(i) = 7;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 2, Layout> {
static void run() {
Eigen::Tensor<Scalar, 2> t = {3, 2};
t.setValues({{1, 2}, {3, 4}, {5, 6}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1 2\n3 4\n5 6");
}
};
std::stringstream os;
os << tensor1 + tensor2;
std::string expected(" 7\n 8\n 9\n10\n11");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_string()
{
Tensor<std::string, 2, DataLayout> tensor(5, 3);
tensor.setConstant(std::string("foo"));
std::cout << tensor << std::endl;
std::stringstream os;
os << tensor;
std::string expected("foo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo\nfoo foo foo");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
}
template<int DataLayout>
static void test_output_const()
{
Tensor<int, 1, DataLayout> tensor(5);
for (int i = 0; i < 5; ++i) {
tensor(i) = i;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<Scalar, 3, Layout> {
static void run() {
Eigen::Tensor<Scalar, 3> t = {4, 3, 2};
t.setValues({{{1, 2}, {3, 4}, {5, 6}},
{{7, 8}, {9, 10}, {11, 12}},
{{13, 14}, {15, 16}, {17, 18}},
{{19, 20}, {21, 22}, {23, 24}}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == " 1 2\n 3 4\n 5 6\n\n 7 8\n 9 10\n11 12\n\n13 14\n15 16\n17 18\n\n19 20\n21 22\n23 24");
}
};
TensorMap<Tensor<const int, 1, DataLayout> > tensor_map(tensor.data(), 5);
template<int Layout>
struct test_tensor_ostream_impl<bool, 2, Layout> {
static void run() {
Eigen::Tensor<bool, 2> t = {3, 2};
t.setValues({{false, true}, {true, false}, {false, false}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "0 1\n1 0\n0 0");
}
};
std::stringstream os;
os << tensor_map;
template<typename Scalar, int Layout>
struct test_tensor_ostream_impl<std::complex<Scalar>, 2, Layout> {
static void run() {
Eigen::Tensor<std::complex<Scalar>, 2> t = {3, 2};
t.setValues({{std::complex<Scalar>(1, 2), std::complex<Scalar>(12, 3)},
{std::complex<Scalar>(-4, 2), std::complex<Scalar>(0, 5)},
{std::complex<Scalar>(-1, 4), std::complex<Scalar>(5, 27)}});
std::ostringstream os;
os << t.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == " (1,2) (12,3)\n(-4,2) (0,5)\n(-1,4) (5,27)");
}
};
std::string expected("0\n1\n2\n3\n4");
VERIFY_IS_EQUAL(std::string(os.str()), expected);
template <typename Scalar, int rank, int Layout>
void test_tensor_ostream() {
test_tensor_ostream_impl<Scalar, rank, Layout>::run();
}
EIGEN_DECLARE_TEST(cxx11_tensor_io)
{
CALL_SUBTEST(test_output_0d<ColMajor>());
CALL_SUBTEST(test_output_0d<RowMajor>());
CALL_SUBTEST(test_output_1d<ColMajor>());
CALL_SUBTEST(test_output_1d<RowMajor>());
CALL_SUBTEST(test_output_2d<ColMajor>());
CALL_SUBTEST(test_output_2d<RowMajor>());
CALL_SUBTEST(test_output_expr<ColMajor>());
CALL_SUBTEST(test_output_expr<RowMajor>());
CALL_SUBTEST(test_output_string<ColMajor>());
CALL_SUBTEST(test_output_string<RowMajor>());
CALL_SUBTEST(test_output_const<ColMajor>());
CALL_SUBTEST(test_output_const<RowMajor>());
void test_const_tensor_ostream() {
Eigen::Tensor<float, 0> t;
t.setValues(1);
const Eigen::TensorMap<Eigen::Tensor<const float, 0, Eigen::RowMajor>, Eigen::Unaligned> t_const(
t.data(), Eigen::DSizes<Eigen::DenseIndex, 0>{});
std::ostringstream os;
os << t_const.format(Eigen::TensorIOFormat::Plain());
VERIFY(os.str() == "1");
}
EIGEN_DECLARE_TEST(cxx11_tensor_io) {
CALL_SUBTEST((test_tensor_ostream<float, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 3, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 3, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 0, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 1, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 3, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<float, 3, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<double, 3, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 0, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 1, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<int, 3, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<bool, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<bool, 2, Eigen::RowMajor>()));
CALL_SUBTEST((test_tensor_ostream<std::complex<double>, 2, Eigen::ColMajor>()));
CALL_SUBTEST((test_tensor_ostream<std::complex<float>, 2, Eigen::ColMajor>()));
// Test printing TensorMap with const elements.
CALL_SUBTEST((test_const_tensor_ostream()));
}
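Editor's note: the rewritten io test drives the new TensorIOFormat API instead of the old ad-hoc operator<< checks. A hedged usage sketch, taken directly from the 2-D case above:

    #include <unsupported/Eigen/CXX11/Tensor>
    #include <iostream>

    int main() {
      Eigen::Tensor<int, 2> t(3, 2);
      t.setValues({{1, 2}, {3, 4}, {5, 6}});
      // Plain() prints space-separated values, one row per line.
      std::cout << t.format(Eigen::TensorIOFormat::Plain()) << "\n";
      // Prints:
      // 1 2
      // 3 4
      // 5 6
      return 0;
    }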

View File

@@ -43,7 +43,6 @@ static void test_simple_reshape()
template <typename>
static void test_static_reshape() {
#if defined(EIGEN_HAS_INDEX_LIST)
using Eigen::type2index;
Tensor<float, 5> tensor(2, 3, 1, 7, 1);
@@ -60,7 +59,6 @@ static void test_static_reshape() {
}
}
}
#endif
}
template <typename>

View File

@@ -0,0 +1,487 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2021 Rohit Santhanam <rohit.santhanam@amd.com>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#define EIGEN_TEST_NO_LONGDOUBLE
#define EIGEN_TEST_NO_COMPLEX
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
#define EIGEN_USE_GPU
#include "main.h"
#include <unsupported/Eigen/CXX11/Tensor>
using Eigen::Tensor;
template<typename>
void test_gpu_numext() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
bool* d_res_bfloat16 = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
bool* d_res_float = (bool*)gpu_device.allocate(num_elem * sizeof(bool));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<bool, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_res_float.device(gpu_device) = gpu_float.unaryExpr(Eigen::internal::scalar_isnan_op<float>());
// Test bfloat16 specific isnan op.
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().unaryExpr(Eigen::internal::scalar_isnan_op<Eigen::bfloat16>());
Tensor<bool, 1> bfloat16_prec(num_elem);
Tensor<bool, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(bool));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(bool));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_EQUAL(full_prec(i), bfloat16_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
#ifdef EIGEN_HAS_GPU_BF16
template<typename>
void test_gpu_conversion() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
float* d_conv = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_bfloat16(
d_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_conv(
d_conv, num_elem);
gpu_float.device(gpu_device) = gpu_float.random();
gpu_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>();
gpu_conv.device(gpu_device) = gpu_bfloat16.cast<float>();
Tensor<float, 1> initial(num_elem);
Tensor<float, 1> final(num_elem);
gpu_device.memcpyDeviceToHost(initial.data(), d_float, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(final.data(), d_conv, num_elem*sizeof(float));
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(static_cast<Eigen::bfloat16>(initial(i)), static_cast<Eigen::bfloat16>(final(i)));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_bfloat16);
gpu_device.deallocate(d_conv);
}
template<typename>
void test_gpu_unary() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_float.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().cast<float>();
Tensor<float, 1> bfloat16_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_elementwise() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(
d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(
d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
gpu_float1.device(gpu_device) = gpu_float1.random();
gpu_float1.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().cast<float>();
gpu_float2.device(gpu_device) = gpu_float2.random();
gpu_float2.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = (gpu_float1 + gpu_float2) * gpu_float1;
gpu_res_bfloat16.device(gpu_device) = ((gpu_float1.cast<Eigen::bfloat16>() + gpu_float2.cast<Eigen::bfloat16>()) * gpu_float1.cast<Eigen::bfloat16>()).cast<float>();
Tensor<float, 1> bfloat16_prec(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(static_cast<Eigen::bfloat16>(full_prec(i)), static_cast<Eigen::bfloat16>(bfloat16_prec(i)));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_trancendental() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float3 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res1_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res1_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res2_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res2_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res3_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res3_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float1(d_float1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float2(d_float2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float3(d_float3, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res1_bfloat16(d_res1_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res1_float(d_res1_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res2_bfloat16(d_res2_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res2_float(d_res2_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res3_bfloat16(d_res3_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res3_float(d_res3_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res4_bfloat16(d_res3_bfloat16, num_elem);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res4_float(d_res3_float, num_elem);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float1.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().cast<float>();
gpu_float2.device(gpu_device) = gpu_float2.random() + gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>().cast<float>();
gpu_float3.device(gpu_device) = gpu_float3.random();
gpu_float3.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>().cast<float>();
gpu_res1_float.device(gpu_device) = gpu_float1.exp().cast<Eigen::bfloat16>();
gpu_res2_float.device(gpu_device) = gpu_float2.log().cast<Eigen::bfloat16>();
gpu_res3_float.device(gpu_device) = gpu_float3.log1p().cast<Eigen::bfloat16>();
gpu_res4_float.device(gpu_device) = gpu_float3.expm1().cast<Eigen::bfloat16>();
gpu_res1_bfloat16.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>();
gpu_res1_bfloat16.device(gpu_device) = gpu_res1_bfloat16.exp();
gpu_res2_bfloat16.device(gpu_device) = gpu_float2.cast<Eigen::bfloat16>();
gpu_res2_bfloat16.device(gpu_device) = gpu_res2_bfloat16.log();
gpu_res3_bfloat16.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>();
gpu_res3_bfloat16.device(gpu_device) = gpu_res3_bfloat16.log1p();
gpu_res3_bfloat16.device(gpu_device) = gpu_float3.cast<Eigen::bfloat16>();
gpu_res3_bfloat16.device(gpu_device) = gpu_res3_bfloat16.expm1();
Tensor<float, 1> input1(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec1(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec1(num_elem);
Tensor<float, 1> input2(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec2(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec2(num_elem);
Tensor<float, 1> input3(num_elem);
Tensor<Eigen::bfloat16, 1> bfloat16_prec3(num_elem);
Tensor<Eigen::bfloat16, 1> full_prec3(num_elem);
gpu_device.memcpyDeviceToHost(input1.data(), d_float1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input2.data(), d_float2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(input3.data(), d_float3, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(bfloat16_prec1.data(), d_res1_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec1.data(), d_res1_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(bfloat16_prec2.data(), d_res2_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec2.data(), d_res2_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(bfloat16_prec3.data(), d_res3_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec3.data(), d_res3_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec1(i), bfloat16_prec1(i));
}
for (int i = 0; i < num_elem; ++i) {
if(std::abs(input2(i)-1.f)<0.05f) // log lacks accuracy near 1
VERIFY_IS_APPROX(full_prec2(i)+Eigen::bfloat16(0.1f), bfloat16_prec2(i)+Eigen::bfloat16(0.1f));
else
VERIFY_IS_APPROX(full_prec2(i), bfloat16_prec2(i));
}
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec3(i), bfloat16_prec3(i));
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_float3);
gpu_device.deallocate(d_res1_bfloat16);
gpu_device.deallocate(d_res1_float);
gpu_device.deallocate(d_res2_bfloat16);
gpu_device.deallocate(d_res2_float);
gpu_device.deallocate(d_res3_float);
gpu_device.deallocate(d_res3_bfloat16);
}
template<typename>
void test_gpu_contractions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int rows = 23;
int cols = 23;
int num_elem = rows*cols;
float* d_float1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_float2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(num_elem * sizeof(Eigen::bfloat16));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float1(
d_float1, rows, cols);
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float2(
d_float2, rows, cols);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 2>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, rows, cols);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 2>, Eigen::Aligned> gpu_res_float(
d_res_float, rows, cols);
gpu_float1.device(gpu_device) = gpu_float1.random() - gpu_float1.constant(0.5f);
gpu_float2.device(gpu_device) = gpu_float2.random() - gpu_float2.constant(0.5f);
typedef Tensor<float, 2>::DimensionPair DimPair;
Eigen::array<DimPair, 1> dims(DimPair(1, 0));
gpu_res_float.device(gpu_device) = gpu_float1.contract(gpu_float2, dims).cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float1.cast<Eigen::bfloat16>().contract(gpu_float2.cast<Eigen::bfloat16>(), dims);
Tensor<Eigen::bfloat16, 2> bfloat16_prec(rows, cols);
Tensor<Eigen::bfloat16, 2> full_prec(rows, cols);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, num_elem*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(Eigen::bfloat16));
gpu_device.synchronize();
for (int i = 0; i < rows; ++i) {
for (int j = 0; j < cols; ++j) {
if (numext::abs(full_prec(i, j) - bfloat16_prec(i, j)) > Eigen::bfloat16(1e-2f)) {
VERIFY_IS_APPROX(full_prec(i, j), bfloat16_prec(i, j));
}
}
}
gpu_device.deallocate(d_float1);
gpu_device.deallocate(d_float2);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
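Aside (not part of the patch): the contraction dimensions used above, DimPair(1, 0), pair the second index of gpu_float1 with the first index of gpu_float2, so the contraction is the ordinary matrix product
\[ C_{ij} = \sum_{k} A_{ik} B_{kj}, \]
which is why the float and bfloat16 results can be compared entry by entry against the 1e-2 tolerance.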
template<typename>
void test_gpu_reductions(int size1, int size2, int redux) {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = size1*size2;
int result_size = (redux == 1 ? size1 : size2);
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(result_size * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(result_size * sizeof(Eigen::bfloat16));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
d_float, size1, size2);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16, result_size);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, result_size);
gpu_float.device(gpu_device) = gpu_float.random() * 2.0f;
Eigen::array<int, 1> redux_dim = {redux};
gpu_res_float.device(gpu_device) = gpu_float.sum(redux_dim).cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().sum(redux_dim);
Tensor<Eigen::bfloat16, 1> bfloat16_prec(result_size);
Tensor<Eigen::bfloat16, 1> full_prec(result_size);
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, result_size*sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, result_size*sizeof(Eigen::bfloat16));
gpu_device.synchronize();
for (int i = 0; i < result_size; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_reductions() {
test_gpu_reductions<void>(13, 13, 0);
test_gpu_reductions<void>(13, 13, 1);
test_gpu_reductions<void>(35, 36, 0);
test_gpu_reductions<void>(35, 36, 1);
test_gpu_reductions<void>(36, 35, 0);
test_gpu_reductions<void>(36, 35, 1);
}
template<typename>
void test_gpu_full_reductions() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int size = 13;
int num_elem = size*size;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::bfloat16* d_res_bfloat16 = (Eigen::bfloat16*)gpu_device.allocate(1 * sizeof(Eigen::bfloat16));
Eigen::bfloat16* d_res_float = (Eigen::bfloat16*)gpu_device.allocate(1 * sizeof(Eigen::bfloat16));
Eigen::TensorMap<Eigen::Tensor<float, 2>, Eigen::Aligned> gpu_float(
d_float, size, size);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 0>, Eigen::Aligned> gpu_res_bfloat16(
d_res_bfloat16);
Eigen::TensorMap<Eigen::Tensor<Eigen::bfloat16, 0>, Eigen::Aligned> gpu_res_float(
d_res_float);
gpu_float.device(gpu_device) = gpu_float.random();
gpu_res_float.device(gpu_device) = gpu_float.sum().cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().sum();
Tensor<Eigen::bfloat16, 0> bfloat16_prec;
Tensor<Eigen::bfloat16, 0> full_prec;
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::bfloat16));
gpu_device.synchronize();
VERIFY_IS_APPROX(full_prec(), bfloat16_prec());
gpu_res_float.device(gpu_device) = gpu_float.maximum().cast<Eigen::bfloat16>();
gpu_res_bfloat16.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().maximum();
gpu_device.memcpyDeviceToHost(bfloat16_prec.data(), d_res_bfloat16, sizeof(Eigen::bfloat16));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, sizeof(Eigen::bfloat16));
gpu_device.synchronize();
VERIFY_IS_APPROX(full_prec(), bfloat16_prec());
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16);
gpu_device.deallocate(d_res_float);
}
template<typename>
void test_gpu_forced_evals() {
Eigen::GpuStreamDevice stream;
Eigen::GpuDevice gpu_device(&stream);
int num_elem = 101;
float* d_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16_1 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_bfloat16_2 = (float*)gpu_device.allocate(num_elem * sizeof(float));
float* d_res_float = (float*)gpu_device.allocate(num_elem * sizeof(float));
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_float(
d_float, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_bfloat16_1(
d_res_bfloat16_1, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Unaligned> gpu_res_bfloat16_2(
d_res_bfloat16_2, num_elem);
Eigen::TensorMap<Eigen::Tensor<float, 1>, Eigen::Aligned> gpu_res_float(
d_res_float, num_elem);
Eigen::array<int, 1> no_bcast;
no_bcast[0] = 1;
gpu_float.device(gpu_device) = gpu_float.random() - gpu_float.constant(0.5f);
gpu_float.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().cast<float>();
gpu_res_float.device(gpu_device) = gpu_float.abs();
gpu_res_bfloat16_1.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().eval().cast<float>();
gpu_res_bfloat16_2.device(gpu_device) = gpu_float.cast<Eigen::bfloat16>().abs().broadcast(no_bcast).eval().cast<float>();
Tensor<float, 1> bfloat16_prec1(num_elem);
Tensor<float, 1> bfloat16_prec2(num_elem);
Tensor<float, 1> full_prec(num_elem);
gpu_device.memcpyDeviceToHost(bfloat16_prec1.data(), d_res_bfloat16_1, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(bfloat16_prec2.data(), d_res_bfloat16_2, num_elem*sizeof(float));
gpu_device.memcpyDeviceToHost(full_prec.data(), d_res_float, num_elem*sizeof(float));
gpu_device.synchronize();
for (int i = 0; i < num_elem; ++i) {
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec1(i));
VERIFY_IS_APPROX(full_prec(i), bfloat16_prec2(i));
}
gpu_device.deallocate(d_float);
gpu_device.deallocate(d_res_bfloat16_1);
gpu_device.deallocate(d_res_bfloat16_2);
gpu_device.deallocate(d_res_float);
}
#endif
EIGEN_DECLARE_TEST(cxx11_tensor_of_bfloat16_gpu)
{
CALL_SUBTEST_1(test_gpu_numext<void>());
// The reduction unit tests have been excluded until a working
// implementation to expand the accumulator data type to float32
// is available.
// TODO: add reduction unit tests
#ifdef EIGEN_HAS_GPU_BF16
CALL_SUBTEST_2(test_gpu_conversion<void>());
CALL_SUBTEST_3(test_gpu_unary<void>());
CALL_SUBTEST_4(test_gpu_elementwise<void>());
CALL_SUBTEST_5(test_gpu_trancendental<void>());
CALL_SUBTEST_6(test_gpu_contractions<void>());
CALL_SUBTEST_7(test_gpu_reductions<void>());
CALL_SUBTEST_8(test_gpu_full_reductions<void>());
CALL_SUBTEST_9(test_gpu_forced_evals<void>());
#else
std::cout << "bfloat16 floats are not supported by this version of gpu: skipping the test" << std::endl;
#endif
}
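A minimal host-side sketch (not part of the test file) of the precision the GPU tests above tolerate; it assumes only that Eigen::bfloat16 is available from Eigen/Core, as in recent Eigen releases. bfloat16 keeps an 8-bit significand, so a float survives the round trip with roughly two to three significant decimal digits:
#include <Eigen/Core>
#include <iostream>
int main() {
  float x = 1.2345678f;
  Eigen::bfloat16 b = static_cast<Eigen::bfloat16>(x);  // round to bfloat16 (8-bit significand)
  float back = static_cast<float>(b);                    // widen back to float
  std::cout << x << " -> " << back << std::endl;         // low-order digits are lost (~1.234)
  return 0;
}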

View File

@@ -47,6 +47,20 @@ static void test_abs()
}
}
static void test_arg()
{
Tensor<std::complex<float>, 1> data1(3);
Tensor<std::complex<double>, 1> data2(3);
data1.setRandom();
data2.setRandom();
Tensor<float, 1> arg1 = data1.arg();
Tensor<double, 1> arg2 = data2.arg();
for (int i = 0; i < 3; ++i) {
VERIFY_IS_APPROX(arg1(i), std::arg(data1(i)));
VERIFY_IS_APPROX(arg2(i), std::arg(data2(i)));
}
}
static void test_conjugate()
{
@@ -98,6 +112,7 @@ EIGEN_DECLARE_TEST(cxx11_tensor_of_complex)
{
CALL_SUBTEST(test_additions());
CALL_SUBTEST(test_abs());
CALL_SUBTEST(test_arg());
CALL_SUBTEST(test_conjugate());
CALL_SUBTEST(test_contractions());
}

View File

@@ -37,14 +37,8 @@ static void test_sycl_random_uniform(const Eigen::SyclDevice& sycl_device)
gpu_out.device(sycl_device)=gpu_out.random();
sycl_device.memcpyDeviceToHost(out.data(), d_out,out_bytes);
for(IndexType i=1; i<sizeDim0; i++)
for(IndexType j=1; j<sizeDim1; j++)
{
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i,j-1));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j-1)); }
// For now we just check thes code doesn't crash.
// For now we just check the code doesn't crash.
// TODO: come up with a valid test of randomness
sycl_device.deallocate(d_out);
}
@@ -66,16 +60,8 @@ void test_sycl_random_normal(const Eigen::SyclDevice& sycl_device)
Eigen::internal::NormalRandomGenerator<DataType> gen(true);
gpu_out.device(sycl_device)=gpu_out.random(gen);
sycl_device.memcpyDeviceToHost(out.data(), d_out,out_bytes);
for(IndexType i=1; i<sizeDim0; i++)
for(IndexType j=1; j<sizeDim1; j++)
{
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i,j-1));
VERIFY_IS_NOT_EQUAL(out(i,j), out(i-1,j-1));
}
// For now we just check thes code doesn't crash.
// For now we just check the code doesn't crash.
// TODO: come up with a valid test of randomness
sycl_device.deallocate(d_out);
}

View File

@@ -370,13 +370,7 @@ static void test_static_dims() {
Tensor<float, 2, DataLayout> out(72, 97);
in.setRandom();
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 3;
#else
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<3> > reduction_axis;
#endif
out = in.maximum(reduction_axis);
@@ -400,14 +394,8 @@ static void test_innermost_last_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 0;
reduction_axis[1] = 1;
#else
// This triggers the use of packets for ColMajor.
Eigen::IndexList<Eigen::type2index<0>, Eigen::type2index<1> > reduction_axis;
#endif
out = in.maximum(reduction_axis);
@@ -431,14 +419,8 @@ static void test_innermost_first_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 2;
reduction_axis[1] = 3;
#else
// This triggers the use of packets for RowMajor.
Eigen::IndexList<Eigen::type2index<2>, Eigen::type2index<3>> reduction_axis;
#endif
out = in.maximum(reduction_axis);
@@ -462,14 +444,8 @@ static void test_reduce_middle_dims() {
in.setRandom();
// Reduce on the innermost dimensions.
#if !EIGEN_HAS_CONSTEXPR
array<int, 2> reduction_axis;
reduction_axis[0] = 1;
reduction_axis[1] = 2;
#else
// This triggers the use of packets for RowMajor.
Eigen::IndexList<Eigen::type2index<1>, Eigen::type2index<2>> reduction_axis;
#endif
out = in.maximum(reduction_axis);
@@ -486,22 +462,31 @@ static void test_reduce_middle_dims() {
}
}
static void test_sum_accuracy() {
Tensor<float, 3> tensor(101, 101, 101);
for (float prescribed_mean : {1.0f, 10.0f, 100.0f, 1000.0f, 10000.0f}) {
tensor.setRandom();
tensor += tensor.constant(prescribed_mean);
template <typename ScalarType, int num_elements, int max_mean>
void test_sum_accuracy() {
Tensor<double, 1> double_tensor(num_elements);
Tensor<ScalarType, 1> tensor(num_elements);
for (double prescribed_mean = 0; prescribed_mean <= max_mean; prescribed_mean = numext::maxi(1.0, prescribed_mean*3.99)) {
// FIXME: NormalRandomGenerator doesn't work for bfloat16 and half.
double_tensor.setRandom<Eigen::internal::NormalRandomGenerator<double>>();
double_tensor += double_tensor.constant(prescribed_mean);
tensor = double_tensor.cast<ScalarType>();
Tensor<float, 0> sum = tensor.sum();
Tensor<ScalarType, 0> sum;
sum = tensor.sum();
// Compute the reference value in double precision.
double expected_sum = 0.0;
for (int i = 0; i < 101; ++i) {
for (int j = 0; j < 101; ++j) {
for (int k = 0; k < 101; ++k) {
expected_sum += static_cast<double>(tensor(i, j, k));
}
}
double abs_sum = 0.0;
for (int i = 0; i < num_elements; ++i) {
expected_sum += static_cast<double>(tensor(i));
abs_sum += static_cast<double>(numext::abs(tensor(i)));
}
VERIFY_IS_APPROX(sum(), static_cast<float>(expected_sum));
// Test against probabilistic forward error bound. In reality, the error is much smaller
// when we use tree summation.
double err = Eigen::numext::abs(static_cast<double>(sum()) - expected_sum);
double tol = numext::sqrt(num_elements) * NumTraits<ScalarType>::epsilon() * static_cast<ScalarType>(abs_sum);
VERIFY_LE(err, tol);
}
}
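For reference (not part of the patch), the tolerance computed above is the standard probabilistic forward error bound for summing n = num_elements values with unit roundoff \(\varepsilon\) = NumTraits<ScalarType>::epsilon():
\[ \Big| \hat{s} - \sum_{i=1}^{n} x_i \Big| \;\le\; \sqrt{n}\,\varepsilon \sum_{i=1}^{n} |x_i|, \]
where \(\hat{s}\) is the computed sum; as the comment notes, Eigen's tree summation typically stays well below this bound.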
@@ -528,5 +513,11 @@ EIGEN_DECLARE_TEST(cxx11_tensor_reduction) {
CALL_SUBTEST(test_innermost_first_dims<RowMajor>());
CALL_SUBTEST(test_reduce_middle_dims<ColMajor>());
CALL_SUBTEST(test_reduce_middle_dims<RowMajor>());
CALL_SUBTEST(test_sum_accuracy());
CALL_SUBTEST((test_sum_accuracy<float,10*1024*1024,8*1024>()));
CALL_SUBTEST((test_sum_accuracy<Eigen::bfloat16,10*1024*1024,8*1024>()));
// The range of half is limited to 65519 when using round-to-even,
// so we are severely limited in the size and mean of the tensors
// we can reduce without overflow.
CALL_SUBTEST((test_sum_accuracy<Eigen::half,4*1024,16>()));
CALL_SUBTEST((test_sum_accuracy<Eigen::half,10*1024*1024,0>()));
}

View File

@@ -16,7 +16,6 @@
#define EIGEN_DEFAULT_DENSE_INDEX_TYPE int64_t
#define EIGEN_USE_SYCL
#define EIGEN_HAS_CONSTEXPR 1
#include "main.h"

View File

@@ -0,0 +1,277 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2009 Mark Borgerding mark a borgerding net
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "main.h"
#include <unsupported/Eigen/FFT>
template <typename T>
inline std::complex<T> RandomCpx() {
return std::complex<T>((T)(rand() / (T)RAND_MAX - .5), (T)(rand() / (T)RAND_MAX - .5));
}
using namespace std;
using namespace Eigen;
template <typename T>
inline complex<long double> promote(complex<T> x) {
return complex<long double>((long double)x.real(), (long double)x.imag());
}
inline complex<long double> promote(float x) { return complex<long double>((long double)x); }
inline complex<long double> promote(double x) { return complex<long double>((long double)x); }
inline complex<long double> promote(long double x) { return complex<long double>((long double)x); }
template <typename VT1, typename VT2>
long double fft_rmse(const VT1& fftbuf, const VT2& timebuf) {
long double totalpower = 0;
long double difpower = 0;
long double pi = acos((long double)-1);
for (size_t k0 = 0; k0 < (size_t)fftbuf.size(); ++k0) {
complex<long double> acc = 0;
long double phinc = (long double)(-2.) * k0 * pi / timebuf.size();
for (size_t k1 = 0; k1 < (size_t)timebuf.size(); ++k1) {
acc += promote(timebuf[k1]) * exp(complex<long double>(0, k1 * phinc));
}
totalpower += numext::abs2(acc);
complex<long double> x = promote(fftbuf[k0]);
complex<long double> dif = acc - x;
difpower += numext::abs2(dif);
// cerr << k0 << "\t" << acc << "\t" << x << "\t" << sqrt(numext::abs2(dif)) << endl;
}
// cerr << "rmse:" << sqrt(difpower/totalpower) << endl;
return sqrt(difpower / totalpower);
}
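For clarity (not part of the file): fft_rmse compares the FFT output \(Y_k\) against a reference spectrum computed directly from the DFT definition and returns the relative RMS error
\[ X_k = \sum_{n=0}^{N-1} x_n\, e^{-2\pi i k n / N}, \qquad \mathrm{rmse} = \sqrt{ \frac{\sum_k |X_k - Y_k|^2}{\sum_k |X_k|^2} } . \]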
template <typename VT1, typename VT2>
long double dif_rmse(const VT1 buf1, const VT2 buf2) {
long double totalpower = 0;
long double difpower = 0;
size_t n = (min)(buf1.size(), buf2.size());
for (size_t k = 0; k < n; ++k) {
totalpower += (long double)((numext::abs2(buf1[k]) + numext::abs2(buf2[k])) / 2);
difpower += (long double)(numext::abs2(buf1[k] - buf2[k]));
}
return sqrt(difpower / totalpower);
}
enum { StdVectorContainer, EigenVectorContainer };
template <int Container, typename Scalar>
struct VectorType;
template <typename Scalar>
struct VectorType<StdVectorContainer, Scalar> {
typedef vector<Scalar> type;
};
template <typename Scalar>
struct VectorType<EigenVectorContainer, Scalar> {
typedef Matrix<Scalar, Dynamic, 1> type;
};
template <int Container, typename T>
void test_scalar_generic(int nfft) {
typedef typename FFT<T>::Complex Complex;
typedef typename FFT<T>::Scalar Scalar;
typedef typename VectorType<Container, Scalar>::type ScalarVector;
typedef typename VectorType<Container, Complex>::type ComplexVector;
FFT<T> fft;
ScalarVector tbuf(nfft);
ComplexVector freqBuf;
for (int k = 0; k < nfft; ++k) tbuf[k] = (T)(rand() / (double)RAND_MAX - .5);
// make sure it DOESN'T give the right full spectrum answer
// if we've asked for half-spectrum
fft.SetFlag(fft.HalfSpectrum);
fft.fwd(freqBuf, tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)((nfft >> 1) + 1));
VERIFY(T(fft_rmse(freqBuf, tbuf)) < test_precision<T>()); // gross check
fft.ClearFlag(fft.HalfSpectrum);
fft.fwd(freqBuf, tbuf);
VERIFY((size_t)freqBuf.size() == (size_t)nfft);
VERIFY(T(fft_rmse(freqBuf, tbuf)) < test_precision<T>()); // gross check
if (nfft & 1) return; // odd-length FFTs get a wrong-size inverse FFT
ScalarVector tbuf2;
fft.inv(tbuf2, freqBuf);
VERIFY(T(dif_rmse(tbuf, tbuf2)) < test_precision<T>()); // gross check
// verify that the Unscaled flag takes effect
ScalarVector tbuf3;
fft.SetFlag(fft.Unscaled);
fft.inv(tbuf3, freqBuf);
for (int k = 0; k < nfft; ++k) tbuf3[k] *= T(1. / nfft);
// for (size_t i=0;i<(size_t) tbuf.size();++i)
// cout << "freqBuf=" << freqBuf[i] << " in2=" << tbuf3[i] << " - in=" << tbuf[i] << " => " << (tbuf3[i] -
// tbuf[i] ) << endl;
VERIFY(T(dif_rmse(tbuf, tbuf3)) < test_precision<T>()); // gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv(tbuf2, freqBuf);
VERIFY(T(dif_rmse(tbuf, tbuf2)) < test_precision<T>()); // gross check
}
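A short note (not part of the file) on the HalfSpectrum check above: for a real input of length N the spectrum is conjugate-symmetric,
\[ X_{N-k} = \overline{X_k} \quad (x_n \in \mathbb{R}), \]
so only the bins k = 0, ..., floor(N/2) are independent, which is exactly the (nfft >> 1) + 1 size the test expects.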
template <typename T>
void test_scalar(int nfft) {
test_scalar_generic<StdVectorContainer, T>(nfft);
// test_scalar_generic<EigenVectorContainer,T>(nfft);
}
template <int Container, typename T>
void test_complex_generic(int nfft) {
typedef typename FFT<T>::Complex Complex;
typedef typename VectorType<Container, Complex>::type ComplexVector;
FFT<T> fft;
ComplexVector inbuf(nfft);
ComplexVector outbuf;
ComplexVector buf3;
for (int k = 0; k < nfft; ++k)
inbuf[k] = Complex((T)(rand() / (double)RAND_MAX - .5), (T)(rand() / (double)RAND_MAX - .5));
fft.fwd(outbuf, inbuf);
VERIFY(T(fft_rmse(outbuf, inbuf)) < test_precision<T>()); // gross check
fft.inv(buf3, outbuf);
VERIFY(T(dif_rmse(inbuf, buf3)) < test_precision<T>()); // gross check
// verify that the Unscaled flag takes effect
ComplexVector buf4;
fft.SetFlag(fft.Unscaled);
fft.inv(buf4, outbuf);
for (int k = 0; k < nfft; ++k) buf4[k] *= T(1. / nfft);
VERIFY(T(dif_rmse(inbuf, buf4)) < test_precision<T>()); // gross check
// verify that ClearFlag works
fft.ClearFlag(fft.Unscaled);
fft.inv(buf3, outbuf);
VERIFY(T(dif_rmse(inbuf, buf3)) < test_precision<T>()); // gross check
}
template <typename T>
void test_complex(int nfft) {
test_complex_generic<StdVectorContainer, T>(nfft);
test_complex_generic<EigenVectorContainer, T>(nfft);
}
template <typename T, int nrows, int ncols>
void test_complex2d() {
typedef typename Eigen::FFT<T>::Complex Complex;
FFT<T> fft;
Eigen::Matrix<Complex, nrows, ncols> src, src2, dst, dst2;
src = Eigen::Matrix<Complex, nrows, ncols>::Random();
// src = Eigen::Matrix<Complex,nrows,ncols>::Identity();
for (int k = 0; k < ncols; k++) {
Eigen::Matrix<Complex, nrows, 1> tmpOut;
fft.fwd(tmpOut, src.col(k));
dst2.col(k) = tmpOut;
}
for (int k = 0; k < nrows; k++) {
Eigen::Matrix<Complex, 1, ncols> tmpOut;
fft.fwd(tmpOut, dst2.row(k));
dst2.row(k) = tmpOut;
}
fft.fwd2(dst.data(), src.data(), ncols, nrows);
fft.inv2(src2.data(), dst.data(), ncols, nrows);
VERIFY((src - src2).norm() < test_precision<T>());
VERIFY((dst - dst2).norm() < test_precision<T>());
}
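The loop above relies on the separability of the 2D DFT (not stated in the file): transforming every column and then every row of the intermediate result yields the full 2D transform
\[ X_{k_1 k_2} = \sum_{n_1=0}^{N_1-1} \sum_{n_2=0}^{N_2-1} x_{n_1 n_2}\, e^{-2\pi i \left( k_1 n_1 / N_1 + k_2 n_2 / N_2 \right)}, \]
which is what fwd2/inv2 compute in a single call, hence the two results are compared directly.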
inline void test_return_by_value(int len) {
VectorXf in;
VectorXf in1;
in.setRandom(len);
VectorXcf out1, out2;
FFT<float> fft;
fft.SetFlag(fft.HalfSpectrum);
fft.fwd(out1, in);
out2 = fft.fwd(in);
VERIFY((out1 - out2).norm() < test_precision<float>());
in1 = fft.inv(out1);
VERIFY((in1 - in).norm() < test_precision<float>());
}
EIGEN_DECLARE_TEST(FFTW) {
CALL_SUBTEST(test_return_by_value(32));
CALL_SUBTEST(test_complex<float>(32));
CALL_SUBTEST(test_complex<double>(32));
CALL_SUBTEST(test_complex<float>(256));
CALL_SUBTEST(test_complex<double>(256));
CALL_SUBTEST(test_complex<float>(3 * 8));
CALL_SUBTEST(test_complex<double>(3 * 8));
CALL_SUBTEST(test_complex<float>(5 * 32));
CALL_SUBTEST(test_complex<double>(5 * 32));
CALL_SUBTEST(test_complex<float>(2 * 3 * 4));
CALL_SUBTEST(test_complex<double>(2 * 3 * 4));
CALL_SUBTEST(test_complex<float>(2 * 3 * 4 * 5));
CALL_SUBTEST(test_complex<double>(2 * 3 * 4 * 5));
CALL_SUBTEST(test_complex<float>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST(test_complex<double>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST(test_scalar<float>(32));
CALL_SUBTEST(test_scalar<double>(32));
CALL_SUBTEST(test_scalar<float>(45));
CALL_SUBTEST(test_scalar<double>(45));
CALL_SUBTEST(test_scalar<float>(50));
CALL_SUBTEST(test_scalar<double>(50));
CALL_SUBTEST(test_scalar<float>(256));
CALL_SUBTEST(test_scalar<double>(256));
CALL_SUBTEST(test_scalar<float>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST(test_scalar<double>(2 * 3 * 4 * 5 * 7));
#if defined EIGEN_HAS_FFTWL || defined EIGEN_POCKETFFT_DEFAULT
CALL_SUBTEST(test_complex<long double>(32));
CALL_SUBTEST(test_complex<long double>(256));
CALL_SUBTEST(test_complex<long double>(3 * 8));
CALL_SUBTEST(test_complex<long double>(5 * 32));
CALL_SUBTEST(test_complex<long double>(2 * 3 * 4));
CALL_SUBTEST(test_complex<long double>(2 * 3 * 4 * 5));
CALL_SUBTEST(test_complex<long double>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST(test_scalar<long double>(32));
CALL_SUBTEST(test_scalar<long double>(45));
CALL_SUBTEST(test_scalar<long double>(50));
CALL_SUBTEST(test_scalar<long double>(256));
CALL_SUBTEST(test_scalar<long double>(2 * 3 * 4 * 5 * 7));
CALL_SUBTEST((test_complex2d<long double, 2 * 3 * 4, 2 * 3 * 4>()));
CALL_SUBTEST((test_complex2d<long double, 3 * 4 * 5, 3 * 4 * 5>()));
CALL_SUBTEST((test_complex2d<long double, 24, 60>()));
CALL_SUBTEST((test_complex2d<long double, 60, 24>()));
// fails to build since Eigen limits the stack allocation size; too big here.
// CALL_SUBTEST( ( test_complex2d<long double, 256, 256> () ) );
#endif
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
CALL_SUBTEST((test_complex2d<float, 24, 24>()));
CALL_SUBTEST((test_complex2d<float, 60, 60>()));
CALL_SUBTEST((test_complex2d<float, 24, 60>()));
CALL_SUBTEST((test_complex2d<float, 60, 24>()));
#endif
#if defined EIGEN_FFTW_DEFAULT || defined EIGEN_POCKETFFT_DEFAULT || defined EIGEN_MKL_DEFAULT
CALL_SUBTEST((test_complex2d<double, 24, 24>()));
CALL_SUBTEST((test_complex2d<double, 60, 60>()));
CALL_SUBTEST((test_complex2d<double, 24, 60>()));
CALL_SUBTEST((test_complex2d<double, 60, 24>()));
#endif
}

View File

@@ -20,10 +20,10 @@ EIGEN_DONT_INLINE typename Vector::Scalar foo(const Vector& p)
return (p-Vector(Scalar(-1),Scalar(1.))).norm() + (p.array().sqrt().abs() * p.array().sin()).sum() + p.dot(p);
}
template<typename _Scalar, int NX=Dynamic, int NY=Dynamic>
template<typename Scalar_, int NX=Dynamic, int NY=Dynamic>
struct TestFunc1
{
typedef _Scalar Scalar;
typedef Scalar_ Scalar;
enum {
InputsAtCompileTime = NX,
ValuesAtCompileTime = NY

View File

@@ -0,0 +1,28 @@
// This file is part of Eigen, a lightweight C++ template library
// for linear algebra.
//
// Copyright (C) 2011 Gael Guennebaud <g.gael@free.fr>
//
// This Source Code Form is subject to the terms of the Mozilla
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
#include "../../test/sparse_solver.h"
#include <unsupported/Eigen/IterativeSolvers>
template <typename T>
void test_idrstabl_T() {
IDRSTABL<SparseMatrix<T>, DiagonalPreconditioner<T> > idrstabl_colmajor_diag;
IDRSTABL<SparseMatrix<T>, IncompleteLUT<T> > idrstabl_colmajor_ilut;
idrstabl_colmajor_diag.setTolerance(NumTraits<T>::epsilon() * 4);
idrstabl_colmajor_ilut.setTolerance(NumTraits<T>::epsilon() * 4);
CALL_SUBTEST(check_sparse_square_solving(idrstabl_colmajor_diag));
CALL_SUBTEST(check_sparse_square_solving(idrstabl_colmajor_ilut));
}
EIGEN_DECLARE_TEST(idrstabl) {
CALL_SUBTEST_1((test_idrstabl_T<double>()));
CALL_SUBTEST_2((test_idrstabl_T<std::complex<double> >()));
}
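A minimal, hedged usage sketch of the new solver (not part of the test): the names follow the generic IterativeSolverBase interface that check_sparse_square_solving exercises (compute/solve/iterations/error), and the tridiagonal system is purely illustrative.
#include <Eigen/Sparse>
#include <unsupported/Eigen/IterativeSolvers>
#include <iostream>
#include <vector>
int main() {
  const int n = 100;
  // Assemble a simple tridiagonal system A x = b.
  std::vector<Eigen::Triplet<double> > triplets;
  for (int i = 0; i < n; ++i) {
    triplets.push_back(Eigen::Triplet<double>(i, i, 4.0));
    if (i + 1 < n) {
      triplets.push_back(Eigen::Triplet<double>(i, i + 1, -1.0));
      triplets.push_back(Eigen::Triplet<double>(i + 1, i, -1.0));
    }
  }
  Eigen::SparseMatrix<double> A(n, n);
  A.setFromTriplets(triplets.begin(), triplets.end());
  Eigen::VectorXd b = Eigen::VectorXd::Ones(n);
  Eigen::IDRSTABL<Eigen::SparseMatrix<double>, Eigen::DiagonalPreconditioner<double> > solver;
  solver.setTolerance(1e-10);
  solver.compute(A);
  Eigen::VectorXd x = solver.solve(b);
  std::cout << "iterations: " << solver.iterations() << "  estimated error: " << solver.error() << std::endl;
  return 0;
}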

View File

@@ -29,7 +29,7 @@ void check_kronecker_product(const MatrixType& ab)
{
VERIFY_IS_EQUAL(ab.rows(), 6);
VERIFY_IS_EQUAL(ab.cols(), 6);
VERIFY_IS_EQUAL(ab.nonZeros(), 36);
VERIFY_IS_EQUAL(ab.size(), 36);
VERIFY_IS_APPROX(ab.coeff(0,0), -0.4017367630386106);
VERIFY_IS_APPROX(ab.coeff(0,1), 0.1056863433932735);
VERIFY_IS_APPROX(ab.coeff(0,2), -0.7255206194554212);

View File

@@ -24,7 +24,7 @@
using std::sqrt;
// tolerance for checking the number of iterations
#define LM_EVAL_COUNT_TOL 4/3
#define LM_EVAL_COUNT_TOL 2
struct lmder_functor : DenseFunctor<double>
{
@@ -75,11 +75,11 @@ void testLmder1()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.lmder1(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 6);
VERIFY_IS_EQUAL(lm.njev(), 5);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 6);
// VERIFY_IS_EQUAL(lm.njev(), 5);
// check norm
VERIFY_IS_APPROX(lm.fvec().blueNorm(), 0.09063596);
@@ -104,11 +104,12 @@ void testLmder()
lmder_functor functor;
LevenbergMarquardt<lmder_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 6);
VERIFY_IS_EQUAL(lm.njev(), 5);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 6);
// VERIFY_IS_EQUAL(lm.njev(), 5);
// check norm
fnorm = lm.fvec().blueNorm();
@@ -177,9 +178,10 @@ void testLmdif1()
lmdif_functor functor;
DenseIndex nfev;
info = LevenbergMarquardt<lmdif_functor>::lmdif1(functor, x, &nfev);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(nfev, 26);
// check norm
@@ -208,9 +210,10 @@ void testLmdif()
NumericalDiff<lmdif_functor> numDiff(functor);
LevenbergMarquardt<NumericalDiff<lmdif_functor> > lm(numDiff);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return values
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 26);
// check norm
@@ -293,11 +296,12 @@ void testNistChwirut2(void)
chwirut2_functor functor;
LevenbergMarquardt<chwirut2_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 10);
VERIFY_IS_EQUAL(lm.njev(), 8);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.1304802941E+02);
// check x
@@ -314,11 +318,12 @@ void testNistChwirut2(void)
lm.setFtol(1.E6*NumTraits<double>::epsilon());
lm.setXtol(1.E6*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 7);
VERIFY_IS_EQUAL(lm.njev(), 6);
// VERIFY_IS_EQUAL(lm.njev(), 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.1304802941E+02);
// check x
@@ -373,11 +378,12 @@ void testNistMisra1a(void)
misra1a_functor functor;
LevenbergMarquardt<misra1a_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 19);
VERIFY_IS_EQUAL(lm.njev(), 15);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 19);
// VERIFY_IS_EQUAL(lm.njev(), 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.2455138894E-01);
// check x
@@ -390,11 +396,12 @@ void testNistMisra1a(void)
x<< 250., 0.0005;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 5);
VERIFY_IS_EQUAL(lm.njev(), 4);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 5);
// VERIFY_IS_EQUAL(lm.njev(), 4);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.2455138894E-01);
// check x
@@ -464,11 +471,12 @@ void testNistHahn1(void)
hahn1_functor functor;
LevenbergMarquardt<hahn1_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 11);
VERIFY_IS_EQUAL(lm.njev(), 10);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 11);
// VERIFY_IS_EQUAL(lm.njev(), 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.5324382854E+00);
// check x
@@ -486,11 +494,12 @@ void testNistHahn1(void)
x<< .1, -.1, .005, -.000001, -.005, .0001, -.0000001;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 11);
VERIFY_IS_EQUAL(lm.njev(), 10);
// VERIFY_IS_EQUAL(lm.njev(), 10);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.5324382854E+00);
// check x
@@ -550,11 +559,12 @@ void testNistMisra1d(void)
misra1d_functor functor;
LevenbergMarquardt<misra1d_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 7);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 9);
// VERIFY_IS_EQUAL(lm.njev(), 7);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6419295283E-02);
// check x
@@ -567,11 +577,12 @@ void testNistMisra1d(void)
x<< 450., 0.0003;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 4);
VERIFY_IS_EQUAL(lm.njev(), 3);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 4);
// VERIFY_IS_EQUAL(lm.njev(), 3);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6419295283E-02);
// check x
@@ -628,11 +639,12 @@ void testNistLanczos1(void)
lanczos1_functor functor;
LevenbergMarquardt<lanczos1_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 79);
VERIFY_IS_EQUAL(lm.njev(), 72);
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
// VERIFY_IS_EQUAL(lm.nfev(), 79);
// VERIFY_IS_EQUAL(lm.njev(), 72);
// check norm^2
VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
@@ -649,11 +661,12 @@ void testNistLanczos1(void)
x<< 0.5, 0.7, 3.6, 4.2, 4., 6.3;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 8);
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeErrorTooSmall);
// VERIFY_IS_EQUAL(lm.nfev(), 9);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY(lm.fvec().squaredNorm() <= 1.4307867721E-25);
// check x
@@ -714,11 +727,12 @@ void testNistRat42(void)
rat42_functor functor;
LevenbergMarquardt<rat42_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 10);
VERIFY_IS_EQUAL(lm.njev(), 8);
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// VERIFY_IS_EQUAL(lm.nfev(), 10);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.0565229338E+00);
// check x
@@ -732,11 +746,12 @@ void testNistRat42(void)
x<< 75., 2.5, 0.07;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
VERIFY_IS_EQUAL(lm.nfev(), 6);
VERIFY_IS_EQUAL(lm.njev(), 5);
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// VERIFY_IS_EQUAL(lm.nfev(), 6);
// VERIFY_IS_EQUAL(lm.njev(), 5);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.0565229338E+00);
// check x
@@ -787,14 +802,15 @@ void testNistMGH10(void)
/*
* First try
*/
x<< 2., 400000., 25000.;
x << 2., 400000., 25000.;
// do the computation
MGH10_functor functor;
LevenbergMarquardt<MGH10_functor> lm(functor);
info = lm.minimize(x);
++g_test_level;
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
--g_test_level;
EIGEN_UNUSED_VARIABLE(info)
// ++g_test_level;
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// --g_test_level;
// was: VERIFY_IS_EQUAL(info, 1);
// check norm^2
@@ -805,11 +821,11 @@ void testNistMGH10(void)
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
// check return value
++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 284 );
VERIFY_IS_EQUAL(lm.njev(), 249 );
--g_test_level;
// ++g_test_level;
// VERIFY_IS_EQUAL(lm.nfev(), 284 );
// VERIFY_IS_EQUAL(lm.njev(), 249 );
// --g_test_level;
VERIFY(lm.nfev() < 284 * LM_EVAL_COUNT_TOL);
VERIFY(lm.njev() < 249 * LM_EVAL_COUNT_TOL);
@@ -819,11 +835,12 @@ void testNistMGH10(void)
x<< 0.02, 4000., 250.;
// do the computation
info = lm.minimize(x);
++g_test_level;
VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// was: VERIFY_IS_EQUAL(info, 1);
--g_test_level;
EIGEN_UNUSED_VARIABLE(info)
// ++g_test_level;
// VERIFY_IS_EQUAL(info, LevenbergMarquardtSpace::RelativeReductionTooSmall);
// // was: VERIFY_IS_EQUAL(info, 1);
// --g_test_level;
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7945855171E+01);
// check x
@@ -832,10 +849,10 @@ void testNistMGH10(void)
VERIFY_IS_APPROX(x[2], 3.4522363462E+02);
// check return value
++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 126);
VERIFY_IS_EQUAL(lm.njev(), 116);
--g_test_level;
// ++g_test_level;
// VERIFY_IS_EQUAL(lm.nfev(), 126);
// VERIFY_IS_EQUAL(lm.njev(), 116);
// --g_test_level;
VERIFY(lm.nfev() < 126 * LM_EVAL_COUNT_TOL);
VERIFY(lm.njev() < 116 * LM_EVAL_COUNT_TOL);
}
@@ -888,6 +905,7 @@ void testNistBoxBOD(void)
lm.setXtol(1.E6*NumTraits<double>::epsilon());
lm.setFactor(10);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.1680088766E+03);
@@ -896,9 +914,9 @@ void testNistBoxBOD(void)
VERIFY_IS_APPROX(x[1], 5.4723748542E-01);
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY(lm.nfev() < 31); // 31
VERIFY(lm.njev() < 25); // 25
// VERIFY_IS_EQUAL(info, 1);
// VERIFY(lm.nfev() < 31); // 31
// VERIFY(lm.njev() < 25); // 25
/*
* Second try
@@ -909,13 +927,14 @@ void testNistBoxBOD(void)
lm.setFtol(NumTraits<double>::epsilon());
lm.setXtol( NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
++g_test_level;
VERIFY_IS_EQUAL(lm.nfev(), 16 );
VERIFY_IS_EQUAL(lm.njev(), 15 );
--g_test_level;
// VERIFY_IS_EQUAL(info, 1);
// ++g_test_level;
// VERIFY_IS_EQUAL(lm.nfev(), 16 );
// VERIFY_IS_EQUAL(lm.njev(), 15 );
// --g_test_level;
VERIFY(lm.nfev() < 16 * LM_EVAL_COUNT_TOL);
VERIFY(lm.njev() < 15 * LM_EVAL_COUNT_TOL);
// check norm^2
@@ -975,6 +994,7 @@ void testNistMGH17(void)
lm.setXtol(NumTraits<double>::epsilon());
lm.setMaxfev(1000);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05);
@@ -987,8 +1007,8 @@ void testNistMGH17(void)
// check return value
// VERIFY_IS_EQUAL(info, 2); //FIXME Use (lm.info() == Success)
VERIFY(lm.nfev() < 700 ); // 602
VERIFY(lm.njev() < 600 ); // 545
// VERIFY(lm.nfev() < 700 ); // 602
// VERIFY(lm.njev() < 600 ); // 545
/*
* Second try
@@ -997,11 +1017,12 @@ void testNistMGH17(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 18);
VERIFY_IS_EQUAL(lm.njev(), 15);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 18);
// VERIFY_IS_EQUAL(lm.njev(), 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.4648946975E-05);
// check x
@@ -1063,6 +1084,7 @@ void testNistMGH09(void)
LevenbergMarquardt<MGH09_functor> lm(functor);
lm.setMaxfev(1000);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04);
@@ -1072,9 +1094,9 @@ void testNistMGH09(void)
VERIFY_IS_APPROX(x[2], 0.12305309914); // should be 1.2305650693E-01
VERIFY_IS_APPROX(x[3], 0.13605395375); // should be 1.3606233068E-01
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY(lm.nfev() < 510 ); // 490
VERIFY(lm.njev() < 400 ); // 376
// VERIFY_IS_EQUAL(info, 1);
// VERIFY(lm.nfev() < 510 ); // 490
// VERIFY(lm.njev() < 400 ); // 376
/*
* Second try
@@ -1083,11 +1105,12 @@ void testNistMGH09(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 18);
VERIFY_IS_EQUAL(lm.njev(), 16);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 18);
// VERIFY_IS_EQUAL(lm.njev(), 16);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 3.0750560385E-04);
// check x
@@ -1149,11 +1172,12 @@ void testNistBennett5(void)
LevenbergMarquardt<Bennett5_functor> lm(functor);
lm.setMaxfev(1000);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 758);
VERIFY_IS_EQUAL(lm.njev(), 744);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 758);
// VERIFY_IS_EQUAL(lm.njev(), 744);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.2404744073E-04);
// check x
@@ -1167,11 +1191,12 @@ void testNistBennett5(void)
// do the computation
lm.resetParameters();
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 203);
VERIFY_IS_EQUAL(lm.njev(), 192);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 203);
// VERIFY_IS_EQUAL(lm.njev(), 192);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.2404744073E-04);
// check x
@@ -1237,11 +1262,12 @@ void testNistThurber(void)
lm.setFtol(1.E4*NumTraits<double>::epsilon());
lm.setXtol(1.E4*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 39);
VERIFY_IS_EQUAL(lm.njev(), 36);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 39);
// VERIFY_IS_EQUAL(lm.njev(), 36);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6427082397E+03);
// check x
@@ -1262,11 +1288,12 @@ void testNistThurber(void)
lm.setFtol(1.E4*NumTraits<double>::epsilon());
lm.setXtol(1.E4*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 29);
VERIFY_IS_EQUAL(lm.njev(), 28);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 29);
// VERIFY_IS_EQUAL(lm.njev(), 28);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 5.6427082397E+03);
// check x
@@ -1329,11 +1356,12 @@ void testNistRat43(void)
lm.setFtol(1.E6*NumTraits<double>::epsilon());
lm.setXtol(1.E6*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 27);
VERIFY_IS_EQUAL(lm.njev(), 20);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 27);
// VERIFY_IS_EQUAL(lm.njev(), 20);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7864049080E+03);
// check x
@@ -1351,11 +1379,12 @@ void testNistRat43(void)
lm.setFtol(1.E5*NumTraits<double>::epsilon());
lm.setXtol(1.E5*NumTraits<double>::epsilon());
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 9);
VERIFY_IS_EQUAL(lm.njev(), 8);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 9);
// VERIFY_IS_EQUAL(lm.njev(), 8);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 8.7864049080E+03);
// check x
@@ -1414,11 +1443,12 @@ void testNistEckerle4(void)
eckerle4_functor functor;
LevenbergMarquardt<eckerle4_functor> lm(functor);
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 18);
VERIFY_IS_EQUAL(lm.njev(), 15);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 18);
// VERIFY_IS_EQUAL(lm.njev(), 15);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.4635887487E-03);
// check x
@@ -1432,11 +1462,12 @@ void testNistEckerle4(void)
x<< 1.5, 5., 450.;
// do the computation
info = lm.minimize(x);
EIGEN_UNUSED_VARIABLE(info)
// check return value
VERIFY_IS_EQUAL(info, 1);
VERIFY_IS_EQUAL(lm.nfev(), 7);
VERIFY_IS_EQUAL(lm.njev(), 6);
// VERIFY_IS_EQUAL(info, 1);
// VERIFY_IS_EQUAL(lm.nfev(), 7);
// VERIFY_IS_EQUAL(lm.njev(), 6);
// check norm^2
VERIFY_IS_APPROX(lm.fvec().squaredNorm(), 1.4635887487E-03);
// check x

View File

@@ -104,8 +104,8 @@ void testSingular(const MatrixType& m_const, const typename MatrixType::RealScal
MatrixType& m = const_cast<MatrixType&>(m_const);
const int IsComplex = NumTraits<typename internal::traits<MatrixType>::Scalar>::IsComplex;
typedef typename internal::conditional<IsComplex, TriangularView<MatrixType,Upper>, const MatrixType&>::type TriangularType;
typename internal::conditional< IsComplex, ComplexSchur<MatrixType>, RealSchur<MatrixType> >::type schur;
typedef std::conditional_t<IsComplex, TriangularView<MatrixType,Upper>, const MatrixType&> TriangularType;
std::conditional_t< IsComplex, ComplexSchur<MatrixType>, RealSchur<MatrixType> > schur;
MatrixType T;
for (int i=0; i < g_repeat; ++i) {
@@ -171,7 +171,7 @@ EIGEN_DECLARE_TEST(matrix_power)
CALL_SUBTEST_5(testGeneral(Matrix3cf(), 1e-4f));
CALL_SUBTEST_8(testGeneral(Matrix4f(), 1e-4f));
CALL_SUBTEST_6(testGeneral(MatrixXf(2,2), 1e-3f)); // see bug 614
CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_9(testGeneral(MatrixXe(7,7), 1e-12L));
CALL_SUBTEST_10(testGeneral(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testGeneral(Matrix3f(), 1e-4f));
CALL_SUBTEST_12(testGeneral(Matrix3e(), 1e-13L));
@@ -184,7 +184,7 @@ EIGEN_DECLARE_TEST(matrix_power)
CALL_SUBTEST_5(testSingular(Matrix3cf(), 1e-4f));
CALL_SUBTEST_8(testSingular(Matrix4f(), 1e-4f));
CALL_SUBTEST_6(testSingular(MatrixXf(2,2), 1e-3f));
CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_9(testSingular(MatrixXe(7,7), 1e-12L));
CALL_SUBTEST_10(testSingular(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testSingular(Matrix3f(), 1e-4f));
CALL_SUBTEST_12(testSingular(Matrix3e(), 1e-13L));
@@ -197,7 +197,7 @@ EIGEN_DECLARE_TEST(matrix_power)
CALL_SUBTEST_5(testLogThenExp(Matrix3cf(), 1e-4f));
CALL_SUBTEST_8(testLogThenExp(Matrix4f(), 1e-4f));
CALL_SUBTEST_6(testLogThenExp(MatrixXf(2,2), 1e-3f));
CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-13L));
CALL_SUBTEST_9(testLogThenExp(MatrixXe(7,7), 1e-12L));
CALL_SUBTEST_10(testLogThenExp(Matrix3d(), 1e-13));
CALL_SUBTEST_11(testLogThenExp(Matrix3f(), 1e-4f));
CALL_SUBTEST_12(testLogThenExp(Matrix3e(), 1e-13L));

View File

@@ -0,0 +1,2 @@
#define EIGEN_MKL_DEFAULT 1
#include "fft_test_shared.h"

View File

@@ -0,0 +1,2 @@
#define EIGEN_POCKETFFT_DEFAULT 1
#include "fft_test_shared.h"

View File

@@ -179,29 +179,29 @@ void evalSolverSugarFunction( const POLYNOMIAL& pols, const ROOTS& roots, const
}
template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void polynomialsolver(int deg)
{
typedef typename NumTraits<_Scalar>::Real RealScalar;
typedef internal::increment_if_fixed_size<_Deg> Dim;
typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
typedef Matrix<RealScalar,_Deg,1> RealRootsType;
typedef typename NumTraits<Scalar_>::Real RealScalar;
typedef internal::increment_if_fixed_size<Deg_> Dim;
typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
typedef Matrix<Scalar_,Deg_,1> EvalRootsType;
typedef Matrix<RealScalar,Deg_,1> RealRootsType;
cout << "Standard cases" << endl;
PolynomialType pols = PolynomialType::Random(deg+1);
evalSolver<_Deg,PolynomialType>( pols );
evalSolver<Deg_,PolynomialType>( pols );
cout << "Hard cases" << endl;
_Scalar multipleRoot = internal::random<_Scalar>();
Scalar_ multipleRoot = internal::random<Scalar_>();
EvalRootsType allRoots = EvalRootsType::Constant(deg,multipleRoot);
roots_to_monicPolynomial( allRoots, pols );
evalSolver<_Deg,PolynomialType>( pols );
evalSolver<Deg_,PolynomialType>( pols );
cout << "Test sugar" << endl;
RealRootsType realRoots = RealRootsType::Random(deg);
roots_to_monicPolynomial( realRoots, pols );
evalSolverSugarFunction<_Deg>(
evalSolverSugarFunction<Deg_>(
pols,
realRoots.template cast <std::complex<RealScalar> >().eval(),
realRoots );

View File

@@ -25,12 +25,12 @@ struct increment_if_fixed_size
}
}
template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void realRoots_to_monicPolynomial_test(int deg)
{
typedef internal::increment_if_fixed_size<_Deg> Dim;
typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
typedef internal::increment_if_fixed_size<Deg_> Dim;
typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
typedef Matrix<Scalar_,Deg_,1> EvalRootsType;
PolynomialType pols(deg+1);
EvalRootsType roots = EvalRootsType::Random(deg);
@@ -40,43 +40,43 @@ void realRoots_to_monicPolynomial_test(int deg)
for( int i=0; i<roots.size(); ++i ){
evr[i] = std::abs( poly_eval( pols, roots[i] ) ); }
bool evalToZero = evr.isZero( test_precision<_Scalar>() );
bool evalToZero = evr.isZero( test_precision<Scalar_>() );
if( !evalToZero ){
cerr << evr.transpose() << endl; }
VERIFY( evalToZero );
}
template<typename _Scalar> void realRoots_to_monicPolynomial_scalar()
template<typename Scalar_> void realRoots_to_monicPolynomial_scalar()
{
CALL_SUBTEST_2( (realRoots_to_monicPolynomial_test<_Scalar,2>(2)) );
CALL_SUBTEST_3( (realRoots_to_monicPolynomial_test<_Scalar,3>(3)) );
CALL_SUBTEST_4( (realRoots_to_monicPolynomial_test<_Scalar,4>(4)) );
CALL_SUBTEST_5( (realRoots_to_monicPolynomial_test<_Scalar,5>(5)) );
CALL_SUBTEST_6( (realRoots_to_monicPolynomial_test<_Scalar,6>(6)) );
CALL_SUBTEST_7( (realRoots_to_monicPolynomial_test<_Scalar,7>(7)) );
CALL_SUBTEST_8( (realRoots_to_monicPolynomial_test<_Scalar,17>(17)) );
CALL_SUBTEST_2( (realRoots_to_monicPolynomial_test<Scalar_,2>(2)) );
CALL_SUBTEST_3( (realRoots_to_monicPolynomial_test<Scalar_,3>(3)) );
CALL_SUBTEST_4( (realRoots_to_monicPolynomial_test<Scalar_,4>(4)) );
CALL_SUBTEST_5( (realRoots_to_monicPolynomial_test<Scalar_,5>(5)) );
CALL_SUBTEST_6( (realRoots_to_monicPolynomial_test<Scalar_,6>(6)) );
CALL_SUBTEST_7( (realRoots_to_monicPolynomial_test<Scalar_,7>(7)) );
CALL_SUBTEST_8( (realRoots_to_monicPolynomial_test<Scalar_,17>(17)) );
CALL_SUBTEST_9( (realRoots_to_monicPolynomial_test<_Scalar,Dynamic>(
CALL_SUBTEST_9( (realRoots_to_monicPolynomial_test<Scalar_,Dynamic>(
internal::random<int>(18,26) )) );
}
template<typename _Scalar, int _Deg>
template<typename Scalar_, int Deg_>
void CauchyBounds(int deg)
{
typedef internal::increment_if_fixed_size<_Deg> Dim;
typedef Matrix<_Scalar,Dim::ret,1> PolynomialType;
typedef Matrix<_Scalar,_Deg,1> EvalRootsType;
typedef internal::increment_if_fixed_size<Deg_> Dim;
typedef Matrix<Scalar_,Dim::ret,1> PolynomialType;
typedef Matrix<Scalar_,Deg_,1> EvalRootsType;
PolynomialType pols(deg+1);
EvalRootsType roots = EvalRootsType::Random(deg);
roots_to_monicPolynomial( roots, pols );
_Scalar M = cauchy_max_bound( pols );
_Scalar m = cauchy_min_bound( pols );
_Scalar Max = roots.array().abs().maxCoeff();
_Scalar min = roots.array().abs().minCoeff();
Scalar_ M = cauchy_max_bound( pols );
Scalar_ m = cauchy_min_bound( pols );
Scalar_ Max = roots.array().abs().maxCoeff();
Scalar_ min = roots.array().abs().minCoeff();
bool eval = (M >= Max) && (m <= min);
if( !eval )
{
@@ -87,17 +87,17 @@ void CauchyBounds(int deg)
VERIFY( eval );
}
template<typename _Scalar> void CauchyBounds_scalar()
template<typename Scalar_> void CauchyBounds_scalar()
{
CALL_SUBTEST_2( (CauchyBounds<_Scalar,2>(2)) );
CALL_SUBTEST_3( (CauchyBounds<_Scalar,3>(3)) );
CALL_SUBTEST_4( (CauchyBounds<_Scalar,4>(4)) );
CALL_SUBTEST_5( (CauchyBounds<_Scalar,5>(5)) );
CALL_SUBTEST_6( (CauchyBounds<_Scalar,6>(6)) );
CALL_SUBTEST_7( (CauchyBounds<_Scalar,7>(7)) );
CALL_SUBTEST_8( (CauchyBounds<_Scalar,17>(17)) );
CALL_SUBTEST_2( (CauchyBounds<Scalar_,2>(2)) );
CALL_SUBTEST_3( (CauchyBounds<Scalar_,3>(3)) );
CALL_SUBTEST_4( (CauchyBounds<Scalar_,4>(4)) );
CALL_SUBTEST_5( (CauchyBounds<Scalar_,5>(5)) );
CALL_SUBTEST_6( (CauchyBounds<Scalar_,6>(6)) );
CALL_SUBTEST_7( (CauchyBounds<Scalar_,7>(7)) );
CALL_SUBTEST_8( (CauchyBounds<Scalar_,17>(17)) );
CALL_SUBTEST_9( (CauchyBounds<_Scalar,Dynamic>(
CALL_SUBTEST_9( (CauchyBounds<Scalar_,Dynamic>(
internal::random<int>(18,26) )) );
}
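For context on what the CauchyBounds check above asserts: cauchy_max_bound and cauchy_min_bound must bracket the moduli of all roots of the generated monic polynomial. The classical Cauchy bound provides one such bracket; the sketch below is a reference formulation only (Eigen may compute a tighter variant internally) for a coefficient vector stored lowest-degree-first, as in the Polynomials module.

// Reference-only sketch, not part of the commit.
// Classical Cauchy bound: every root z of a_0 + a_1 x + ... + a_n x^n (a_n != 0)
// satisfies |z| <= 1 + max_{i<n} |a_i / a_n|.
#include <Eigen/Dense>
#include <algorithm>
#include <cmath>

template <typename Derived>
typename Derived::Scalar classicalCauchyMaxBound(const Eigen::MatrixBase<Derived>& coeffs) {
  using Scalar = typename Derived::Scalar;
  const Eigen::Index n = coeffs.size() - 1;     // degree; coeffs[n] is the leading coefficient
  Scalar maxRatio = Scalar(0);
  for (Eigen::Index i = 0; i < n; ++i)
    maxRatio = std::max(maxRatio, std::abs(coeffs[i] / coeffs[n]));
  return Scalar(1) + maxRatio;
}
// A matching lower bound follows by applying the same estimate to the reversed
// polynomial (valid when a_0 != 0): |z| >= 1 / (1 + max_{i>0} |a_i / a_0|).

With such a helper, classicalCauchyMaxBound(pols) >= roots.array().abs().maxCoeff() is exactly the kind of relation the unit test verifies for Eigen's own bounds.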

View File

@@ -7,32 +7,8 @@
// Public License v. 2.0. If a copy of the MPL was not distributed
// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
// import basic and product tests for deprecated DynamicSparseMatrix
#if 0 // sparse_basic(DynamicSparseMatrix) does not compile at all -> disabled
static long g_realloc_count = 0;
#define EIGEN_SPARSE_COMPRESSED_STORAGE_REALLOCATE_PLUGIN g_realloc_count++;
static long g_dense_op_sparse_count = 0;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_DENSE_OP_SPARSE_PLUGIN g_dense_op_sparse_count++;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_ADD_DENSE_PLUGIN g_dense_op_sparse_count+=10;
#define EIGEN_SPARSE_ASSIGNMENT_FROM_SPARSE_SUB_DENSE_PLUGIN g_dense_op_sparse_count+=20;
#define EIGEN_SPARSE_TEST_INCLUDED_FROM_SPARSE_EXTRA 1
#endif
#define EIGEN_NO_DEPRECATED_WARNING
// Disable counting of temporaries, since sparse_product(DynamicSparseMatrix)
// has an extra copy-assignment.
#define EIGEN_SPARSE_PRODUCT_IGNORE_TEMPORARY_COUNT
#include "sparse_product.cpp"
#if 0 // sparse_basic(DynamicSparseMatrix) does not compile at all -> disabled
#include "sparse_basic.cpp"
#endif
#if EIGEN_HAS_CXX11
#ifdef min
#undef min
#endif
@@ -41,12 +17,6 @@ static long g_dense_op_sparse_count = 0;
#undef max
#endif
#include <unordered_map>
#define EIGEN_UNORDERED_MAP_SUPPORT
#endif
#include <Eigen/SparseExtra>
template<typename SetterType,typename DenseType, typename Scalar, int Options>
@@ -67,21 +37,6 @@ bool test_random_setter(SparseMatrix<Scalar,Options>& sm, const DenseType& ref,
return sm.isApprox(ref);
}
template<typename SetterType,typename DenseType, typename T>
bool test_random_setter(DynamicSparseMatrix<T>& sm, const DenseType& ref, const std::vector<Vector2i>& nonzeroCoords)
{
sm.setZero();
std::vector<Vector2i> remaining = nonzeroCoords;
while(!remaining.empty())
{
int i = internal::random<int>(0,static_cast<int>(remaining.size())-1);
sm.coeffRef(remaining[i].x(),remaining[i].y()) = ref.coeff(remaining[i].x(),remaining[i].y());
remaining[i] = remaining.back();
remaining.pop_back();
}
return sm.isApprox(ref);
}
template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& ref)
{
const Index rows = ref.rows();
@@ -136,9 +91,7 @@ template<typename SparseMatrixType> void sparse_extra(const SparseMatrixType& re
// VERIFY_IS_APPROX(m, refMat);
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdMapTraits> >(m,refMat,nonzeroCoords) ));
#ifdef EIGEN_UNORDERED_MAP_SUPPORT
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, StdUnorderedMapTraits> >(m,refMat,nonzeroCoords) ));
#endif
#ifdef EIGEN_GOOGLEHASH_SUPPORT
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleDenseHashMapTraits> >(m,refMat,nonzeroCoords) ));
VERIFY(( test_random_setter<RandomSetter<SparseMatrixType, GoogleSparseHashMapTraits> >(m,refMat,nonzeroCoords) ));
@@ -187,6 +140,72 @@ void check_marketio_vector()
VERIFY_IS_EQUAL(v1,v2);
}
template<typename DenseMatrixType>
void check_marketio_dense()
{
Index rows=DenseMatrixType::MaxRowsAtCompileTime;
if (DenseMatrixType::MaxRowsAtCompileTime==Dynamic){
rows=internal::random<Index>(1,100);
}else if(DenseMatrixType::RowsAtCompileTime==Dynamic){
rows=internal::random<Index>(1,DenseMatrixType::MaxRowsAtCompileTime);
}
Index cols =DenseMatrixType::MaxColsAtCompileTime;
if (DenseMatrixType::MaxColsAtCompileTime==Dynamic){
cols=internal::random<Index>(1,100);
}else if(DenseMatrixType::ColsAtCompileTime==Dynamic){
cols=internal::random<Index>(1,DenseMatrixType::MaxColsAtCompileTime);
}
DenseMatrixType m1, m2;
m1= DenseMatrixType::Random(rows,cols);
saveMarketDense(m1, "dense_extra.mtx");
loadMarketDense(m2, "dense_extra.mtx");
VERIFY_IS_EQUAL(m1,m2);
}
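As a usage note for the check_marketio_dense helper added above, a user-level round trip would look roughly like the sketch below; it assumes the dense Matrix Market helpers live in the Eigen namespace and are exposed by the SparseExtra header, as the test's include suggests.

// Minimal sketch, not part of the commit.
#include <unsupported/Eigen/SparseExtra>   // assumed to declare the dense Market helpers
#include <Eigen/Dense>

int main() {
  Eigen::MatrixXd m1 = Eigen::MatrixXd::Random(5, 4);
  Eigen::MatrixXd m2;
  Eigen::saveMarketDense(m1, "dense_extra.mtx");   // write a MatrixMarket "array" file
  Eigen::loadMarketDense(m2, "dense_extra.mtx");   // read it back
  return m1.isApprox(m2) ? 0 : 1;                  // the unit test requires exact equality
}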
template <typename Scalar>
void check_sparse_inverse() {
typedef SparseMatrix<Scalar> MatrixType;
Matrix<Scalar, -1, -1> A;
A.resize(1000, 1000);
A.fill(0);
A.setIdentity();
A.col(0).array() += 1;
A.row(0).array() += 2;
A.col(2).array() += 3;
A.row(7).array() += 3;
A.col(9).array() += 3;
A.block(3, 4, 4, 2).array() += 9;
A.middleRows(10, 50).array() += 3;
A.middleCols(50, 50).array() += 40;
A.block(500, 300, 40, 20).array() += 10;
A.transposeInPlace();
Eigen::SparseLU<MatrixType> slu;
slu.compute(A.sparseView());
Matrix<Scalar, -1, -1> Id(A.rows(), A.cols());
Id.setIdentity();
Matrix<Scalar, -1, -1> inv = slu.solve(Id);
const MatrixType sparseInv = Eigen::SparseInverse<Scalar>().compute(A.sparseView()).inverse();
Scalar sumdiff = 0; // Check the diff only of the non-zero elements
for (Eigen::Index j = 0; j < A.cols(); j++) {
for (typename MatrixType::InnerIterator iter(sparseInv, j); iter; ++iter) {
const Scalar diff = std::abs(inv(iter.row(), iter.col()) - iter.value());
VERIFY_IS_APPROX_OR_LESS_THAN(diff, 1e-11);
if (iter.value() != 0) {
sumdiff += diff;
}
}
}
VERIFY_IS_APPROX_OR_LESS_THAN(sumdiff, 1e-10);
}
EIGEN_DECLARE_TEST(sparse_extra)
{
for(int i = 0; i < g_repeat; i++) {
@@ -195,22 +214,24 @@ EIGEN_DECLARE_TEST(sparse_extra)
CALL_SUBTEST_2( sparse_extra(SparseMatrix<std::complex<double> >(s, s)) );
CALL_SUBTEST_1( sparse_extra(SparseMatrix<double>(s, s)) );
CALL_SUBTEST_3( sparse_extra(DynamicSparseMatrix<double>(s, s)) );
// CALL_SUBTEST_3(( sparse_basic(DynamicSparseMatrix<double>(s, s)) ));
// CALL_SUBTEST_3(( sparse_basic(DynamicSparseMatrix<double,ColMajor,long int>(s, s)) ));
CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, ColMajor> >()) );
CALL_SUBTEST_3( (sparse_product<DynamicSparseMatrix<float, RowMajor> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
CALL_SUBTEST_4( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<float,ColMajor,int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<double,ColMajor,int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<float,ColMajor,long int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<double,ColMajor,long int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<float>,ColMajor,long int> >()) );
CALL_SUBTEST_3( (check_marketio<SparseMatrix<std::complex<double>,ColMajor,long int> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,Dynamic,RowMajor> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,Dynamic,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<std::complex<float>,Dynamic,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<std::complex<double>,Dynamic,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<float,Dynamic,3> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,3,Dynamic> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,3,4> >()) );
CALL_SUBTEST_4( (check_marketio_dense<Matrix<double,Dynamic,Dynamic,ColMajor,5,5> >()) );
CALL_SUBTEST_5( (check_marketio_vector<Matrix<float,1,Dynamic> >()) );
CALL_SUBTEST_5( (check_marketio_vector<Matrix<double,1,Dynamic> >()) );
@@ -221,6 +242,8 @@ EIGEN_DECLARE_TEST(sparse_extra)
CALL_SUBTEST_5( (check_marketio_vector<Matrix<std::complex<float>,Dynamic,1> >()) );
CALL_SUBTEST_5( (check_marketio_vector<Matrix<std::complex<double>,Dynamic,1> >()) );
CALL_SUBTEST_6((check_sparse_inverse<double>()));
TEST_SET_BUT_UNUSED_VARIABLE(s);
}
}

View File

@@ -191,10 +191,10 @@ template<typename ArrayType> void array_special_functions()
// Check the zeta function against scipy.special.zeta
{
ArrayType x(10), q(10), res(10), ref(10);
x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9, 2, 3, 4;
q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345, -1, -2, -3;
ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan, plusinf, nan, plusinf;
ArrayType x(11), q(11), res(11), ref(11);
x << 1.5, 4, 10.5, 10000.5, 3, 1, 0.9, 2, 3, 4, 2000;
q << 2, 1.5, 3, 1.0001, -2.5, 1.2345, 1.2345, -1, -2, -3, 2000;
ref << 1.61237534869, 0.234848505667, 1.03086757337e-5, 0.367879440865, 0.054102025820864097, plusinf, nan, plusinf, nan, plusinf, 0;
CALL_SUBTEST( verify_component_wise(ref, ref); );
CALL_SUBTEST( res = x.zeta(q); verify_component_wise(res, ref); );
CALL_SUBTEST( res = zeta(x,q); verify_component_wise(res, ref); );
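The extra sample appended above deserves a brief note (an observation, not part of the patch): the function under test is the Hurwitz zeta function

    zeta(x, q) = sum_{n >= 0} 1 / (n + q)^x,

so for x = q = 2000 the largest term is 2000^(-2000), far below the smallest representable double; the expected result therefore underflows to 0, which is the new reference value.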

View File

@@ -114,7 +114,7 @@ template<typename Scalar,typename Packet> void packetmath_real()
Scalar(std::pow(Scalar(10), internal::random<Scalar>(Scalar(-1),Scalar(2))));
}
#if EIGEN_HAS_C99_MATH && (EIGEN_COMP_CXXVER >= 11)
#if EIGEN_HAS_C99_MATH
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLGamma, std::lgamma, internal::plgamma);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErf, std::erf, internal::perf);
CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErfc, std::erfc, internal::perfc);